xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 851d6770254d03a41059de971668c70a9305ed07)
18cd53115SBarry Smith 
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
395fce210SBarry Smith 
440e23c03SJunchao Zhang /*===================================================================================*/
5eb02082bSJunchao Zhang /*              Internal routines for PetscSFPack                              */
640e23c03SJunchao Zhang /*===================================================================================*/
795fce210SBarry Smith 
840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
940e23c03SJunchao Zhang    initialized (since we use persistent requests), then initialize them.
1095fce210SBarry Smith */
11eb02082bSJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
1240e23c03SJunchao Zhang {
13b23bfdefSJunchao Zhang   PetscErrorCode ierr;
1440e23c03SJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1506f2e050SLisandro Dalcin   PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
1640e23c03SJunchao Zhang   const PetscInt *rootoffset,*leafoffset;
1740e23c03SJunchao Zhang   PetscMPIInt    n;
1840e23c03SJunchao Zhang   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
19eb02082bSJunchao Zhang   MPI_Datatype   unit = link->unit;
20eb02082bSJunchao Zhang   PetscMemType   rootmtype = link->rootmtype,leafmtype = link->leafmtype;
2195fce210SBarry Smith 
2240e23c03SJunchao Zhang   PetscFunctionBegin;
23eb02082bSJunchao Zhang   if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
2440e23c03SJunchao Zhang     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
2540e23c03SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
26eb02082bSJunchao Zhang       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
2706f2e050SLisandro Dalcin         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
2840e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
29eb02082bSJunchao Zhang         ierr = MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
3040e23c03SJunchao Zhang       }
3140e23c03SJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
32eb02082bSJunchao Zhang       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
3306f2e050SLisandro Dalcin         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
3440e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
35eb02082bSJunchao Zhang         ierr = MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
3640e23c03SJunchao Zhang       }
37eb02082bSJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
38eb02082bSJunchao Zhang     link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
39eb02082bSJunchao Zhang   }
40eb02082bSJunchao Zhang 
41eb02082bSJunchao Zhang   if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
42eb02082bSJunchao Zhang     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
43eb02082bSJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
44eb02082bSJunchao Zhang       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
4506f2e050SLisandro Dalcin         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
4640e23c03SJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
47eb02082bSJunchao Zhang         ierr = MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
4840e23c03SJunchao Zhang       }
49eb02082bSJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
50eb02082bSJunchao Zhang       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
5106f2e050SLisandro Dalcin         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
52eb02082bSJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
53eb02082bSJunchao Zhang         ierr = MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
54eb02082bSJunchao Zhang       }
55eb02082bSJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
56eb02082bSJunchao Zhang     link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
5795fce210SBarry Smith   }
5895fce210SBarry Smith 
59eb02082bSJunchao Zhang   if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
60eb02082bSJunchao Zhang   if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
6140e23c03SJunchao Zhang   PetscFunctionReturn(0);
6295fce210SBarry Smith }
6395fce210SBarry Smith 
64b23bfdefSJunchao Zhang /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
65eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
6640e23c03SJunchao Zhang {
6740e23c03SJunchao Zhang   PetscErrorCode    ierr;
68b23bfdefSJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
69eb02082bSJunchao Zhang   PetscInt          i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
7040e23c03SJunchao Zhang   const PetscInt    *rootoffset,*leafoffset;
71eb02082bSJunchao Zhang   PetscSFPack       *p,link;
72b23bfdefSJunchao Zhang   PetscBool         match;
7340e23c03SJunchao Zhang 
7440e23c03SJunchao Zhang   PetscFunctionBegin;
75b23bfdefSJunchao Zhang   ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
769d1c8addSJunchao Zhang 
7740e23c03SJunchao Zhang   /* Look for types in cache */
78eb02082bSJunchao Zhang   for (p=&bas->avail; (link=*p); p=&link->next) {
7940e23c03SJunchao Zhang     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
8040e23c03SJunchao Zhang     if (match) {
8140e23c03SJunchao Zhang       *p = link->next; /* Remove from available list */
8240e23c03SJunchao Zhang       goto found;
8340e23c03SJunchao Zhang     }
8453deab39SPeter Brune   }
8553deab39SPeter Brune 
8640e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
8740e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
8840e23c03SJunchao Zhang   ierr = PetscNew(&link);CHKERRQ(ierr);
89eb02082bSJunchao Zhang   ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr);
90b23bfdefSJunchao Zhang   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */
9153deab39SPeter Brune 
92eb02082bSJunchao Zhang   /* Allocate root, leaf, self buffers, and MPI requests */
93eb02082bSJunchao Zhang   link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
94eb02082bSJunchao Zhang   link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
95a89cec69SJunchao Zhang   link->selfbuflen = rootoffset[ndrootranks];
96eb02082bSJunchao Zhang   link->nrootreqs  = nrootreqs;
97eb02082bSJunchao Zhang   link->nleafreqs  = nleafreqs;
98eb02082bSJunchao Zhang   nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
99eb02082bSJunchao Zhang   ierr             = PetscMalloc1(nreqs,&link->reqs);CHKERRQ(ierr);
100eb02082bSJunchao Zhang   for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
101eb02082bSJunchao Zhang 
102eb02082bSJunchao Zhang   for (i=0; i<2; i++) { /* Two communication directions */
103eb02082bSJunchao Zhang     for (j=0; j<2; j++) { /* Two memory types */
104eb02082bSJunchao Zhang       link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
105eb02082bSJunchao Zhang       link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
106eb02082bSJunchao Zhang     }
107eb02082bSJunchao Zhang   }
10853deab39SPeter Brune 
10940e23c03SJunchao Zhang found:
110eb02082bSJunchao Zhang   link->rootmtype = rootmtype;
111eb02082bSJunchao Zhang   link->leafmtype = leafmtype;
112eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
113eb02082bSJunchao Zhang   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr);}
114eb02082bSJunchao Zhang #endif
115eb02082bSJunchao Zhang   if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
116eb02082bSJunchao Zhang   if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
117eb02082bSJunchao Zhang   if (!link->selfbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);CHKERRQ(ierr);}
118eb02082bSJunchao Zhang   if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);CHKERRQ(ierr);}
119b23bfdefSJunchao Zhang   link->rkey = rootdata;
120b23bfdefSJunchao Zhang   link->lkey = leafdata;
12140e23c03SJunchao Zhang   link->next = bas->inuse;
122eb02082bSJunchao Zhang   bas->inuse = link;
12340e23c03SJunchao Zhang 
12440e23c03SJunchao Zhang   *mylink    = link;
12540e23c03SJunchao Zhang   PetscFunctionReturn(0);
12695fce210SBarry Smith }
12795fce210SBarry Smith 
128eb02082bSJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
129b23bfdefSJunchao Zhang {
130b23bfdefSJunchao Zhang   PetscErrorCode    ierr;
131eb02082bSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
132b23bfdefSJunchao Zhang 
133b23bfdefSJunchao Zhang   PetscFunctionBegin;
134b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
135b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
136eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);CHKERRQ(ierr);
137b23bfdefSJunchao Zhang   PetscFunctionReturn(0);
138b23bfdefSJunchao Zhang }
139b23bfdefSJunchao Zhang 
14040e23c03SJunchao Zhang /*===================================================================================*/
14140e23c03SJunchao Zhang /*              SF public interface implementations                                  */
14240e23c03SJunchao Zhang /*===================================================================================*/
14340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
14495fce210SBarry Smith {
14595fce210SBarry Smith   PetscErrorCode ierr;
146b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
14795fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
14840e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
14995fce210SBarry Smith   MPI_Comm       comm;
150b5a8e515SJed Brown   MPI_Group      group;
15140e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
15295fce210SBarry Smith 
15395fce210SBarry Smith   PetscFunctionBegin;
154b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
155b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
156b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
15795fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
15840e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
159c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
16095fce210SBarry Smith   /*
16195fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
16295fce210SBarry Smith    */
163785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
16495fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming */
16595fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
16695fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
16795fce210SBarry Smith   }
16840e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
169c943f53fSJed Brown 
1700b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
1710b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
1720b899082SJunchao Zhang      small and the sorting is cheap.
1730b899082SJunchao Zhang    */
1740b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
1750b899082SJunchao Zhang 
176c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
177c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
178c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
179c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
180c943f53fSJed Brown   bas->ioffset[0] = 0;
181c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
182c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
183c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
184c943f53fSJed Brown   }
18540e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
18640e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
187c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
188c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
189c943f53fSJed Brown   }
190c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
19195fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
192c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
193c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
19495fce210SBarry Smith 
19595fce210SBarry Smith   /* Send leaf identities to roots */
196c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
19740e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
19840e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
19940e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
20040e23c03SJunchao Zhang   }
20140e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
20295fce210SBarry Smith     PetscMPIInt npoints;
20395fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
20440e23c03SJunchao Zhang     if (i < sf->ndranks) {
20540e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
20640e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
20740e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
20840e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
209c943f53fSJed Brown       continue;
210c943f53fSJed Brown     }
21140e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
212bf39f1bfSJed Brown   }
21340e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
21440e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
21540e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
21695fce210SBarry Smith 
217eb02082bSJunchao Zhang   sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is there are data race */
218eb02082bSJunchao Zhang   sf->remoteleafdups  = PETSC_TRUE;
219eb02082bSJunchao Zhang   bas->selfrootdups   = PETSC_TRUE;
220eb02082bSJunchao Zhang   bas->remoterootdups = PETSC_TRUE;
221eb02082bSJunchao Zhang 
222b23bfdefSJunchao Zhang   /* Setup packing optimization for roots and leaves */
223eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(sf);CHKERRQ(ierr);
22495fce210SBarry Smith   PetscFunctionReturn(0);
22595fce210SBarry Smith }
22695fce210SBarry Smith 
2274416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
22895fce210SBarry Smith {
22995fce210SBarry Smith   PetscErrorCode ierr;
23095fce210SBarry Smith 
23195fce210SBarry Smith   PetscFunctionBegin;
232e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
23395fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
23495fce210SBarry Smith   PetscFunctionReturn(0);
23595fce210SBarry Smith }
23695fce210SBarry Smith 
23740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
23895fce210SBarry Smith {
23995fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
24095fce210SBarry Smith   PetscErrorCode    ierr;
24195fce210SBarry Smith 
24295fce210SBarry Smith   PetscFunctionBegin;
24329046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
244c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
245c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
246eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
247eb02082bSJunchao Zhang   if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
248eb02082bSJunchao Zhang #endif
24964f49babSJed Brown   ierr = PetscSFPackDestroyAvailable(&bas->avail);CHKERRQ(ierr);
250eb02082bSJunchao Zhang   ierr = PetscSFPackDestroyOptimizations_Basic(sf);CHKERRQ(ierr);
25195fce210SBarry Smith   PetscFunctionReturn(0);
25295fce210SBarry Smith }
25395fce210SBarry Smith 
25440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
25595fce210SBarry Smith {
25695fce210SBarry Smith   PetscErrorCode ierr;
25795fce210SBarry Smith 
25895fce210SBarry Smith   PetscFunctionBegin;
25940e23c03SJunchao Zhang   ierr = PetscSFReset(sf);CHKERRQ(ierr);
26095fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
26195fce210SBarry Smith   PetscFunctionReturn(0);
26295fce210SBarry Smith }
26395fce210SBarry Smith 
26440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
26595fce210SBarry Smith {
26695fce210SBarry Smith   PetscErrorCode ierr;
26795fce210SBarry Smith   PetscBool      iascii;
26895fce210SBarry Smith 
26995fce210SBarry Smith   PetscFunctionBegin;
27095fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
271b23bfdefSJunchao Zhang   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
27295fce210SBarry Smith   PetscFunctionReturn(0);
27395fce210SBarry Smith }
27495fce210SBarry Smith 
275eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
27695fce210SBarry Smith {
27795fce210SBarry Smith   PetscErrorCode    ierr;
278eb02082bSJunchao Zhang   PetscSFPack       link;
279eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL;
280*851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
28195fce210SBarry Smith 
28295fce210SBarry Smith   PetscFunctionBegin;
283eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
284eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
28595fce210SBarry Smith 
286b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
287b23bfdefSJunchao Zhang   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
288eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
28930e38525SJunchao Zhang 
290b23bfdefSJunchao Zhang   /* Do Isend */
291eb02082bSJunchao Zhang   ierr = PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);CHKERRQ(ierr);
292eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
293eb02082bSJunchao Zhang 
294eb02082bSJunchao Zhang   /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
295eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
29695fce210SBarry Smith   PetscFunctionReturn(0);
29795fce210SBarry Smith }
29895fce210SBarry Smith 
299eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
30095fce210SBarry Smith {
30195fce210SBarry Smith   PetscErrorCode    ierr;
302eb02082bSJunchao Zhang   PetscSFPack       link;
303eb02082bSJunchao Zhang   const PetscInt    *leafloc = NULL;
30495fce210SBarry Smith 
30595fce210SBarry Smith   PetscFunctionBegin;
306eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
30740e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
308eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
309eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);CHKERRQ(ierr);
310eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
31195fce210SBarry Smith   PetscFunctionReturn(0);
31295fce210SBarry Smith }
31395fce210SBarry Smith 
31495fce210SBarry Smith /* leaf -> root with reduction */
315eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
31695fce210SBarry Smith {
31795fce210SBarry Smith   PetscErrorCode    ierr;
318eb02082bSJunchao Zhang   PetscSFPack       link;
319eb02082bSJunchao Zhang   const PetscInt    *leafloc = NULL;
320277f51e8SBarry Smith   MPI_Request       *rootreqs = NULL,*leafreqs = NULL; /* dummy values for compiler warnings about uninitialized value */
32195fce210SBarry Smith 
32295fce210SBarry Smith   PetscFunctionBegin;
323eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
32495fce210SBarry Smith 
325eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
326b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
327c943f53fSJed Brown   /* Eagerly post root receives for non-distinguished ranks */
328eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
32930e38525SJunchao Zhang 
33095fce210SBarry Smith   /* Pack and send leaf data */
331eb02082bSJunchao Zhang   ierr = PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);CHKERRQ(ierr);
332eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
333eb02082bSJunchao Zhang 
334eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
33595fce210SBarry Smith   PetscFunctionReturn(0);
33695fce210SBarry Smith }
33795fce210SBarry Smith 
338eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
33995fce210SBarry Smith {
34095fce210SBarry Smith   PetscErrorCode    ierr;
341eb02082bSJunchao Zhang   PetscSFPack       link;
342eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL;
34395fce210SBarry Smith 
34495fce210SBarry Smith   PetscFunctionBegin;
345eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
346eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
34740e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
348eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
349eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
35095fce210SBarry Smith   PetscFunctionReturn(0);
35195fce210SBarry Smith }
35295fce210SBarry Smith 
353eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
35495fce210SBarry Smith {
35595fce210SBarry Smith   PetscErrorCode ierr;
35695fce210SBarry Smith 
35795fce210SBarry Smith   PetscFunctionBegin;
35840e23c03SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
35995fce210SBarry Smith   PetscFunctionReturn(0);
36095fce210SBarry Smith }
36195fce210SBarry Smith 
362eb02082bSJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
36395fce210SBarry Smith {
36495fce210SBarry Smith   PetscErrorCode    ierr;
365eb02082bSJunchao Zhang   PetscSFPack       link;
366eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL,*leafloc = NULL;
367*851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
36895fce210SBarry Smith 
36995fce210SBarry Smith   PetscFunctionBegin;
370eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
371eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
372eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
37395fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
37440e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
375b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
37640e23c03SJunchao Zhang 
37795fce210SBarry Smith   /* Post leaf receives */
378eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
37930e38525SJunchao Zhang 
38095fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
381eb02082bSJunchao Zhang   ierr = PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
382eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
383eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
384b23bfdefSJunchao Zhang 
385b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
38640e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
387eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);CHKERRQ(ierr);
388eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
38995fce210SBarry Smith   PetscFunctionReturn(0);
39095fce210SBarry Smith }
39195fce210SBarry Smith 
39240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
3938750ddebSJunchao Zhang {
3948750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
3958750ddebSJunchao Zhang 
3968750ddebSJunchao Zhang   PetscFunctionBegin;
3978750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
3988750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
3998750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
4008750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4018750ddebSJunchao Zhang   PetscFunctionReturn(0);
4028750ddebSJunchao Zhang }
4038750ddebSJunchao Zhang 
404f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
405f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
406f659e5c7SJunchao Zhang    was sorted before calling the routine.
407f659e5c7SJunchao Zhang  */
408f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
409f659e5c7SJunchao Zhang {
410f659e5c7SJunchao Zhang   PetscSF           esf;
411b23bfdefSJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
412b23bfdefSJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
413f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
414f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
415b23bfdefSJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
416f659e5c7SJunchao Zhang   PetscBool         connected;
417eb02082bSJunchao Zhang   PetscSFPack       link;
418f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
419f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
420f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
421f659e5c7SJunchao Zhang 
422f659e5c7SJunchao Zhang   PetscFunctionBegin;
423f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
424f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
425f659e5c7SJunchao Zhang 
426f659e5c7SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf */
427f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
428f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
429f659e5c7SJunchao Zhang   /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
430f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
431f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
432f659e5c7SJunchao Zhang   /* Tag selected roots */
433f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
434f659e5c7SJunchao Zhang 
435f659e5c7SJunchao Zhang   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
436f659e5c7SJunchao Zhang      sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
437f659e5c7SJunchao Zhang      interested in since it tells which leaves are connected to which ranks.
438f659e5c7SJunchao Zhang    */
439eb02082bSJunchao Zhang   ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
440eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
441f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
442f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
443f659e5c7SJunchao Zhang   esf_nranks = esf_ndranks = connected_leaves = 0;
444b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
445f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
446eb02082bSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
447b23bfdefSJunchao Zhang     count     = roffset[i+1] - roffset[i];
448b23bfdefSJunchao Zhang     for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
449f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
450f659e5c7SJunchao Zhang   }
451f659e5c7SJunchao Zhang 
452f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
453f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
454f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
455f659e5c7SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
456f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
457f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
458f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
459f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
460eb02082bSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
461f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
462f659e5c7SJunchao Zhang     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
463b23bfdefSJunchao Zhang         if (buffer[k]) {
464f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
465f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
466f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
467f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
468f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
469f659e5c7SJunchao Zhang         q++;
470f659e5c7SJunchao Zhang       }
471f659e5c7SJunchao Zhang     }
472f659e5c7SJunchao Zhang     if (connected) {
473f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
474f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
475f659e5c7SJunchao Zhang       p++;
476f659e5c7SJunchao Zhang     }
477f659e5c7SJunchao Zhang   }
478f659e5c7SJunchao Zhang 
479eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
480f659e5c7SJunchao Zhang 
481f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
482f659e5c7SJunchao Zhang   ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
483f659e5c7SJunchao Zhang   esf->nranks  = esf_nranks;
484f659e5c7SJunchao Zhang   esf->ndranks = esf_ndranks;
485f659e5c7SJunchao Zhang   esf->ranks   = esf_ranks;
486f659e5c7SJunchao Zhang   esf->roffset = esf_roffset;
487f659e5c7SJunchao Zhang   esf->rmine   = esf_rmine;
488f659e5c7SJunchao Zhang   esf->rremote = esf_rremote;
489f659e5c7SJunchao Zhang 
490f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
491f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
492f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
493f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
494f659e5c7SJunchao Zhang      expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
495f659e5c7SJunchao Zhang    */
496f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
497f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
498f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
499f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
500f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
501f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
502f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
503f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
504f659e5c7SJunchao Zhang       PetscInt loc;
505f659e5c7SJunchao Zhang       ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
506f659e5c7SJunchao Zhang       if (loc >= 0) { /* Found in selected this root is connected */
507f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
508f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
509f659e5c7SJunchao Zhang       }
510f659e5c7SJunchao Zhang     }
511f659e5c7SJunchao Zhang     if (connected) {
512f659e5c7SJunchao Zhang       bas->niranks++;
513f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
514f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
515f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
516f659e5c7SJunchao Zhang       p++;
517f659e5c7SJunchao Zhang     }
518f659e5c7SJunchao Zhang   }
519f659e5c7SJunchao Zhang   bas->itotal = q;
520f659e5c7SJunchao Zhang 
521f659e5c7SJunchao Zhang   /* Setup packing optimizations */
522eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
523f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
524f659e5c7SJunchao Zhang 
525f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
526f659e5c7SJunchao Zhang   *newsf = esf;
527f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
528f659e5c7SJunchao Zhang }
529f659e5c7SJunchao Zhang 
530f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
531f659e5c7SJunchao Zhang {
532f659e5c7SJunchao Zhang   PetscSF           esf;
533f659e5c7SJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
534b23bfdefSJunchao Zhang   const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
535f659e5c7SJunchao Zhang   const PetscMPIInt *iranks;
536eb02082bSJunchao Zhang   PetscSFPack       link;
537f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
538f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
539f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
540f659e5c7SJunchao Zhang   MPI_Group         group;
541f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
542f659e5c7SJunchao Zhang 
543f659e5c7SJunchao Zhang   PetscFunctionBegin;
544f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
545f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
546f659e5c7SJunchao Zhang 
547f659e5c7SJunchao Zhang   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
548f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
549f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
550f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
551f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
552f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) {
553f659e5c7SJunchao Zhang     const PetscInt l     = selected[i];
554f659e5c7SJunchao Zhang     new_ilocal[i]        = ilocal ? ilocal[l] : l;
555f659e5c7SJunchao Zhang     new_iremote[i].rank  = iremote[l].rank;
556f659e5c7SJunchao Zhang     new_iremote[i].index = iremote[l].index;
557f659e5c7SJunchao Zhang   }
558f659e5c7SJunchao Zhang 
559f659e5c7SJunchao Zhang   /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
560f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
561f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
562f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */
563f659e5c7SJunchao Zhang 
564f659e5c7SJunchao Zhang   ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
565f659e5c7SJunchao Zhang 
566f659e5c7SJunchao Zhang   /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
567f659e5c7SJunchao Zhang      map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
568f659e5c7SJunchao Zhang    */
569f659e5c7SJunchao Zhang   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
570f659e5c7SJunchao Zhang   ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
571f659e5c7SJunchao Zhang   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
572f659e5c7SJunchao Zhang 
573f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) */
574f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
575f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
576f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
577f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
578f659e5c7SJunchao Zhang 
579f659e5c7SJunchao Zhang   /* Pass info about selected leaves to root buffer */
580eb02082bSJunchao Zhang   ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
581eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
582f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
583f659e5c7SJunchao Zhang 
584f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
585f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
586f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
587f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
588f659e5c7SJunchao Zhang     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
589eb02082bSJunchao Zhang     buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
590f659e5c7SJunchao Zhang     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
591b23bfdefSJunchao Zhang       if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
592f659e5c7SJunchao Zhang     }
593f659e5c7SJunchao Zhang     if (connected) {
594f659e5c7SJunchao Zhang       bas->niranks++;
595f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++;
596f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
597f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
598f659e5c7SJunchao Zhang       p++;
599f659e5c7SJunchao Zhang     }
600f659e5c7SJunchao Zhang   }
601f659e5c7SJunchao Zhang   bas->itotal = q;
602eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
603f659e5c7SJunchao Zhang 
604f659e5c7SJunchao Zhang   /* Setup packing optimizations */
605eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
606f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
607f659e5c7SJunchao Zhang 
608f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
609f659e5c7SJunchao Zhang   *newsf = esf;
610f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
611f659e5c7SJunchao Zhang }
612f659e5c7SJunchao Zhang 
6138cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
61495fce210SBarry Smith {
61540e23c03SJunchao Zhang   PetscSF_Basic  *dat;
61695fce210SBarry Smith   PetscErrorCode ierr;
61795fce210SBarry Smith 
61895fce210SBarry Smith   PetscFunctionBegin;
61995fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
62095fce210SBarry Smith   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
62195fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
62295fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
62395fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
6243482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
6253482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
62695fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
62795fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
62895fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
62995fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
6308750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
631f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
632f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
63395fce210SBarry Smith 
63440e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
63540e23c03SJunchao Zhang   sf->data = (void*)dat;
63695fce210SBarry Smith   PetscFunctionReturn(0);
63795fce210SBarry Smith }
638