xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision f659e5c7ab26e3388857cea03772891fbaf11827)
18cd53115SBarry Smith 
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
395fce210SBarry Smith 
440e23c03SJunchao Zhang /*===================================================================================*/
540e23c03SJunchao Zhang /*              Internal routines for PetscSFPack_Basic                              */
640e23c03SJunchao Zhang /*===================================================================================*/
795fce210SBarry Smith 
840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
940e23c03SJunchao Zhang    initialized (since we use persistent requests), then initialize them.
1095fce210SBarry Smith */
1140e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
1240e23c03SJunchao Zhang {
1340e23c03SJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1440e23c03SJunchao Zhang   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
1540e23c03SJunchao Zhang   PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
1640e23c03SJunchao Zhang   const PetscInt *rootoffset,*leafoffset;
1740e23c03SJunchao Zhang   PetscMPIInt    n;
1840e23c03SJunchao Zhang   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
1940e23c03SJunchao Zhang   PetscErrorCode ierr;
2095fce210SBarry Smith 
2140e23c03SJunchao Zhang   PetscFunctionBegin;
2295fce210SBarry Smith 
2340e23c03SJunchao Zhang   if (!link->initialized[direction]) {
2440e23c03SJunchao Zhang     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
2540e23c03SJunchao Zhang     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
2640e23c03SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
2740e23c03SJunchao Zhang       for (i=0; i<nrootranks; i++) {
2840e23c03SJunchao Zhang         if (i >= ndrootranks) {
2940e23c03SJunchao Zhang           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
3040e23c03SJunchao Zhang           ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
3140e23c03SJunchao Zhang         }
3240e23c03SJunchao Zhang       }
3340e23c03SJunchao Zhang       for (i=0; i<nleafranks; i++) {
3440e23c03SJunchao Zhang         if (i >= ndleafranks) {
3540e23c03SJunchao Zhang           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
3640e23c03SJunchao Zhang           ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
3740e23c03SJunchao Zhang         }
3840e23c03SJunchao Zhang       }
3940e23c03SJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
4040e23c03SJunchao Zhang       for (i=0; i<nrootranks; i++) {
4140e23c03SJunchao Zhang         if (i >= ndrootranks) {
4240e23c03SJunchao Zhang           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
4340e23c03SJunchao Zhang           ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
4440e23c03SJunchao Zhang         }
4540e23c03SJunchao Zhang       }
4640e23c03SJunchao Zhang       for (i=0; i<nleafranks; i++) {
4740e23c03SJunchao Zhang         if (i >= ndleafranks) {
4840e23c03SJunchao Zhang           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
4940e23c03SJunchao Zhang           ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
5040e23c03SJunchao Zhang         }
5140e23c03SJunchao Zhang       }
5240e23c03SJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);
5340e23c03SJunchao Zhang 
5440e23c03SJunchao Zhang     link->initialized[direction] = PETSC_TRUE;
5595fce210SBarry Smith   }
5695fce210SBarry Smith 
5740e23c03SJunchao Zhang   if (rootreqs) *rootreqs = requests;
5840e23c03SJunchao Zhang   if (leafreqs) *leafreqs = requests + bas->niranks;
5940e23c03SJunchao Zhang   PetscFunctionReturn(0);
6095fce210SBarry Smith }
6195fce210SBarry Smith 
6240e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink)
6340e23c03SJunchao Zhang {
6440e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
6540e23c03SJunchao Zhang   PetscErrorCode    ierr;
6640e23c03SJunchao Zhang   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
6740e23c03SJunchao Zhang   const PetscInt    *rootoffset,*leafoffset;
6840e23c03SJunchao Zhang   PetscSFPack       *p;
6940e23c03SJunchao Zhang   PetscSFPack_Basic link;
7040e23c03SJunchao Zhang 
7140e23c03SJunchao Zhang   PetscFunctionBegin;
7240e23c03SJunchao Zhang   /* Look for types in cache */
7340e23c03SJunchao Zhang   for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
7440e23c03SJunchao Zhang     PetscBool match;
7540e23c03SJunchao Zhang     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
7640e23c03SJunchao Zhang     if (match) {
7740e23c03SJunchao Zhang       *p = link->next; /* Remove from available list */
7840e23c03SJunchao Zhang       goto found;
7940e23c03SJunchao Zhang     }
8053deab39SPeter Brune   }
8153deab39SPeter Brune 
8240e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
8340e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
8440e23c03SJunchao Zhang   ierr = PetscNew(&link);CHKERRQ(ierr);
8540e23c03SJunchao Zhang   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
8640e23c03SJunchao Zhang   ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
8740e23c03SJunchao Zhang   /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */
8840e23c03SJunchao Zhang   link->half = nrootranks + nleafranks;
8940e23c03SJunchao Zhang   ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
9040e23c03SJunchao Zhang   for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */
9140e23c03SJunchao Zhang   /* One tag per link */
9240e23c03SJunchao Zhang   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);
9353deab39SPeter Brune 
9440e23c03SJunchao Zhang   /* Allocate root and leaf buffers */
9540e23c03SJunchao Zhang   for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);}
9640e23c03SJunchao Zhang   for (i=0; i<nleafranks; i++) {
9740e23c03SJunchao Zhang     if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
9840e23c03SJunchao Zhang       if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
9940e23c03SJunchao Zhang       link->leaf[i] = link->root[0];
10040e23c03SJunchao Zhang       continue;
10140e23c03SJunchao Zhang     }
10240e23c03SJunchao Zhang     ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr);
10353deab39SPeter Brune   }
10453deab39SPeter Brune 
10540e23c03SJunchao Zhang found:
10640e23c03SJunchao Zhang   link->key  = key;
10740e23c03SJunchao Zhang   link->next = bas->inuse;
10840e23c03SJunchao Zhang   bas->inuse = (PetscSFPack)link;
10940e23c03SJunchao Zhang 
11040e23c03SJunchao Zhang   *mylink    = link;
11140e23c03SJunchao Zhang   PetscFunctionReturn(0);
11295fce210SBarry Smith }
11395fce210SBarry Smith 
11440e23c03SJunchao Zhang /*===================================================================================*/
11540e23c03SJunchao Zhang /*              SF public interface implementations                                  */
11640e23c03SJunchao Zhang /*===================================================================================*/
11740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
11895fce210SBarry Smith {
11995fce210SBarry Smith   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
12095fce210SBarry Smith   PetscErrorCode ierr;
12195fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
12240e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
12395fce210SBarry Smith   MPI_Comm       comm;
124b5a8e515SJed Brown   MPI_Group      group;
12540e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
12695fce210SBarry Smith 
12795fce210SBarry Smith   PetscFunctionBegin;
128b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
129b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
130b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
13195fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
13240e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
133c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
13495fce210SBarry Smith   /*
13595fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
13695fce210SBarry Smith    */
137785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
13895fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming */
13995fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
14095fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
14195fce210SBarry Smith   }
14240e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
143c943f53fSJed Brown 
1440b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
1450b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
1460b899082SJunchao Zhang      small and the sorting is cheap.
1470b899082SJunchao Zhang    */
1480b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
1490b899082SJunchao Zhang 
150c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
151c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
152c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
153c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
154c943f53fSJed Brown   bas->ioffset[0] = 0;
155c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
156c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
157c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
158c943f53fSJed Brown   }
15940e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
16040e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
161c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
162c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
163c943f53fSJed Brown   }
164c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
16595fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
166c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
167c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
16895fce210SBarry Smith 
16995fce210SBarry Smith   /* Send leaf identities to roots */
170c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
17140e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
17240e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
17340e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
17440e23c03SJunchao Zhang   }
17540e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
17695fce210SBarry Smith     PetscMPIInt npoints;
17795fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
17840e23c03SJunchao Zhang     if (i < sf->ndranks) {
17940e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
18040e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
18140e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
18240e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
183c943f53fSJed Brown       continue;
184c943f53fSJed Brown     }
18540e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
186bf39f1bfSJed Brown   }
18740e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
18840e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
18940e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
19095fce210SBarry Smith 
19140e23c03SJunchao Zhang   /* Setup packing optimization for root and leaf */
19240e23c03SJunchao Zhang   ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr);
19340e23c03SJunchao Zhang   ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
19495fce210SBarry Smith   PetscFunctionReturn(0);
19595fce210SBarry Smith }
19695fce210SBarry Smith 
1974416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
19895fce210SBarry Smith {
19995fce210SBarry Smith   PetscErrorCode ierr;
20095fce210SBarry Smith 
20195fce210SBarry Smith   PetscFunctionBegin;
202e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
20395fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
20495fce210SBarry Smith   PetscFunctionReturn(0);
20595fce210SBarry Smith }
20695fce210SBarry Smith 
20740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
20895fce210SBarry Smith {
20995fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
21095fce210SBarry Smith   PetscErrorCode    ierr;
21140e23c03SJunchao Zhang   PetscSFPack_Basic link,next;
21295fce210SBarry Smith 
21395fce210SBarry Smith   PetscFunctionBegin;
21429046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
215c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
216c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
21740e23c03SJunchao Zhang   for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
218bf39f1bfSJed Brown     PetscInt i;
21940e23c03SJunchao Zhang     next = (PetscSFPack_Basic)link->next;
22077b7e48dSJunchao Zhang     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
221bf39f1bfSJed Brown     for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);}
222c943f53fSJed Brown     for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */
22395fce210SBarry Smith     ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
22430e38525SJunchao Zhang     /* Free persistent requests using MPI_Request_free */
22540e23c03SJunchao Zhang     for (i=0; i<link->half*2; i++) {
22640e23c03SJunchao Zhang       if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
22730e38525SJunchao Zhang     }
22895fce210SBarry Smith     ierr = PetscFree(link->requests);CHKERRQ(ierr);
22995fce210SBarry Smith     ierr = PetscFree(link);CHKERRQ(ierr);
23095fce210SBarry Smith   }
23195fce210SBarry Smith   bas->avail = NULL;
23240e23c03SJunchao Zhang   ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
23340e23c03SJunchao Zhang   ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
23495fce210SBarry Smith   PetscFunctionReturn(0);
23595fce210SBarry Smith }
23695fce210SBarry Smith 
23740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
23895fce210SBarry Smith {
23995fce210SBarry Smith   PetscErrorCode ierr;
24095fce210SBarry Smith 
24195fce210SBarry Smith   PetscFunctionBegin;
24240e23c03SJunchao Zhang   ierr = PetscSFReset(sf);CHKERRQ(ierr);
24395fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
24495fce210SBarry Smith   PetscFunctionReturn(0);
24595fce210SBarry Smith }
24695fce210SBarry Smith 
24740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
24895fce210SBarry Smith {
24995fce210SBarry Smith   PetscErrorCode ierr;
25095fce210SBarry Smith   PetscBool      iascii;
25195fce210SBarry Smith 
25295fce210SBarry Smith   PetscFunctionBegin;
25395fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
25495fce210SBarry Smith   if (iascii) {
25595fce210SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
25695fce210SBarry Smith   }
25795fce210SBarry Smith   PetscFunctionReturn(0);
25895fce210SBarry Smith }
25995fce210SBarry Smith 
2603482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
26195fce210SBarry Smith {
26295fce210SBarry Smith   PetscErrorCode    ierr;
26340e23c03SJunchao Zhang   PetscSFPack_Basic link;
264c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
26595fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
26695fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
26795fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
26830e38525SJunchao Zhang   PetscMPIInt       n;
26940e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
27095fce210SBarry Smith 
27195fce210SBarry Smith   PetscFunctionBegin;
27240e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
27340e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
27440e23c03SJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
27595fce210SBarry Smith 
27640e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
277c943f53fSJed Brown   /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
27830e38525SJunchao Zhang   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
27940e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */
28030e38525SJunchao Zhang 
28195fce210SBarry Smith   /* Pack and send root data */
28295fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
283bf39f1bfSJed Brown     void *packstart = link->root[i];
28430e38525SJunchao Zhang     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
28540e23c03SJunchao Zhang     (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
286c943f53fSJed Brown     if (i < ndrootranks) continue; /* shared memory */
28740e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
28895fce210SBarry Smith   }
28995fce210SBarry Smith   PetscFunctionReturn(0);
29095fce210SBarry Smith }
29195fce210SBarry Smith 
29240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
29395fce210SBarry Smith {
29495fce210SBarry Smith   PetscErrorCode    ierr;
29540e23c03SJunchao Zhang   PetscSFPack_Basic link;
296c943f53fSJed Brown   PetscInt          i,nleafranks,ndleafranks;
29795fce210SBarry Smith   const PetscInt    *leafoffset,*leafloc;
29840e23c03SJunchao Zhang   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
2993482bfa8SJunchao Zhang   PetscMPIInt       typesize = -1;
30095fce210SBarry Smith 
30195fce210SBarry Smith   PetscFunctionBegin;
30240e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
30340e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
30440e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
30540e23c03SJunchao Zhang   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
3063482bfa8SJunchao Zhang 
30740e23c03SJunchao Zhang   if (UnpackAndOp) { typesize = link->unitbytes; }
3083482bfa8SJunchao Zhang   else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); }
3093482bfa8SJunchao Zhang 
31095fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
31195fce210SBarry Smith     PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
3123482bfa8SJunchao Zhang     char *packstart = (char *) link->leaf[i];
31340e23c03SJunchao Zhang     if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
3143482bfa8SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
3153482bfa8SJunchao Zhang     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
3163482bfa8SJunchao Zhang       PetscInt j;
3173482bfa8SJunchao Zhang       for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); }
31895fce210SBarry Smith     }
3193482bfa8SJunchao Zhang #else
320d2a25953SJunchao Zhang     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
3213482bfa8SJunchao Zhang #endif
3223482bfa8SJunchao Zhang   }
3233482bfa8SJunchao Zhang 
32440e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
32595fce210SBarry Smith   PetscFunctionReturn(0);
32695fce210SBarry Smith }
32795fce210SBarry Smith 
32895fce210SBarry Smith /* leaf -> root with reduction */
32940e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
33095fce210SBarry Smith {
33140e23c03SJunchao Zhang   PetscSFPack_Basic link;
33295fce210SBarry Smith   PetscErrorCode    ierr;
333c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
33495fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
33595fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
33695fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
33730e38525SJunchao Zhang   PetscMPIInt       n;
33895fce210SBarry Smith 
33995fce210SBarry Smith   PetscFunctionBegin;
34040e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
34140e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
34240e23c03SJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
34395fce210SBarry Smith 
34440e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
345c943f53fSJed Brown   /* Eagerly post root receives for non-distinguished ranks */
34630e38525SJunchao Zhang   ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr);
34740e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr);
34830e38525SJunchao Zhang 
34995fce210SBarry Smith   /* Pack and send leaf data */
35095fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
351bf39f1bfSJed Brown     void *packstart = link->leaf[i];
35230e38525SJunchao Zhang     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
35340e23c03SJunchao Zhang     (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
354c943f53fSJed Brown     if (i < ndleafranks) continue; /* shared memory */
35540e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr);
35695fce210SBarry Smith   }
35795fce210SBarry Smith   PetscFunctionReturn(0);
35895fce210SBarry Smith }
35995fce210SBarry Smith 
36040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
36195fce210SBarry Smith {
36240e23c03SJunchao Zhang   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
36395fce210SBarry Smith   PetscErrorCode    ierr;
36440e23c03SJunchao Zhang   PetscSFPack_Basic link;
36595fce210SBarry Smith   PetscInt          i,nrootranks;
3661620da3bSToby Isaac   PetscMPIInt       typesize = -1;
36795fce210SBarry Smith   const PetscInt    *rootoffset,*rootloc;
36840e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
36995fce210SBarry Smith 
37095fce210SBarry Smith   PetscFunctionBegin;
37140e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
37295fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
37340e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
37440e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
37540e23c03SJunchao Zhang   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
37640e23c03SJunchao Zhang   if (UnpackAndOp) {
3771620da3bSToby Isaac     typesize = link->unitbytes;
3781620da3bSToby Isaac   }
3791620da3bSToby Isaac   else {
3801620da3bSToby Isaac     ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr);
3811620da3bSToby Isaac   }
38295fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
38395fce210SBarry Smith     PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
384bf39f1bfSJed Brown     char *packstart = (char *) link->root[i];
38595fce210SBarry Smith 
38640e23c03SJunchao Zhang     if (UnpackAndOp) {
38740e23c03SJunchao Zhang       (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
38895fce210SBarry Smith     }
38962795ce3SStefano Zampini #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
39078ec4774SToby Isaac     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
39178ec4774SToby Isaac       PetscInt j;
3921620da3bSToby Isaac 
3931620da3bSToby Isaac       for (j = 0; j < n; j++) {
39478ec4774SToby Isaac         ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr);
3951620da3bSToby Isaac       }
3961620da3bSToby Isaac     }
397a9ec7e9eSToby Isaac #else
398d2a25953SJunchao Zhang     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
399a9ec7e9eSToby Isaac #endif
4001620da3bSToby Isaac   }
40140e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
40295fce210SBarry Smith   PetscFunctionReturn(0);
40395fce210SBarry Smith }
40495fce210SBarry Smith 
40540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
40695fce210SBarry Smith {
40795fce210SBarry Smith   PetscErrorCode ierr;
40895fce210SBarry Smith 
40995fce210SBarry Smith   PetscFunctionBegin;
41040e23c03SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
41195fce210SBarry Smith   PetscFunctionReturn(0);
41295fce210SBarry Smith }
41395fce210SBarry Smith 
41495fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
41595fce210SBarry Smith {
41640e23c03SJunchao Zhang   PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
41795fce210SBarry Smith   PetscErrorCode    ierr;
41840e23c03SJunchao Zhang   PetscSFPack_Basic link;
419c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
42095fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
42195fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
42295fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
42330e38525SJunchao Zhang   PetscMPIInt       n;
42440e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
42595fce210SBarry Smith 
42695fce210SBarry Smith   PetscFunctionBegin;
42740e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
42895fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
42940e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
43040e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
43140e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
43240e23c03SJunchao Zhang 
43340e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
43495fce210SBarry Smith   /* Post leaf receives */
43530e38525SJunchao Zhang   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
43640e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr);
43730e38525SJunchao Zhang 
43895fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
43940e23c03SJunchao Zhang   ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
44095fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
441bf39f1bfSJed Brown     void *packstart = link->root[i];
44230e38525SJunchao Zhang     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
44340e23c03SJunchao Zhang     (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
444c943f53fSJed Brown     if (i < ndrootranks) continue; /* shared memory */
44540e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
44695fce210SBarry Smith   }
44740e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
44895fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
449bf39f1bfSJed Brown     const void  *packstart = link->leaf[i];
45030e38525SJunchao Zhang     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
45140e23c03SJunchao Zhang     (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
45295fce210SBarry Smith   }
45340e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
45495fce210SBarry Smith   PetscFunctionReturn(0);
45595fce210SBarry Smith }
45695fce210SBarry Smith 
45740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
4588750ddebSJunchao Zhang {
4598750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
4608750ddebSJunchao Zhang 
4618750ddebSJunchao Zhang   PetscFunctionBegin;
4628750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
4638750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
4648750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
4658750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4668750ddebSJunchao Zhang   PetscFunctionReturn(0);
4678750ddebSJunchao Zhang }
4688750ddebSJunchao Zhang 
469*f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
470*f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
471*f659e5c7SJunchao Zhang    was sorted before calling the routine.
472*f659e5c7SJunchao Zhang  */
473*f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
474*f659e5c7SJunchao Zhang {
475*f659e5c7SJunchao Zhang   PetscSF           esf;
476*f659e5c7SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
477*f659e5c7SJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
478*f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
479*f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
480*f659e5c7SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
481*f659e5c7SJunchao Zhang   PetscBool         connected;
482*f659e5c7SJunchao Zhang   PetscSFPack_Basic link;
483*f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
484*f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
485*f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
486*f659e5c7SJunchao Zhang 
487*f659e5c7SJunchao Zhang   PetscFunctionBegin;
488*f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
489*f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
490*f659e5c7SJunchao Zhang 
491*f659e5c7SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf */
492*f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
493*f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
494*f659e5c7SJunchao Zhang   /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
495*f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
496*f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
497*f659e5c7SJunchao Zhang   /* Tag selected roots */
498*f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
499*f659e5c7SJunchao Zhang 
500*f659e5c7SJunchao Zhang   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
501*f659e5c7SJunchao Zhang      sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
502*f659e5c7SJunchao Zhang      interested in since it tells which leaves are connected to which ranks.
503*f659e5c7SJunchao Zhang    */
504*f659e5c7SJunchao Zhang   ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
505*f659e5c7SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
506*f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
507*f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
508*f659e5c7SJunchao Zhang   esf_nranks = esf_ndranks = connected_leaves = 0;
509*f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */
510*f659e5c7SJunchao Zhang     leafbuf   = (PetscInt*)link->leaf[i];
511*f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
512*f659e5c7SJunchao Zhang     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
513*f659e5c7SJunchao Zhang       if (leafbuf[k]) {
514*f659e5c7SJunchao Zhang         connected_leaves++;
515*f659e5c7SJunchao Zhang         connected   = PETSC_TRUE;
516*f659e5c7SJunchao Zhang       }
517*f659e5c7SJunchao Zhang     }
518*f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;}
519*f659e5c7SJunchao Zhang   }
520*f659e5c7SJunchao Zhang 
521*f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
522*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
523*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
524*f659e5c7SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
525*f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
526*f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
527*f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
528*f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
529*f659e5c7SJunchao Zhang     leafbuf   = (PetscInt*)link->leaf[i];
530*f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
531*f659e5c7SJunchao Zhang     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
532*f659e5c7SJunchao Zhang         if (leafbuf[k]) {
533*f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
534*f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
535*f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
536*f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
537*f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
538*f659e5c7SJunchao Zhang         q++;
539*f659e5c7SJunchao Zhang       }
540*f659e5c7SJunchao Zhang     }
541*f659e5c7SJunchao Zhang     if (connected) {
542*f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
543*f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
544*f659e5c7SJunchao Zhang       p++;
545*f659e5c7SJunchao Zhang     }
546*f659e5c7SJunchao Zhang   }
547*f659e5c7SJunchao Zhang 
548*f659e5c7SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
549*f659e5c7SJunchao Zhang 
550*f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
551*f659e5c7SJunchao Zhang   ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
552*f659e5c7SJunchao Zhang   esf->nranks  = esf_nranks;
553*f659e5c7SJunchao Zhang   esf->ndranks = esf_ndranks;
554*f659e5c7SJunchao Zhang   esf->ranks   = esf_ranks;
555*f659e5c7SJunchao Zhang   esf->roffset = esf_roffset;
556*f659e5c7SJunchao Zhang   esf->rmine   = esf_rmine;
557*f659e5c7SJunchao Zhang   esf->rremote = esf_rremote;
558*f659e5c7SJunchao Zhang 
559*f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
560*f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
561*f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
562*f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
563*f659e5c7SJunchao Zhang      expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
564*f659e5c7SJunchao Zhang    */
565*f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
566*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
567*f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
568*f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
569*f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
570*f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
571*f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
572*f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
573*f659e5c7SJunchao Zhang       PetscInt loc;
574*f659e5c7SJunchao Zhang       ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
575*f659e5c7SJunchao Zhang       if (loc >= 0) { /* Found in selected this root is connected */
576*f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
577*f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
578*f659e5c7SJunchao Zhang       }
579*f659e5c7SJunchao Zhang     }
580*f659e5c7SJunchao Zhang     if (connected) {
581*f659e5c7SJunchao Zhang       bas->niranks++;
582*f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
583*f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
584*f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
585*f659e5c7SJunchao Zhang       p++;
586*f659e5c7SJunchao Zhang     }
587*f659e5c7SJunchao Zhang   }
588*f659e5c7SJunchao Zhang   bas->itotal = q;
589*f659e5c7SJunchao Zhang 
590*f659e5c7SJunchao Zhang   /* Setup packing optimizations */
591*f659e5c7SJunchao Zhang   ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
592*f659e5c7SJunchao Zhang   ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
593*f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
594*f659e5c7SJunchao Zhang 
595*f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
596*f659e5c7SJunchao Zhang   *newsf = esf;
597*f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
598*f659e5c7SJunchao Zhang }
599*f659e5c7SJunchao Zhang 
600*f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
601*f659e5c7SJunchao Zhang {
602*f659e5c7SJunchao Zhang   PetscSF           esf;
603*f659e5c7SJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
604*f659e5c7SJunchao Zhang   const PetscInt    *ilocal,*ioffset,*irootloc;
605*f659e5c7SJunchao Zhang   const PetscMPIInt *iranks;
606*f659e5c7SJunchao Zhang   PetscSFPack_Basic link;
607*f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
608*f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
609*f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
610*f659e5c7SJunchao Zhang   MPI_Group         group;
611*f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
612*f659e5c7SJunchao Zhang 
613*f659e5c7SJunchao Zhang   PetscFunctionBegin;
614*f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
615*f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
616*f659e5c7SJunchao Zhang 
617*f659e5c7SJunchao Zhang   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
618*f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
619*f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
620*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
621*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
622*f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) {
623*f659e5c7SJunchao Zhang     const PetscInt l     = selected[i];
624*f659e5c7SJunchao Zhang     new_ilocal[i]        = ilocal ? ilocal[l] : l;
625*f659e5c7SJunchao Zhang     new_iremote[i].rank  = iremote[l].rank;
626*f659e5c7SJunchao Zhang     new_iremote[i].index = iremote[l].index;
627*f659e5c7SJunchao Zhang   }
628*f659e5c7SJunchao Zhang 
629*f659e5c7SJunchao Zhang   /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
630*f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
631*f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
632*f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */
633*f659e5c7SJunchao Zhang 
634*f659e5c7SJunchao Zhang   ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
635*f659e5c7SJunchao Zhang 
636*f659e5c7SJunchao Zhang   /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
637*f659e5c7SJunchao Zhang      map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
638*f659e5c7SJunchao Zhang    */
639*f659e5c7SJunchao Zhang   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
640*f659e5c7SJunchao Zhang   ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
641*f659e5c7SJunchao Zhang   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
642*f659e5c7SJunchao Zhang 
643*f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) */
644*f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
645*f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
646*f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
647*f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
648*f659e5c7SJunchao Zhang 
649*f659e5c7SJunchao Zhang   /* Pass info about selected leaves to root buffer */
650*f659e5c7SJunchao Zhang   ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
651*f659e5c7SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
652*f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
653*f659e5c7SJunchao Zhang 
654*f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
655*f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
656*f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
657*f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
658*f659e5c7SJunchao Zhang     PetscInt *rootbuf = (PetscInt*)link->root[i];
659*f659e5c7SJunchao Zhang     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
660*f659e5c7SJunchao Zhang     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
661*f659e5c7SJunchao Zhang       if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
662*f659e5c7SJunchao Zhang     }
663*f659e5c7SJunchao Zhang     if (connected) {
664*f659e5c7SJunchao Zhang       bas->niranks++;
665*f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++;
666*f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
667*f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
668*f659e5c7SJunchao Zhang       p++;
669*f659e5c7SJunchao Zhang     }
670*f659e5c7SJunchao Zhang   }
671*f659e5c7SJunchao Zhang   bas->itotal = q;
672*f659e5c7SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
673*f659e5c7SJunchao Zhang 
674*f659e5c7SJunchao Zhang   /* Setup packing optimizations */
675*f659e5c7SJunchao Zhang   ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
676*f659e5c7SJunchao Zhang   ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
677*f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
678*f659e5c7SJunchao Zhang 
679*f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
680*f659e5c7SJunchao Zhang   *newsf = esf;
681*f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
682*f659e5c7SJunchao Zhang }
683*f659e5c7SJunchao Zhang 
6848cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
68595fce210SBarry Smith {
68640e23c03SJunchao Zhang   PetscSF_Basic  *dat;
68795fce210SBarry Smith   PetscErrorCode ierr;
68895fce210SBarry Smith 
68995fce210SBarry Smith   PetscFunctionBegin;
69095fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
69195fce210SBarry Smith   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
69295fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
69395fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
69495fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
6953482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
6963482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
69795fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
69895fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
69995fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
70095fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
7018750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
702*f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
703*f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
70495fce210SBarry Smith 
70540e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
70640e23c03SJunchao Zhang   sf->data = (void*)dat;
70795fce210SBarry Smith   PetscFunctionReturn(0);
70895fce210SBarry Smith }
709