xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 40e23c03978c13ac666c3a9a3d12ba4068b9b111)
18cd53115SBarry Smith 
2*40e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
395fce210SBarry Smith 
4*40e23c03SJunchao Zhang /*===================================================================================*/
5*40e23c03SJunchao Zhang /*              Internal routines for PetscSFPack_Basic                              */
6*40e23c03SJunchao Zhang /*===================================================================================*/
795fce210SBarry Smith 
8*40e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
9*40e23c03SJunchao Zhang    initialized (since we use persistent requests), then initialize them.
1095fce210SBarry Smith */
11*40e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
12*40e23c03SJunchao Zhang {
13*40e23c03SJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
14*40e23c03SJunchao Zhang   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
15*40e23c03SJunchao Zhang   PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
16*40e23c03SJunchao Zhang   const PetscInt *rootoffset,*leafoffset;
17*40e23c03SJunchao Zhang   PetscMPIInt    n;
18*40e23c03SJunchao Zhang   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
19*40e23c03SJunchao Zhang   PetscErrorCode ierr;
2095fce210SBarry Smith 
21*40e23c03SJunchao Zhang   PetscFunctionBegin;
2295fce210SBarry Smith 
23*40e23c03SJunchao Zhang   if (!link->initialized[direction]) {
24*40e23c03SJunchao Zhang     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
25*40e23c03SJunchao Zhang     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
26*40e23c03SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
27*40e23c03SJunchao Zhang       for (i=0; i<nrootranks; i++) {
28*40e23c03SJunchao Zhang         if (i >= ndrootranks) {
29*40e23c03SJunchao Zhang           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
30*40e23c03SJunchao Zhang           ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
31*40e23c03SJunchao Zhang         }
32*40e23c03SJunchao Zhang       }
33*40e23c03SJunchao Zhang       for (i=0; i<nleafranks; i++) {
34*40e23c03SJunchao Zhang         if (i >= ndleafranks) {
35*40e23c03SJunchao Zhang           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
36*40e23c03SJunchao Zhang           ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
37*40e23c03SJunchao Zhang         }
38*40e23c03SJunchao Zhang       }
39*40e23c03SJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
40*40e23c03SJunchao Zhang       for (i=0; i<nrootranks; i++) {
41*40e23c03SJunchao Zhang         if (i >= ndrootranks) {
42*40e23c03SJunchao Zhang           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
43*40e23c03SJunchao Zhang           ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
44*40e23c03SJunchao Zhang         }
45*40e23c03SJunchao Zhang       }
46*40e23c03SJunchao Zhang       for (i=0; i<nleafranks; i++) {
47*40e23c03SJunchao Zhang         if (i >= ndleafranks) {
48*40e23c03SJunchao Zhang           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
49*40e23c03SJunchao Zhang           ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
50*40e23c03SJunchao Zhang         }
51*40e23c03SJunchao Zhang       }
52*40e23c03SJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);
53*40e23c03SJunchao Zhang 
54*40e23c03SJunchao Zhang     link->initialized[direction] = PETSC_TRUE;
5595fce210SBarry Smith   }
5695fce210SBarry Smith 
57*40e23c03SJunchao Zhang   if (rootreqs) *rootreqs = requests;
58*40e23c03SJunchao Zhang   if (leafreqs) *leafreqs = requests + bas->niranks;
59*40e23c03SJunchao Zhang   PetscFunctionReturn(0);
6095fce210SBarry Smith }
6195fce210SBarry Smith 
62*40e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink)
63*40e23c03SJunchao Zhang {
64*40e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
65*40e23c03SJunchao Zhang   PetscErrorCode    ierr;
66*40e23c03SJunchao Zhang   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
67*40e23c03SJunchao Zhang   const PetscInt    *rootoffset,*leafoffset;
68*40e23c03SJunchao Zhang   PetscSFPack       *p;
69*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
70*40e23c03SJunchao Zhang 
71*40e23c03SJunchao Zhang   PetscFunctionBegin;
72*40e23c03SJunchao Zhang   /* Look for types in cache */
73*40e23c03SJunchao Zhang   for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
74*40e23c03SJunchao Zhang     PetscBool match;
75*40e23c03SJunchao Zhang     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
76*40e23c03SJunchao Zhang     if (match) {
77*40e23c03SJunchao Zhang       *p = link->next; /* Remove from available list */
78*40e23c03SJunchao Zhang       goto found;
79*40e23c03SJunchao Zhang     }
8053deab39SPeter Brune   }
8153deab39SPeter Brune 
82*40e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
83*40e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
84*40e23c03SJunchao Zhang   ierr = PetscNew(&link);CHKERRQ(ierr);
85*40e23c03SJunchao Zhang   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
86*40e23c03SJunchao Zhang   ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
87*40e23c03SJunchao Zhang   /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */
88*40e23c03SJunchao Zhang   link->half = nrootranks + nleafranks;
89*40e23c03SJunchao Zhang   ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
90*40e23c03SJunchao Zhang   for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */
91*40e23c03SJunchao Zhang   /* One tag per link */
92*40e23c03SJunchao Zhang   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);
9353deab39SPeter Brune 
94*40e23c03SJunchao Zhang   /* Allocate root and leaf buffers */
95*40e23c03SJunchao Zhang   for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);}
96*40e23c03SJunchao Zhang   for (i=0; i<nleafranks; i++) {
97*40e23c03SJunchao Zhang     if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
98*40e23c03SJunchao Zhang       if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
99*40e23c03SJunchao Zhang       link->leaf[i] = link->root[0];
100*40e23c03SJunchao Zhang       continue;
101*40e23c03SJunchao Zhang     }
102*40e23c03SJunchao Zhang     ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr);
10353deab39SPeter Brune   }
10453deab39SPeter Brune 
105*40e23c03SJunchao Zhang found:
106*40e23c03SJunchao Zhang   link->key  = key;
107*40e23c03SJunchao Zhang   link->next = bas->inuse;
108*40e23c03SJunchao Zhang   bas->inuse = (PetscSFPack)link;
109*40e23c03SJunchao Zhang 
110*40e23c03SJunchao Zhang   *mylink    = link;
111*40e23c03SJunchao Zhang   PetscFunctionReturn(0);
11295fce210SBarry Smith }
11395fce210SBarry Smith 
114*40e23c03SJunchao Zhang /*===================================================================================*/
115*40e23c03SJunchao Zhang /*              SF public interface implementations                                  */
116*40e23c03SJunchao Zhang /*===================================================================================*/
117*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
11895fce210SBarry Smith {
11995fce210SBarry Smith   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
12095fce210SBarry Smith   PetscErrorCode ierr;
12195fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
122*40e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
12395fce210SBarry Smith   MPI_Comm       comm;
124b5a8e515SJed Brown   MPI_Group      group;
125*40e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
12695fce210SBarry Smith 
12795fce210SBarry Smith   PetscFunctionBegin;
128b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
129b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
130b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
13195fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
132*40e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
133c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
13495fce210SBarry Smith   /*
13595fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
13695fce210SBarry Smith    */
137785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
13895fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming */
13995fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
14095fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
14195fce210SBarry Smith   }
142*40e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
143c943f53fSJed Brown 
1440b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
1450b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
1460b899082SJunchao Zhang      small and the sorting is cheap.
1470b899082SJunchao Zhang    */
1480b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
1490b899082SJunchao Zhang 
150c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
151c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
152c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
153c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
154c943f53fSJed Brown   bas->ioffset[0] = 0;
155c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
156c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
157c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
158c943f53fSJed Brown   }
159*40e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
160*40e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
161c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
162c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
163c943f53fSJed Brown   }
164c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
16595fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
166c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
167c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
16895fce210SBarry Smith 
16995fce210SBarry Smith   /* Send leaf identities to roots */
170c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
171*40e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
172*40e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
173*40e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
174*40e23c03SJunchao Zhang   }
175*40e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
17695fce210SBarry Smith     PetscMPIInt npoints;
17795fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
178*40e23c03SJunchao Zhang     if (i < sf->ndranks) {
179*40e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
180*40e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
181*40e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
182*40e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
183c943f53fSJed Brown       continue;
184c943f53fSJed Brown     }
185*40e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
186bf39f1bfSJed Brown   }
187*40e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
188*40e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
189*40e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
19095fce210SBarry Smith 
191*40e23c03SJunchao Zhang   /* Setup packing optimization for root and leaf */
192*40e23c03SJunchao Zhang   ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr);
193*40e23c03SJunchao Zhang   ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
19495fce210SBarry Smith   PetscFunctionReturn(0);
19595fce210SBarry Smith }
19695fce210SBarry Smith 
1974416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
19895fce210SBarry Smith {
19995fce210SBarry Smith   PetscErrorCode ierr;
20095fce210SBarry Smith 
20195fce210SBarry Smith   PetscFunctionBegin;
202e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
20395fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
20495fce210SBarry Smith   PetscFunctionReturn(0);
20595fce210SBarry Smith }
20695fce210SBarry Smith 
207*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
20895fce210SBarry Smith {
20995fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
21095fce210SBarry Smith   PetscErrorCode    ierr;
211*40e23c03SJunchao Zhang   PetscSFPack_Basic link,next;
21295fce210SBarry Smith 
21395fce210SBarry Smith   PetscFunctionBegin;
21429046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
215c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
216c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
217*40e23c03SJunchao Zhang   for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
218bf39f1bfSJed Brown     PetscInt i;
219*40e23c03SJunchao Zhang     next = (PetscSFPack_Basic)link->next;
22077b7e48dSJunchao Zhang     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
221bf39f1bfSJed Brown     for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);}
222c943f53fSJed Brown     for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */
22395fce210SBarry Smith     ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
22430e38525SJunchao Zhang     /* Free persistent requests using MPI_Request_free */
225*40e23c03SJunchao Zhang     for (i=0; i<link->half*2; i++) {
226*40e23c03SJunchao Zhang       if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
22730e38525SJunchao Zhang     }
22895fce210SBarry Smith     ierr = PetscFree(link->requests);CHKERRQ(ierr);
22995fce210SBarry Smith     ierr = PetscFree(link);CHKERRQ(ierr);
23095fce210SBarry Smith   }
23195fce210SBarry Smith   bas->avail = NULL;
232*40e23c03SJunchao Zhang   ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
233*40e23c03SJunchao Zhang   ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
23495fce210SBarry Smith   PetscFunctionReturn(0);
23595fce210SBarry Smith }
23695fce210SBarry Smith 
237*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
23895fce210SBarry Smith {
23995fce210SBarry Smith   PetscErrorCode ierr;
24095fce210SBarry Smith 
24195fce210SBarry Smith   PetscFunctionBegin;
242*40e23c03SJunchao Zhang   ierr = PetscSFReset(sf);CHKERRQ(ierr);
24395fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
24495fce210SBarry Smith   PetscFunctionReturn(0);
24595fce210SBarry Smith }
24695fce210SBarry Smith 
247*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
24895fce210SBarry Smith {
24995fce210SBarry Smith   PetscErrorCode ierr;
25095fce210SBarry Smith   PetscBool      iascii;
25195fce210SBarry Smith 
25295fce210SBarry Smith   PetscFunctionBegin;
25395fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
25495fce210SBarry Smith   if (iascii) {
25595fce210SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
25695fce210SBarry Smith   }
25795fce210SBarry Smith   PetscFunctionReturn(0);
25895fce210SBarry Smith }
25995fce210SBarry Smith 
2603482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
26195fce210SBarry Smith {
26295fce210SBarry Smith   PetscErrorCode    ierr;
263*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
264c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
26595fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
26695fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
26795fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
26830e38525SJunchao Zhang   PetscMPIInt       n;
269*40e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
27095fce210SBarry Smith 
27195fce210SBarry Smith   PetscFunctionBegin;
272*40e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
273*40e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
274*40e23c03SJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
27595fce210SBarry Smith 
276*40e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
277c943f53fSJed Brown   /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
27830e38525SJunchao Zhang   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
279*40e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */
28030e38525SJunchao Zhang 
28195fce210SBarry Smith   /* Pack and send root data */
28295fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
283bf39f1bfSJed Brown     void *packstart = link->root[i];
28430e38525SJunchao Zhang     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
285*40e23c03SJunchao Zhang     (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
286c943f53fSJed Brown     if (i < ndrootranks) continue; /* shared memory */
287*40e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
28895fce210SBarry Smith   }
28995fce210SBarry Smith   PetscFunctionReturn(0);
29095fce210SBarry Smith }
29195fce210SBarry Smith 
292*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
29395fce210SBarry Smith {
29495fce210SBarry Smith   PetscErrorCode    ierr;
295*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
296c943f53fSJed Brown   PetscInt          i,nleafranks,ndleafranks;
29795fce210SBarry Smith   const PetscInt    *leafoffset,*leafloc;
298*40e23c03SJunchao Zhang   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
2993482bfa8SJunchao Zhang   PetscMPIInt       typesize = -1;
30095fce210SBarry Smith 
30195fce210SBarry Smith   PetscFunctionBegin;
302*40e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
303*40e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
304*40e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
305*40e23c03SJunchao Zhang   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
3063482bfa8SJunchao Zhang 
307*40e23c03SJunchao Zhang   if (UnpackAndOp) { typesize = link->unitbytes; }
3083482bfa8SJunchao Zhang   else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); }
3093482bfa8SJunchao Zhang 
31095fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
31195fce210SBarry Smith     PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
3123482bfa8SJunchao Zhang     char *packstart = (char *) link->leaf[i];
313*40e23c03SJunchao Zhang     if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
3143482bfa8SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
3153482bfa8SJunchao Zhang     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
3163482bfa8SJunchao Zhang       PetscInt j;
3173482bfa8SJunchao Zhang       for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); }
31895fce210SBarry Smith     }
3193482bfa8SJunchao Zhang #else
320d2a25953SJunchao Zhang     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
3213482bfa8SJunchao Zhang #endif
3223482bfa8SJunchao Zhang   }
3233482bfa8SJunchao Zhang 
324*40e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
32595fce210SBarry Smith   PetscFunctionReturn(0);
32695fce210SBarry Smith }
32795fce210SBarry Smith 
32895fce210SBarry Smith /* leaf -> root with reduction */
329*40e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
33095fce210SBarry Smith {
331*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
33295fce210SBarry Smith   PetscErrorCode    ierr;
333c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
33495fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
33595fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
33695fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
33730e38525SJunchao Zhang   PetscMPIInt       n;
33895fce210SBarry Smith 
33995fce210SBarry Smith   PetscFunctionBegin;
340*40e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
341*40e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
342*40e23c03SJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
34395fce210SBarry Smith 
344*40e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
345c943f53fSJed Brown   /* Eagerly post root receives for non-distinguished ranks */
34630e38525SJunchao Zhang   ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr);
347*40e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr);
34830e38525SJunchao Zhang 
34995fce210SBarry Smith   /* Pack and send leaf data */
35095fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
351bf39f1bfSJed Brown     void *packstart = link->leaf[i];
35230e38525SJunchao Zhang     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
353*40e23c03SJunchao Zhang     (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
354c943f53fSJed Brown     if (i < ndleafranks) continue; /* shared memory */
355*40e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr);
35695fce210SBarry Smith   }
35795fce210SBarry Smith   PetscFunctionReturn(0);
35895fce210SBarry Smith }
35995fce210SBarry Smith 
360*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
36195fce210SBarry Smith {
362*40e23c03SJunchao Zhang   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
36395fce210SBarry Smith   PetscErrorCode    ierr;
364*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
36595fce210SBarry Smith   PetscInt          i,nrootranks;
3661620da3bSToby Isaac   PetscMPIInt       typesize = -1;
36795fce210SBarry Smith   const PetscInt    *rootoffset,*rootloc;
368*40e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
36995fce210SBarry Smith 
37095fce210SBarry Smith   PetscFunctionBegin;
371*40e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
37295fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
373*40e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
374*40e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
375*40e23c03SJunchao Zhang   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
376*40e23c03SJunchao Zhang   if (UnpackAndOp) {
3771620da3bSToby Isaac     typesize = link->unitbytes;
3781620da3bSToby Isaac   }
3791620da3bSToby Isaac   else {
3801620da3bSToby Isaac     ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr);
3811620da3bSToby Isaac   }
38295fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
38395fce210SBarry Smith     PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
384bf39f1bfSJed Brown     char *packstart = (char *) link->root[i];
38595fce210SBarry Smith 
386*40e23c03SJunchao Zhang     if (UnpackAndOp) {
387*40e23c03SJunchao Zhang       (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
38895fce210SBarry Smith     }
38962795ce3SStefano Zampini #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
39078ec4774SToby Isaac     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
39178ec4774SToby Isaac       PetscInt j;
3921620da3bSToby Isaac 
3931620da3bSToby Isaac       for (j = 0; j < n; j++) {
39478ec4774SToby Isaac         ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr);
3951620da3bSToby Isaac       }
3961620da3bSToby Isaac     }
397a9ec7e9eSToby Isaac #else
398d2a25953SJunchao Zhang     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
399a9ec7e9eSToby Isaac #endif
4001620da3bSToby Isaac   }
401*40e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
40295fce210SBarry Smith   PetscFunctionReturn(0);
40395fce210SBarry Smith }
40495fce210SBarry Smith 
405*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
40695fce210SBarry Smith {
40795fce210SBarry Smith   PetscErrorCode ierr;
40895fce210SBarry Smith 
40995fce210SBarry Smith   PetscFunctionBegin;
410*40e23c03SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
41195fce210SBarry Smith   PetscFunctionReturn(0);
41295fce210SBarry Smith }
41395fce210SBarry Smith 
41495fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
41595fce210SBarry Smith {
416*40e23c03SJunchao Zhang   PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
41795fce210SBarry Smith   PetscErrorCode    ierr;
418*40e23c03SJunchao Zhang   PetscSFPack_Basic link;
419c943f53fSJed Brown   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
42095fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
42195fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
42295fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
42330e38525SJunchao Zhang   PetscMPIInt       n;
424*40e23c03SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
42595fce210SBarry Smith 
42695fce210SBarry Smith   PetscFunctionBegin;
427*40e23c03SJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
42895fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
429*40e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
430*40e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
431*40e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
432*40e23c03SJunchao Zhang 
433*40e23c03SJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
43495fce210SBarry Smith   /* Post leaf receives */
43530e38525SJunchao Zhang   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
436*40e23c03SJunchao Zhang   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr);
43730e38525SJunchao Zhang 
43895fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
439*40e23c03SJunchao Zhang   ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
44095fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
441bf39f1bfSJed Brown     void *packstart = link->root[i];
44230e38525SJunchao Zhang     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
443*40e23c03SJunchao Zhang     (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
444c943f53fSJed Brown     if (i < ndrootranks) continue; /* shared memory */
445*40e23c03SJunchao Zhang     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
44695fce210SBarry Smith   }
447*40e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
44895fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
449bf39f1bfSJed Brown     const void  *packstart = link->leaf[i];
45030e38525SJunchao Zhang     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
451*40e23c03SJunchao Zhang     (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
45295fce210SBarry Smith   }
453*40e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
45495fce210SBarry Smith   PetscFunctionReturn(0);
45595fce210SBarry Smith }
45695fce210SBarry Smith 
457*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
4588750ddebSJunchao Zhang {
4598750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
4608750ddebSJunchao Zhang 
4618750ddebSJunchao Zhang   PetscFunctionBegin;
4628750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
4638750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
4648750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
4658750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4668750ddebSJunchao Zhang   PetscFunctionReturn(0);
4678750ddebSJunchao Zhang }
4688750ddebSJunchao Zhang 
4698cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
47095fce210SBarry Smith {
471*40e23c03SJunchao Zhang   PetscSF_Basic  *dat;
47295fce210SBarry Smith   PetscErrorCode ierr;
47395fce210SBarry Smith 
47495fce210SBarry Smith   PetscFunctionBegin;
47595fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
47695fce210SBarry Smith   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
47795fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
47895fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
47995fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
4803482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
4813482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
48295fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
48395fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
48495fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
48595fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4868750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
48795fce210SBarry Smith 
488*40e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
489*40e23c03SJunchao Zhang   sf->data = (void*)dat;
49095fce210SBarry Smith   PetscFunctionReturn(0);
49195fce210SBarry Smith }
492