xref: /petsc/src/sys/utils/mpits.c (revision 6145cd650024ce38668196a0ed2dfef51f77eb7a)
1f6ced4a3SJed Brown #include <petscsys.h>        /*I  "petscsys.h"  I*/
2f6ced4a3SJed Brown #include <stddef.h>
3f6ced4a3SJed Brown 
/*
   String table describing PetscBuildTwoSidedType; it is handed to PetscOptionsGetEnum() when the
   -build_twosided option is parsed.  Entries are the value names, followed by the enum type name,
   the common prefix of the enum constants, and a terminating null pointer.
*/
const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE","IBARRIER","PetscBuildTwoSidedType","PETSC_BUILDTWOSIDED_",0};
11f6ced4a3SJed Brown 
/* File-scope (not per-communicator) algorithm selection.  Remains PETSC_BUILDTWOSIDED_NOTSET until
   PetscCommBuildTwoSidedSetType() stores a value or PetscCommBuildTwoSidedGetType() chooses a default. */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
13f6ced4a3SJed Brown 
14f6ced4a3SJed Brown #undef __FUNCT__
15*6145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
16*6145cd65SJed Brown /*@
17*6145cd65SJed Brown    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
18*6145cd65SJed Brown 
19*6145cd65SJed Brown    Logically Collective
20*6145cd65SJed Brown 
21*6145cd65SJed Brown    Input Arguments:
22*6145cd65SJed Brown +  comm - PETSC_COMM_WORLD
23*6145cd65SJed Brown -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
24*6145cd65SJed Brown 
25*6145cd65SJed Brown    Level: developer
26*6145cd65SJed Brown 
27*6145cd65SJed Brown    Note:
28*6145cd65SJed Brown    This option is currently global, but could be made per-communicator.
29*6145cd65SJed Brown 
30*6145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
31*6145cd65SJed Brown @*/
32*6145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
33*6145cd65SJed Brown {
34*6145cd65SJed Brown   PetscFunctionBegin;
35*6145cd65SJed Brown #if defined(PETSC_USE_DEBUG)
36*6145cd65SJed Brown   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
37*6145cd65SJed Brown     PetscMPIInt ierr;
38*6145cd65SJed Brown     PetscMPIInt b1[2],b2[2];
39*6145cd65SJed Brown     b1[0] = -(PetscMPIInt)twosided;
40*6145cd65SJed Brown     b1[1] = (PetscMPIInt)twosided;
41*6145cd65SJed Brown     ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
42*6145cd65SJed Brown     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
43*6145cd65SJed Brown   }
44*6145cd65SJed Brown #endif
45*6145cd65SJed Brown   _twosided_type = twosided;
46*6145cd65SJed Brown   PetscFunctionReturn(0);
47*6145cd65SJed Brown }
48*6145cd65SJed Brown 
49*6145cd65SJed Brown #undef __FUNCT__
50*6145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
51*6145cd65SJed Brown /*@
52*6145cd65SJed Brown    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
53*6145cd65SJed Brown 
54*6145cd65SJed Brown    Logically Collective
55*6145cd65SJed Brown 
56*6145cd65SJed Brown    Output Arguments:
57*6145cd65SJed Brown +  comm - communicator on which to query algorithm
58*6145cd65SJed Brown -  twosided - algorithm to use for PetscCommBuildTwoSided()
59*6145cd65SJed Brown 
60*6145cd65SJed Brown    Level: developer
61*6145cd65SJed Brown 
62*6145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
63*6145cd65SJed Brown @*/
64*6145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
65f6ced4a3SJed Brown {
66f6ced4a3SJed Brown   PetscErrorCode ierr;
67f6ced4a3SJed Brown 
68f6ced4a3SJed Brown   PetscFunctionBegin;
69*6145cd65SJed Brown   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
70*6145cd65SJed Brown   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
71f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
72*6145cd65SJed Brown #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
73*6145cd65SJed Brown     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
74*6145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
75f6ced4a3SJed Brown #  else
76*6145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
77f6ced4a3SJed Brown #  endif
78*6145cd65SJed Brown #else
79*6145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
80*6145cd65SJed Brown #endif
81*6145cd65SJed Brown     ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
82f6ced4a3SJed Brown   }
83f6ced4a3SJed Brown   *twosided = _twosided_type;
84f6ced4a3SJed Brown   PetscFunctionReturn(0);
85f6ced4a3SJed Brown }
86f6ced4a3SJed Brown 
87f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
88f6ced4a3SJed Brown /* Segmented (extendable) array implementation */
89f6ced4a3SJed Brown typedef struct _SegArray *SegArray;
90f6ced4a3SJed Brown struct _SegArray {
91f6ced4a3SJed Brown   PetscInt unitbytes;
92f6ced4a3SJed Brown   PetscInt alloc;
93f6ced4a3SJed Brown   PetscInt used;
94f6ced4a3SJed Brown   PetscInt tailused;
95f6ced4a3SJed Brown   SegArray tail;
96f6ced4a3SJed Brown   union {                       /* Dummy types to ensure alignment */
97f6ced4a3SJed Brown     PetscReal dummy_real;
98f6ced4a3SJed Brown     PetscInt dummy_int;
99f6ced4a3SJed Brown     char array[1];
100f6ced4a3SJed Brown   } u;
101f6ced4a3SJed Brown };
102f6ced4a3SJed Brown 
103f6ced4a3SJed Brown #undef __FUNCT__
104f6ced4a3SJed Brown #define __FUNCT__ "SegArrayCreate"
105f6ced4a3SJed Brown static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
106f6ced4a3SJed Brown {
107f6ced4a3SJed Brown   PetscErrorCode ierr;
108f6ced4a3SJed Brown 
109f6ced4a3SJed Brown   PetscFunctionBegin;
110f6ced4a3SJed Brown   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
111f6ced4a3SJed Brown   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
112f6ced4a3SJed Brown   (*seg)->unitbytes = unitbytes;
113f6ced4a3SJed Brown   (*seg)->alloc = expected;
114f6ced4a3SJed Brown   PetscFunctionReturn(0);
115f6ced4a3SJed Brown }
116f6ced4a3SJed Brown 
117f6ced4a3SJed Brown #undef __FUNCT__
118f6ced4a3SJed Brown #define __FUNCT__ "SegArrayAlloc_Private"
119f6ced4a3SJed Brown static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
120f6ced4a3SJed Brown {
121f6ced4a3SJed Brown   PetscErrorCode ierr;
122f6ced4a3SJed Brown   SegArray newseg,s;
123f6ced4a3SJed Brown   PetscInt alloc;
124f6ced4a3SJed Brown 
125f6ced4a3SJed Brown   PetscFunctionBegin;
126f6ced4a3SJed Brown   s = *seg;
127f6ced4a3SJed Brown   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
128f6ced4a3SJed Brown   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
129f6ced4a3SJed Brown   ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
130f6ced4a3SJed Brown   ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
131f6ced4a3SJed Brown   newseg->unitbytes = s->unitbytes;
132f6ced4a3SJed Brown   newseg->tailused = s->used + s->tailused;
133f6ced4a3SJed Brown   newseg->tail = s;
134f6ced4a3SJed Brown   newseg->alloc = alloc;
135f6ced4a3SJed Brown   *seg = newseg;
136f6ced4a3SJed Brown   PetscFunctionReturn(0);
137f6ced4a3SJed Brown }
138f6ced4a3SJed Brown 
139f6ced4a3SJed Brown #undef __FUNCT__
140f6ced4a3SJed Brown #define __FUNCT__ "SegArrayGet"
141f6ced4a3SJed Brown static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
142f6ced4a3SJed Brown {
143f6ced4a3SJed Brown   PetscErrorCode ierr;
144f6ced4a3SJed Brown   SegArray s;
145f6ced4a3SJed Brown 
146f6ced4a3SJed Brown   PetscFunctionBegin;
147f6ced4a3SJed Brown   s = *seg;
148f6ced4a3SJed Brown   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
149f6ced4a3SJed Brown   s = *seg;
150f6ced4a3SJed Brown   *(char**)array = &s->u.array[s->used*s->unitbytes];
151f6ced4a3SJed Brown   s->used += count;
152f6ced4a3SJed Brown   PetscFunctionReturn(0);
153f6ced4a3SJed Brown }
154f6ced4a3SJed Brown 
155f6ced4a3SJed Brown #undef __FUNCT__
156f6ced4a3SJed Brown #define __FUNCT__ "SegArrayDestroy"
157f6ced4a3SJed Brown static PetscErrorCode SegArrayDestroy(SegArray *seg)
158f6ced4a3SJed Brown {
159f6ced4a3SJed Brown   PetscErrorCode ierr;
160f6ced4a3SJed Brown   SegArray s;
161f6ced4a3SJed Brown 
162f6ced4a3SJed Brown   PetscFunctionBegin;
163f6ced4a3SJed Brown   for (s=*seg; s;) {
164f6ced4a3SJed Brown     SegArray tail = s->tail;
165f6ced4a3SJed Brown     ierr = PetscFree(s);CHKERRQ(ierr);
166f6ced4a3SJed Brown     s = tail;
167f6ced4a3SJed Brown   }
168f6ced4a3SJed Brown   *seg = PETSC_NULL;
169f6ced4a3SJed Brown   PetscFunctionReturn(0);
170f6ced4a3SJed Brown }
171f6ced4a3SJed Brown 
172f6ced4a3SJed Brown #undef __FUNCT__
173f6ced4a3SJed Brown #define __FUNCT__ "SegArrayExtract"
174f6ced4a3SJed Brown /* Extracts contiguous data and resets segarray */
175f6ced4a3SJed Brown static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
176f6ced4a3SJed Brown {
177f6ced4a3SJed Brown   PetscErrorCode ierr;
178f6ced4a3SJed Brown   PetscInt unitbytes;
179f6ced4a3SJed Brown   SegArray s,t;
180f6ced4a3SJed Brown   char *contig,*ptr;
181f6ced4a3SJed Brown 
182f6ced4a3SJed Brown   PetscFunctionBegin;
183f6ced4a3SJed Brown   s = *seg;
184f6ced4a3SJed Brown   unitbytes = s->unitbytes;
185f6ced4a3SJed Brown   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
186f6ced4a3SJed Brown   ptr = contig + s->tailused*unitbytes;
187f6ced4a3SJed Brown   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
188f6ced4a3SJed Brown   for (t=s->tail; t;) {
189f6ced4a3SJed Brown     SegArray tail = t->tail;
190f6ced4a3SJed Brown     ptr -= t->used*unitbytes;
191f6ced4a3SJed Brown     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
192f6ced4a3SJed Brown     ierr = PetscFree(t);CHKERRQ(ierr);
193f6ced4a3SJed Brown     t = tail;
194f6ced4a3SJed Brown   }
195f6ced4a3SJed Brown   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
196f6ced4a3SJed Brown   s->tailused = 0;
197f6ced4a3SJed Brown   s->tail = PETSC_NULL;
198f6ced4a3SJed Brown   *(char**)contiguous = contig;
199f6ced4a3SJed Brown   PetscFunctionReturn(0);
200f6ced4a3SJed Brown }
201f6ced4a3SJed Brown 
202f6ced4a3SJed Brown #undef __FUNCT__
203*6145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
204*6145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
205f6ced4a3SJed Brown {
206f6ced4a3SJed Brown   PetscErrorCode ierr;
207f6ced4a3SJed Brown   PetscMPIInt    nrecvs,tag,unitbytes,done;
208f6ced4a3SJed Brown   PetscInt       i;
209f6ced4a3SJed Brown   char           *tdata;
210f6ced4a3SJed Brown   MPI_Request    *sendreqs,barrier;
211f6ced4a3SJed Brown   SegArray       segrank,segdata;
212f6ced4a3SJed Brown 
213f6ced4a3SJed Brown   PetscFunctionBegin;
214f6ced4a3SJed Brown   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
215f6ced4a3SJed Brown   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
216f6ced4a3SJed Brown   tdata = (char*)todata;
217f6ced4a3SJed Brown   ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
218f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
219f6ced4a3SJed Brown     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
220f6ced4a3SJed Brown   }
221f6ced4a3SJed Brown   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
222f6ced4a3SJed Brown   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
223f6ced4a3SJed Brown 
224f6ced4a3SJed Brown   nrecvs = 0;
225f6ced4a3SJed Brown   barrier = MPI_REQUEST_NULL;
226f6ced4a3SJed Brown   for (done=0; !done;) {
227f6ced4a3SJed Brown     PetscMPIInt flag;
228f6ced4a3SJed Brown     MPI_Status status;
229f6ced4a3SJed Brown     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
230f6ced4a3SJed Brown     if (flag) {                 /* incoming message */
231f6ced4a3SJed Brown       PetscMPIInt *recvrank;
232f6ced4a3SJed Brown       void *buf;
233f6ced4a3SJed Brown       ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
234f6ced4a3SJed Brown       ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
235f6ced4a3SJed Brown       *recvrank = status.MPI_SOURCE;
236f6ced4a3SJed Brown       ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
237f6ced4a3SJed Brown       nrecvs++;
238f6ced4a3SJed Brown     }
239f6ced4a3SJed Brown     if (barrier == MPI_REQUEST_NULL) {
240f6ced4a3SJed Brown       PetscMPIInt sent,nsends = PetscMPIIntCast(nto);
241f6ced4a3SJed Brown       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
242f6ced4a3SJed Brown       if (sent) {
243f6ced4a3SJed Brown         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
244f6ced4a3SJed Brown         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
245f6ced4a3SJed Brown       }
246f6ced4a3SJed Brown     } else {
247f6ced4a3SJed Brown       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
248f6ced4a3SJed Brown     }
249f6ced4a3SJed Brown   }
250f6ced4a3SJed Brown   *nfrom = nrecvs;
251f6ced4a3SJed Brown   ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
252f6ced4a3SJed Brown   ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr);
253f6ced4a3SJed Brown   ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
254f6ced4a3SJed Brown   ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr);
255f6ced4a3SJed Brown   PetscFunctionReturn(0);
256f6ced4a3SJed Brown }
257f6ced4a3SJed Brown #endif
258f6ced4a3SJed Brown 
259f6ced4a3SJed Brown #undef __FUNCT__
260*6145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
261*6145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
262f6ced4a3SJed Brown {
263f6ced4a3SJed Brown   PetscErrorCode ierr;
264f6ced4a3SJed Brown   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
265f6ced4a3SJed Brown   PetscInt       i;
266f6ced4a3SJed Brown   char           *tdata,*fdata;
267f6ced4a3SJed Brown   MPI_Request    *reqs,*sendreqs;
268f6ced4a3SJed Brown   MPI_Status     *statuses;
269f6ced4a3SJed Brown 
270f6ced4a3SJed Brown   PetscFunctionBegin;
271f6ced4a3SJed Brown   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
272f6ced4a3SJed Brown   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
273f6ced4a3SJed Brown   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
274f6ced4a3SJed Brown   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
275f6ced4a3SJed Brown   ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
276f6ced4a3SJed Brown   ierr = PetscFree(iflags);CHKERRQ(ierr);
277f6ced4a3SJed Brown 
278f6ced4a3SJed Brown   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
279f6ced4a3SJed Brown   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
280f6ced4a3SJed Brown   ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
281f6ced4a3SJed Brown   tdata = (char*)todata;
282f6ced4a3SJed Brown   ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
283f6ced4a3SJed Brown   sendreqs = reqs + nrecvs;
284f6ced4a3SJed Brown   for (i=0; i<nrecvs; i++) {
285f6ced4a3SJed Brown     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
286f6ced4a3SJed Brown   }
287f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
288f6ced4a3SJed Brown     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
289f6ced4a3SJed Brown   }
290f6ced4a3SJed Brown   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
291f6ced4a3SJed Brown   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
292f6ced4a3SJed Brown   for (i=0; i<nrecvs; i++) {
293f6ced4a3SJed Brown     franks[i] = statuses[i].MPI_SOURCE;
294f6ced4a3SJed Brown   }
295f6ced4a3SJed Brown   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
296f6ced4a3SJed Brown 
297f6ced4a3SJed Brown   *nfrom = nrecvs;
298f6ced4a3SJed Brown   *fromranks = franks;
299f6ced4a3SJed Brown   *(void**)fromdata = fdata;
300f6ced4a3SJed Brown   PetscFunctionReturn(0);
301f6ced4a3SJed Brown }
302f6ced4a3SJed Brown 
303f6ced4a3SJed Brown #undef __FUNCT__
304f6ced4a3SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided"
305f6ced4a3SJed Brown /*@C
306f6ced4a3SJed Brown    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
307f6ced4a3SJed Brown 
308f6ced4a3SJed Brown    Collective on MPI_Comm
309f6ced4a3SJed Brown 
310f6ced4a3SJed Brown    Input Arguments:
311f6ced4a3SJed Brown +  comm - communicator
312f6ced4a3SJed Brown .  count - number of entries to send/receive (must match on all ranks)
313f6ced4a3SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
314f6ced4a3SJed Brown .  nto - number of ranks to send data to
315f6ced4a3SJed Brown .  toranks - ranks to send to (array of length nto)
316f6ced4a3SJed Brown -  todata - data to send to each rank (packed)
317f6ced4a3SJed Brown 
318f6ced4a3SJed Brown    Output Arguments:
319f6ced4a3SJed Brown +  nfrom - number of ranks receiving messages from
320f6ced4a3SJed Brown .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
321f6ced4a3SJed Brown -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
322f6ced4a3SJed Brown 
323f6ced4a3SJed Brown    Level: developer
324f6ced4a3SJed Brown 
325f6ced4a3SJed Brown    Notes:
326f6ced4a3SJed Brown    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
327f6ced4a3SJed Brown    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
328f6ced4a3SJed Brown 
329f6ced4a3SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
330f6ced4a3SJed Brown 
331f6ced4a3SJed Brown    References:
332f6ced4a3SJed Brown    The MPI_Ibarrier implementation uses the algorithm in
333f6ced4a3SJed Brown    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
334f6ced4a3SJed Brown 
335f6ced4a3SJed Brown .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
336f6ced4a3SJed Brown @*/
337f6ced4a3SJed Brown PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
338f6ced4a3SJed Brown {
339f6ced4a3SJed Brown   PetscErrorCode ierr;
340*6145cd65SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
341f6ced4a3SJed Brown 
342f6ced4a3SJed Brown   PetscFunctionBegin;
343*6145cd65SJed Brown   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
344f6ced4a3SJed Brown   switch (buildtype) {
345*6145cd65SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
346f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
347*6145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
348*6145cd65SJed Brown #else
349*6145cd65SJed Brown     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
350f6ced4a3SJed Brown #endif
351*6145cd65SJed Brown     break;
352*6145cd65SJed Brown   case PETSC_BUILDTWOSIDED_ALLREDUCE:
353*6145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
354f6ced4a3SJed Brown     break;
355f6ced4a3SJed Brown   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
356f6ced4a3SJed Brown   }
357f6ced4a3SJed Brown   PetscFunctionReturn(0);
358f6ced4a3SJed Brown }
359