xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 95fce210c9a5323d60d47a8c3b992a28141ecb57)
1*95fce210SBarry Smith #define PETSC_DESIRE_COMPLEX
2*95fce210SBarry Smith #include <petsc-private/sfimpl.h> /*I "petscsf.h" I*/
3*95fce210SBarry Smith 
4*95fce210SBarry Smith typedef struct _n_PetscSFBasicPack *PetscSFBasicPack;
5*95fce210SBarry Smith struct _n_PetscSFBasicPack {
6*95fce210SBarry Smith   void (*Pack)(PetscInt,const PetscInt*,const void*,void*);
7*95fce210SBarry Smith   void (*UnpackInsert)(PetscInt,const PetscInt*,void*,const void*);
8*95fce210SBarry Smith   void (*UnpackAdd)(PetscInt,const PetscInt*,void*,const void*);
9*95fce210SBarry Smith   void (*UnpackMin)(PetscInt,const PetscInt*,void*,const void*);
10*95fce210SBarry Smith   void (*UnpackMax)(PetscInt,const PetscInt*,void*,const void*);
11*95fce210SBarry Smith   void (*UnpackMinloc)(PetscInt,const PetscInt*,void*,const void*);
12*95fce210SBarry Smith   void (*UnpackMaxloc)(PetscInt,const PetscInt*,void*,const void*);
13*95fce210SBarry Smith   void (*FetchAndInsert)(PetscInt,const PetscInt*,void*,void*);
14*95fce210SBarry Smith   void (*FetchAndAdd)(PetscInt,const PetscInt*,void*,void*);
15*95fce210SBarry Smith   void (*FetchAndMin)(PetscInt,const PetscInt*,void*,void*);
16*95fce210SBarry Smith   void (*FetchAndMax)(PetscInt,const PetscInt*,void*,void*);
17*95fce210SBarry Smith   void (*FetchAndMinloc)(PetscInt,const PetscInt*,void*,void*);
18*95fce210SBarry Smith   void (*FetchAndMaxloc)(PetscInt,const PetscInt*,void*,void*);
19*95fce210SBarry Smith 
20*95fce210SBarry Smith   MPI_Datatype     unit;
21*95fce210SBarry Smith   size_t           unitbytes;   /* Number of bytes in a unit */
22*95fce210SBarry Smith   const void       *key;        /* Array used as key for operation */
23*95fce210SBarry Smith   char             *root;       /* Packed root data, contiguous by leaf rank */
24*95fce210SBarry Smith   char             *leaf;       /* Packed leaf data, contiguous by root rank */
25*95fce210SBarry Smith   MPI_Request      *requests;   /* Array of root requests followed by leaf requests */
26*95fce210SBarry Smith   PetscSFBasicPack next;
27*95fce210SBarry Smith };
28*95fce210SBarry Smith 
29*95fce210SBarry Smith typedef struct {
30*95fce210SBarry Smith   PetscMPIInt      tag;
31*95fce210SBarry Smith   PetscInt         niranks;     /* Number of incoming ranks (ranks accessing my roots) */
32*95fce210SBarry Smith   PetscMPIInt      *iranks;     /* Array of ranks that reference my roots */
33*95fce210SBarry Smith   PetscInt         itotal;      /* Total number of graph edges referencing my roots */
34*95fce210SBarry Smith   PetscInt         *ioffset;    /* Array of length niranks+1 holding offset in irootloc[] for each rank */
35*95fce210SBarry Smith   PetscInt         *irootloc;   /* Incoming roots referenced by ranks starting at ioffset[rank] */
36*95fce210SBarry Smith   PetscSFBasicPack avail;       /* One or more entries per MPI Datatype, lazily constructed */
37*95fce210SBarry Smith   PetscSFBasicPack inuse;       /* Buffers being used for transactions that have not yet completed */
38*95fce210SBarry Smith } PetscSF_Basic;
39*95fce210SBarry Smith 
40*95fce210SBarry Smith #if !defined(PETSC_HAVE_MPI_TYPE_DUP) /* Danger: type is not reference counted; subject to ABA problem */
41*95fce210SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MPI_Type_dup(MPI_Datatype datatype,MPI_Datatype *newtype)
42*95fce210SBarry Smith {
43*95fce210SBarry Smith   *newtype = datatype;
44*95fce210SBarry Smith   return 0;
45*95fce210SBarry Smith }
46*95fce210SBarry Smith #endif
47*95fce210SBarry Smith 
48*95fce210SBarry Smith /*
49*95fce210SBarry Smith  * MPI_Reduce_local is not really useful because it can't handle sparse data and it vectorizes "in the wrong direction",
50*95fce210SBarry Smith  * therefore we pack data types manually. This section defines packing routines for the standard data types.
51*95fce210SBarry Smith  */
52*95fce210SBarry Smith 
53*95fce210SBarry Smith #define CPPJoin2_exp(a,b) a ## b
54*95fce210SBarry Smith #define CPPJoin2(a,b) CPPJoin2_exp(a,b)
55*95fce210SBarry Smith #define CPPJoin3_exp_(a,b,c) a ## b ## _ ## c
56*95fce210SBarry Smith #define CPPJoin3_(a,b,c) CPPJoin3_exp_(a,b,c)
57*95fce210SBarry Smith 
58*95fce210SBarry Smith /* Basic types without addition */
59*95fce210SBarry Smith #define DEF_PackNoInit(type)                                            \
60*95fce210SBarry Smith   static void CPPJoin2(Pack_,type)(PetscInt n,const PetscInt *idx,const void *unpacked,void *packed) { \
61*95fce210SBarry Smith     const type *u = (const type*)unpacked;                              \
62*95fce210SBarry Smith     type *p = (type*)packed;                                            \
63*95fce210SBarry Smith     PetscInt i;                                                         \
64*95fce210SBarry Smith     for (i=0; i<n; i++) p[i] = u[idx[i]];                               \
65*95fce210SBarry Smith   }                                                                     \
66*95fce210SBarry Smith   static void CPPJoin2(UnpackInsert_,type)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
67*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
68*95fce210SBarry Smith     const type *p = (const type*)packed;                                \
69*95fce210SBarry Smith     PetscInt i;                                                         \
70*95fce210SBarry Smith     for (i=0; i<n; i++) u[idx[i]] = p[i];                               \
71*95fce210SBarry Smith   }                                                                     \
72*95fce210SBarry Smith   static void CPPJoin2(FetchAndInsert_,type)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
73*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
74*95fce210SBarry Smith     type *p = (type*)packed;                                            \
75*95fce210SBarry Smith     PetscInt i;                                                         \
76*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
77*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
78*95fce210SBarry Smith       type t = u[j];                                                    \
79*95fce210SBarry Smith       u[j] = p[i];                                                      \
80*95fce210SBarry Smith       p[i] = t;                                                         \
81*95fce210SBarry Smith     }                                                                   \
82*95fce210SBarry Smith   }
83*95fce210SBarry Smith 
84*95fce210SBarry Smith /* Basic types defining addition */
85*95fce210SBarry Smith #define DEF_PackAddNoInit(type)                                         \
86*95fce210SBarry Smith   DEF_PackNoInit(type)                                                  \
87*95fce210SBarry Smith   static void CPPJoin2(UnpackAdd_,type)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
88*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
89*95fce210SBarry Smith     const type *p = (const type*)packed;                                \
90*95fce210SBarry Smith     PetscInt i;                                                         \
91*95fce210SBarry Smith     for (i=0; i<n; i++) u[idx[i]] += p[i];                              \
92*95fce210SBarry Smith   }                                                                     \
93*95fce210SBarry Smith   static void CPPJoin2(FetchAndAdd_,type)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
94*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
95*95fce210SBarry Smith     type *p = (type*)packed;                                            \
96*95fce210SBarry Smith     PetscInt i;                                                         \
97*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
98*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
99*95fce210SBarry Smith       type t = u[j];                                                    \
100*95fce210SBarry Smith       u[j] = t + p[i];                                                  \
101*95fce210SBarry Smith       p[i] = t;                                                         \
102*95fce210SBarry Smith     }                                                                   \
103*95fce210SBarry Smith   }
104*95fce210SBarry Smith #define DEF_Pack(type) \
105*95fce210SBarry Smith   DEF_PackAddNoInit(type)                                               \
106*95fce210SBarry Smith   static void CPPJoin2(PackInit_,type)(PetscSFBasicPack link) {         \
107*95fce210SBarry Smith     link->Pack = CPPJoin2(Pack_,type);                                  \
108*95fce210SBarry Smith     link->UnpackInsert = CPPJoin2(UnpackInsert_,type);                  \
109*95fce210SBarry Smith     link->UnpackAdd = CPPJoin2(UnpackAdd_,type);                        \
110*95fce210SBarry Smith     link->FetchAndInsert = CPPJoin2(FetchAndInsert_,type);              \
111*95fce210SBarry Smith     link->FetchAndAdd = CPPJoin2(FetchAndAdd_,type);                    \
112*95fce210SBarry Smith     link->unitbytes = sizeof(type);                                     \
113*95fce210SBarry Smith   }
114*95fce210SBarry Smith /* Comparable types */
115*95fce210SBarry Smith #define DEF_PackCmp(type)                                               \
116*95fce210SBarry Smith   DEF_PackAddNoInit(type)                                               \
117*95fce210SBarry Smith   static void CPPJoin2(UnpackMax_,type)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
118*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
119*95fce210SBarry Smith     const type *p = (const type*)packed;                                \
120*95fce210SBarry Smith     PetscInt i;                                                         \
121*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
122*95fce210SBarry Smith       type v = u[idx[i]];                                               \
123*95fce210SBarry Smith       u[idx[i]] = PetscMax(v,p[i]);                                     \
124*95fce210SBarry Smith     }                                                                   \
125*95fce210SBarry Smith   }                                                                     \
126*95fce210SBarry Smith   static void CPPJoin2(UnpackMin_,type)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
127*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
128*95fce210SBarry Smith     const type *p = (const type*)packed;                                \
129*95fce210SBarry Smith     PetscInt i;                                                         \
130*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
131*95fce210SBarry Smith       type v = u[idx[i]];                                               \
132*95fce210SBarry Smith       u[idx[i]] = PetscMin(v,p[i]);                                     \
133*95fce210SBarry Smith     }                                                                   \
134*95fce210SBarry Smith   }                                                                     \
135*95fce210SBarry Smith   static void CPPJoin2(FetchAndMax_,type)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
136*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
137*95fce210SBarry Smith     type *p = (type*)packed;                                            \
138*95fce210SBarry Smith     PetscInt i;                                                         \
139*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
140*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
141*95fce210SBarry Smith       type v = u[j];                                                    \
142*95fce210SBarry Smith       u[j] = PetscMax(v,p[i]);                                          \
143*95fce210SBarry Smith       p[i] = v;                                                         \
144*95fce210SBarry Smith     }                                                                   \
145*95fce210SBarry Smith   }                                                                     \
146*95fce210SBarry Smith   static void CPPJoin2(FetchAndMin_,type)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
147*95fce210SBarry Smith     type *u = (type*)unpacked;                                          \
148*95fce210SBarry Smith     type *p = (type*)packed;                                            \
149*95fce210SBarry Smith     PetscInt i;                                                         \
150*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
151*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
152*95fce210SBarry Smith       type v = u[j];                                                    \
153*95fce210SBarry Smith       u[j] = PetscMin(v,p[i]);                                          \
154*95fce210SBarry Smith       p[i] = v;                                                         \
155*95fce210SBarry Smith     }                                                                   \
156*95fce210SBarry Smith   }                                                                     \
157*95fce210SBarry Smith   static void CPPJoin2(PackInit_,type)(PetscSFBasicPack link) {         \
158*95fce210SBarry Smith     link->Pack = CPPJoin2(Pack_,type);                                  \
159*95fce210SBarry Smith     link->UnpackInsert = CPPJoin2(UnpackInsert_,type);                  \
160*95fce210SBarry Smith     link->UnpackAdd = CPPJoin2(UnpackAdd_,type);                        \
161*95fce210SBarry Smith     link->UnpackMax = CPPJoin2(UnpackMax_,type);                        \
162*95fce210SBarry Smith     link->UnpackMin = CPPJoin2(UnpackMin_,type);                        \
163*95fce210SBarry Smith     link->FetchAndInsert = CPPJoin2(FetchAndInsert_,type);              \
164*95fce210SBarry Smith     link->FetchAndAdd = CPPJoin2(FetchAndAdd_ ,type);                   \
165*95fce210SBarry Smith     link->FetchAndMax = CPPJoin2(FetchAndMax_ ,type);                   \
166*95fce210SBarry Smith     link->FetchAndMin = CPPJoin2(FetchAndMin_ ,type);                   \
167*95fce210SBarry Smith     link->unitbytes = sizeof(type);                                     \
168*95fce210SBarry Smith   }
169*95fce210SBarry Smith 
170*95fce210SBarry Smith /* Pair types */
171*95fce210SBarry Smith #define CPPJoinloc_exp(base,op,t1,t2) base ## op ## loc_ ## t1 ## _ ## t2
172*95fce210SBarry Smith #define CPPJoinloc(base,op,t1,t2) CPPJoinloc_exp(base,op,t1,t2)
173*95fce210SBarry Smith #define PairType(type1,type2) CPPJoin3_(_pairtype_,type1,type2)
174*95fce210SBarry Smith #define DEF_UnpackXloc(type1,type2,locname,op)                              \
175*95fce210SBarry Smith   static void CPPJoinloc(Unpack,locname,type1,type2)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
176*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;        \
177*95fce210SBarry Smith     const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
178*95fce210SBarry Smith     PetscInt i;                                                         \
179*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
180*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
181*95fce210SBarry Smith       if (p[i].a op u[j].a) {                                           \
182*95fce210SBarry Smith         u[j].a = p[i].a;                                                \
183*95fce210SBarry Smith         u[j].b = p[i].b;                                                \
184*95fce210SBarry Smith       } else if (u[j].a == p[i].a) {                                    \
185*95fce210SBarry Smith         u[j].b = PetscMin(u[j].b,p[i].b);                               \
186*95fce210SBarry Smith       }                                                                 \
187*95fce210SBarry Smith     }                                                                   \
188*95fce210SBarry Smith   }                                                                     \
189*95fce210SBarry Smith   static void CPPJoinloc(FetchAnd,locname,type1,type2)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
190*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;        \
191*95fce210SBarry Smith     PairType(type1,type2) *p = (PairType(type1,type2)*)packed;          \
192*95fce210SBarry Smith     PetscInt i;                                                         \
193*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
194*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
195*95fce210SBarry Smith       PairType(type1,type2) v;                                          \
196*95fce210SBarry Smith       v.a = u[j].a;                                                     \
197*95fce210SBarry Smith       v.b = u[j].b;                                                     \
198*95fce210SBarry Smith       if (p[i].a op u[j].a) {                                           \
199*95fce210SBarry Smith         u[j].a = p[i].a;                                                \
200*95fce210SBarry Smith         u[j].b = p[i].b;                                                \
201*95fce210SBarry Smith       } else if (u[j].a == p[i].a) {                                    \
202*95fce210SBarry Smith         u[j].b = PetscMin(u[j].b,p[i].b);                               \
203*95fce210SBarry Smith       }                                                                 \
204*95fce210SBarry Smith       p[i].a = v.a;                                                     \
205*95fce210SBarry Smith       p[i].b = v.b;                                                     \
206*95fce210SBarry Smith     }                                                                   \
207*95fce210SBarry Smith   }
208*95fce210SBarry Smith #define DEF_PackPair(type1,type2)                                       \
209*95fce210SBarry Smith   typedef struct {type1 a; type2 b;} PairType(type1,type2);             \
210*95fce210SBarry Smith   static void CPPJoin3_(Pack_,type1,type2)(PetscInt n,const PetscInt *idx,const void *unpacked,void *packed) { \
211*95fce210SBarry Smith     const PairType(type1,type2) *u = (const PairType(type1,type2)*)unpacked; \
212*95fce210SBarry Smith     PairType(type1,type2) *p = (PairType(type1,type2)*)packed;          \
213*95fce210SBarry Smith     PetscInt i;                                                         \
214*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
215*95fce210SBarry Smith       p[i].a = u[idx[i]].a;                                             \
216*95fce210SBarry Smith       p[i].b = u[idx[i]].b;                                             \
217*95fce210SBarry Smith     }                                                                   \
218*95fce210SBarry Smith   }                                                                     \
219*95fce210SBarry Smith   static void CPPJoin3_(UnpackInsert_,type1,type2)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
220*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;       \
221*95fce210SBarry Smith     const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
222*95fce210SBarry Smith     PetscInt i;                                                         \
223*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
224*95fce210SBarry Smith       u[idx[i]].a = p[i].a;                                             \
225*95fce210SBarry Smith       u[idx[i]].b = p[i].b;                                             \
226*95fce210SBarry Smith     }                                                                   \
227*95fce210SBarry Smith   }                                                                     \
228*95fce210SBarry Smith   static void CPPJoin3_(UnpackAdd_,type1,type2)(PetscInt n,const PetscInt *idx,void *unpacked,const void *packed) { \
229*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;       \
230*95fce210SBarry Smith     const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
231*95fce210SBarry Smith     PetscInt i;                                                         \
232*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
233*95fce210SBarry Smith       u[idx[i]].a += p[i].a;                                            \
234*95fce210SBarry Smith       u[idx[i]].b += p[i].b;                                            \
235*95fce210SBarry Smith     }                                                                   \
236*95fce210SBarry Smith   }                                                                     \
237*95fce210SBarry Smith   static void CPPJoin3_(FetchAndInsert_,type1,type2)(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
238*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;        \
239*95fce210SBarry Smith     PairType(type1,type2) *p = (PairType(type1,type2)*)packed;          \
240*95fce210SBarry Smith     PetscInt i;                                                         \
241*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
242*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
243*95fce210SBarry Smith       PairType(type1,type2) v;                                          \
244*95fce210SBarry Smith       v.a = u[j].a;                                                     \
245*95fce210SBarry Smith       v.b = u[j].b;                                                     \
246*95fce210SBarry Smith       u[j].a = p[i].a;                                                  \
247*95fce210SBarry Smith       u[j].b = p[i].b;                                                  \
248*95fce210SBarry Smith       p[i].a = v.a;                                                     \
249*95fce210SBarry Smith       p[i].b = v.b;                                                     \
250*95fce210SBarry Smith     }                                                                   \
251*95fce210SBarry Smith   }                                                                     \
252*95fce210SBarry Smith   static void FetchAndAdd_ ## type1 ## _ ## type2(PetscInt n,const PetscInt *idx,void *unpacked,void *packed) { \
253*95fce210SBarry Smith     PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked;       \
254*95fce210SBarry Smith     PairType(type1,type2) *p = (PairType(type1,type2)*)packed;         \
255*95fce210SBarry Smith     PetscInt i;                                                         \
256*95fce210SBarry Smith     for (i=0; i<n; i++) {                                               \
257*95fce210SBarry Smith       PetscInt j = idx[i];                                              \
258*95fce210SBarry Smith       PairType(type1,type2) v;                                          \
259*95fce210SBarry Smith       v.a = u[j].a;                                                     \
260*95fce210SBarry Smith       v.b = u[j].b;                                                     \
261*95fce210SBarry Smith       u[j].a = v.a + p[i].a;                                            \
262*95fce210SBarry Smith       u[j].b = v.b + p[i].b;                                            \
263*95fce210SBarry Smith       p[i].a = v.a;                                                     \
264*95fce210SBarry Smith       p[i].b = v.b;                                                     \
265*95fce210SBarry Smith     }                                                                   \
266*95fce210SBarry Smith   }                                                                     \
267*95fce210SBarry Smith   DEF_UnpackXloc(type1,type2,Max,>)                                     \
268*95fce210SBarry Smith   DEF_UnpackXloc(type1,type2,Min,<)                                     \
269*95fce210SBarry Smith   static void CPPJoin3_(PackInit_,type1,type2)(PetscSFBasicPack link) { \
270*95fce210SBarry Smith     link->Pack = CPPJoin3_(Pack_,type1,type2);                          \
271*95fce210SBarry Smith     link->UnpackInsert = CPPJoin3_(UnpackInsert_,type1,type2);          \
272*95fce210SBarry Smith     link->UnpackAdd = CPPJoin3_(UnpackAdd_,type1,type2);                \
273*95fce210SBarry Smith     link->UnpackMaxloc = CPPJoin3_(UnpackMaxloc_,type1,type2);          \
274*95fce210SBarry Smith     link->UnpackMinloc = CPPJoin3_(UnpackMinloc_,type1,type2);          \
275*95fce210SBarry Smith     link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type1,type2);      \
276*95fce210SBarry Smith     link->FetchAndAdd = CPPJoin3_(FetchAndAdd_,type1,type2);            \
277*95fce210SBarry Smith     link->FetchAndMaxloc = CPPJoin3_(FetchAndMaxloc_,type1,type2);      \
278*95fce210SBarry Smith     link->FetchAndMinloc = CPPJoin3_(FetchAndMinloc_,type1,type2);      \
279*95fce210SBarry Smith     link->unitbytes = sizeof(PairType(type1,type2));                    \
280*95fce210SBarry Smith   }
281*95fce210SBarry Smith 
282*95fce210SBarry Smith /* Currently only dumb blocks of data */
283*95fce210SBarry Smith #define BlockType(unit,count) CPPJoin3_(_blocktype_,unit,count)
284*95fce210SBarry Smith #define DEF_Block(unit,count)                                           \
285*95fce210SBarry Smith   typedef struct {unit v[count];} BlockType(unit,count);                \
286*95fce210SBarry Smith   DEF_PackNoInit(BlockType(unit,count))                                 \
287*95fce210SBarry Smith   static void CPPJoin3_(PackInit_block_,unit,count)(PetscSFBasicPack link) { \
288*95fce210SBarry Smith     link->Pack = CPPJoin2(Pack_,BlockType(unit,count));                 \
289*95fce210SBarry Smith     link->UnpackInsert = CPPJoin2(UnpackInsert_,BlockType(unit,count)); \
290*95fce210SBarry Smith     link->FetchAndInsert = CPPJoin2(FetchAndInsert_,BlockType(unit,count)); \
291*95fce210SBarry Smith     link->unitbytes = sizeof(BlockType(unit,count));                    \
292*95fce210SBarry Smith   }
293*95fce210SBarry Smith 
294*95fce210SBarry Smith DEF_PackCmp(int)
295*95fce210SBarry Smith DEF_PackCmp(PetscInt)
296*95fce210SBarry Smith DEF_PackCmp(PetscReal)
297*95fce210SBarry Smith #if defined(PETSC_HAVE_COMPLEX)
298*95fce210SBarry Smith DEF_Pack(PetscComplex)
299*95fce210SBarry Smith #endif
300*95fce210SBarry Smith DEF_PackPair(int,int)
301*95fce210SBarry Smith DEF_PackPair(PetscInt,PetscInt)
302*95fce210SBarry Smith DEF_Block(int,1)
303*95fce210SBarry Smith DEF_Block(int,2)
304*95fce210SBarry Smith DEF_Block(int,3)
305*95fce210SBarry Smith DEF_Block(int,4)
306*95fce210SBarry Smith DEF_Block(int,5)
307*95fce210SBarry Smith DEF_Block(int,6)
308*95fce210SBarry Smith DEF_Block(int,7)
309*95fce210SBarry Smith DEF_Block(int,8)
310*95fce210SBarry Smith 
311*95fce210SBarry Smith #undef __FUNCT__
312*95fce210SBarry Smith #define __FUNCT__ "PetscSFSetUp_Basic"
313*95fce210SBarry Smith static PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
314*95fce210SBarry Smith {
315*95fce210SBarry Smith   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
316*95fce210SBarry Smith   PetscErrorCode ierr;
317*95fce210SBarry Smith   PetscInt *rlengths,*ilengths,i;
318*95fce210SBarry Smith   MPI_Comm comm;
319*95fce210SBarry Smith   MPI_Request *rootreqs,*leafreqs;
320*95fce210SBarry Smith 
321*95fce210SBarry Smith   PetscFunctionBegin;
322*95fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
323*95fce210SBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)sf,&bas->tag);CHKERRQ(ierr);
324*95fce210SBarry Smith   /*
325*95fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
326*95fce210SBarry Smith    */
327*95fce210SBarry Smith   ierr = PetscMalloc(sf->nranks*sizeof(PetscInt),&rlengths);CHKERRQ(ierr);
328*95fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming  */
329*95fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
330*95fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
331*95fce210SBarry Smith   }
332*95fce210SBarry Smith   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks,sf->ranks,rlengths,&bas->niranks,&bas->iranks,(void**)&ilengths);CHKERRQ(ierr);
333*95fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
334*95fce210SBarry Smith 
335*95fce210SBarry Smith   /* Send leaf identities to roots */
336*95fce210SBarry Smith   for (i=0,bas->itotal=0; i<bas->niranks; i++) bas->itotal += ilengths[i];
337*95fce210SBarry Smith   ierr = PetscMalloc2(bas->niranks+1,PetscInt,&bas->ioffset,bas->itotal,PetscInt,&bas->irootloc);CHKERRQ(ierr);
338*95fce210SBarry Smith   ierr = PetscMalloc((bas->niranks+sf->nranks)*sizeof(MPI_Request),&rootreqs);CHKERRQ(ierr);
339*95fce210SBarry Smith 
340*95fce210SBarry Smith   leafreqs = rootreqs + bas->niranks;
341*95fce210SBarry Smith   bas->ioffset[0] = 0;
342*95fce210SBarry Smith   for (i=0; i<bas->niranks; i++) {
343*95fce210SBarry Smith     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i];
344*95fce210SBarry Smith     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],ilengths[i],MPIU_INT,bas->iranks[i],bas->tag,comm,&rootreqs[i]);CHKERRQ(ierr);
345*95fce210SBarry Smith   }
346*95fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
347*95fce210SBarry Smith     PetscMPIInt npoints;
348*95fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
349*95fce210SBarry Smith     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],bas->tag,comm,&leafreqs[i]);CHKERRQ(ierr);
350*95fce210SBarry Smith   }
351*95fce210SBarry Smith   ierr = MPI_Waitall(sf->nranks+bas->niranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
352*95fce210SBarry Smith   ierr = PetscFree(ilengths);CHKERRQ(ierr);
353*95fce210SBarry Smith   ierr = PetscFree(rootreqs);CHKERRQ(ierr);
354*95fce210SBarry Smith   PetscFunctionReturn(0);
355*95fce210SBarry Smith }
356*95fce210SBarry Smith 
357*95fce210SBarry Smith #undef __FUNCT__
358*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicPackTypeSetup"
359*95fce210SBarry Smith static PetscErrorCode PetscSFBasicPackTypeSetup(PetscSFBasicPack link,MPI_Datatype unit)
360*95fce210SBarry Smith {
361*95fce210SBarry Smith   PetscErrorCode ierr;
362*95fce210SBarry Smith   PetscBool      isInt,isPetscInt,isPetscReal,is2Int,is2PetscInt;
363*95fce210SBarry Smith #if defined(PETSC_HAVE_COMPLEX)
364*95fce210SBarry Smith   PetscBool isPetscComplex;
365*95fce210SBarry Smith #endif
366*95fce210SBarry Smith 
367*95fce210SBarry Smith   PetscFunctionBegin;
368*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPI_INT,&isInt);CHKERRQ(ierr);
369*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPIU_INT,&isPetscInt);CHKERRQ(ierr);
370*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPIU_REAL,&isPetscReal);CHKERRQ(ierr);
371*95fce210SBarry Smith #if defined(PETSC_HAVE_COMPLEX)
372*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPIU_COMPLEX,&isPetscComplex);CHKERRQ(ierr);
373*95fce210SBarry Smith #endif
374*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPI_2INT,&is2Int);CHKERRQ(ierr);
375*95fce210SBarry Smith   ierr = MPIPetsc_Type_compare(unit,MPIU_2INT,&is2PetscInt);CHKERRQ(ierr);
376*95fce210SBarry Smith   if (isInt) PackInit_int(link);
377*95fce210SBarry Smith   else if (isPetscInt) PackInit_PetscInt(link);
378*95fce210SBarry Smith   else if (isPetscReal) PackInit_PetscReal(link);
379*95fce210SBarry Smith #if defined(PETSC_HAVE_COMPLEX)
380*95fce210SBarry Smith   else if (isPetscComplex) PackInit_PetscComplex(link);
381*95fce210SBarry Smith #endif
382*95fce210SBarry Smith   else if (is2Int) PackInit_int_int(link);
383*95fce210SBarry Smith   else if (is2PetscInt) PackInit_PetscInt_PetscInt(link);
384*95fce210SBarry Smith   else {
385*95fce210SBarry Smith     PetscMPIInt bytes;
386*95fce210SBarry Smith     ierr = MPI_Type_size(unit,&bytes);CHKERRQ(ierr);
387*95fce210SBarry Smith     if (bytes % sizeof(int)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for type size not divisible by %D",sizeof(int));
388*95fce210SBarry Smith     switch (bytes / sizeof(int)) {
389*95fce210SBarry Smith     case 1: PackInit_block_int_1(link); break;
390*95fce210SBarry Smith     case 2: PackInit_block_int_2(link); break;
391*95fce210SBarry Smith     case 3: PackInit_block_int_3(link); break;
392*95fce210SBarry Smith     case 4: PackInit_block_int_4(link); break;
393*95fce210SBarry Smith     case 5: PackInit_block_int_5(link); break;
394*95fce210SBarry Smith     case 6: PackInit_block_int_6(link); break;
395*95fce210SBarry Smith     case 7: PackInit_block_int_7(link); break;
396*95fce210SBarry Smith     case 8: PackInit_block_int_8(link); break;
397*95fce210SBarry Smith     default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for arbitrary block sizes");
398*95fce210SBarry Smith     }
399*95fce210SBarry Smith   }
400*95fce210SBarry Smith   ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
401*95fce210SBarry Smith   PetscFunctionReturn(0);
402*95fce210SBarry Smith }
403*95fce210SBarry Smith 
404*95fce210SBarry Smith #undef __FUNCT__
405*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicPackGetUnpackOp"
406*95fce210SBarry Smith static PetscErrorCode PetscSFBasicPackGetUnpackOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**UnpackOp)(PetscInt,const PetscInt*,void*,const void*))
407*95fce210SBarry Smith {
408*95fce210SBarry Smith   PetscFunctionBegin;
409*95fce210SBarry Smith   *UnpackOp = NULL;
410*95fce210SBarry Smith   if (op == MPI_REPLACE) *UnpackOp = link->UnpackInsert;
411*95fce210SBarry Smith   else if (op == MPI_SUM || op == MPIU_SUM) *UnpackOp = link->UnpackAdd;
412*95fce210SBarry Smith   else if (op == MPI_MAX || op == MPIU_MAX) *UnpackOp = link->UnpackMax;
413*95fce210SBarry Smith   else if (op == MPI_MIN || op == MPIU_MIN) *UnpackOp = link->UnpackMin;
414*95fce210SBarry Smith   else if (op == MPI_MAXLOC) *UnpackOp = link->UnpackMaxloc;
415*95fce210SBarry Smith   else if (op == MPI_MINLOC) *UnpackOp = link->UnpackMinloc;
416*95fce210SBarry Smith   else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for MPI_Op");
417*95fce210SBarry Smith   PetscFunctionReturn(0);
418*95fce210SBarry Smith }
419*95fce210SBarry Smith #undef __FUNCT__
420*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicPackGetFetchAndOp"
421*95fce210SBarry Smith static PetscErrorCode PetscSFBasicPackGetFetchAndOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**FetchAndOp)(PetscInt,const PetscInt*,void*,void*))
422*95fce210SBarry Smith {
423*95fce210SBarry Smith   PetscFunctionBegin;
424*95fce210SBarry Smith   *FetchAndOp = NULL;
425*95fce210SBarry Smith   if (op == MPI_REPLACE) *FetchAndOp = link->FetchAndInsert;
426*95fce210SBarry Smith   else if (op == MPI_SUM || op == MPIU_SUM) *FetchAndOp = link->FetchAndAdd;
427*95fce210SBarry Smith   else if (op == MPI_MAX || op == MPIU_MAX) *FetchAndOp = link->FetchAndMax;
428*95fce210SBarry Smith   else if (op == MPI_MIN || op == MPIU_MIN) *FetchAndOp = link->FetchAndMin;
429*95fce210SBarry Smith   else if (op == MPI_MAXLOC) *FetchAndOp = link->FetchAndMaxloc;
430*95fce210SBarry Smith   else if (op == MPI_MINLOC) *FetchAndOp = link->FetchAndMinloc;
431*95fce210SBarry Smith   else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for MPI_Op");
432*95fce210SBarry Smith   PetscFunctionReturn(0);
433*95fce210SBarry Smith }
434*95fce210SBarry Smith 
435*95fce210SBarry Smith #undef __FUNCT__
436*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicPackGetReqs"
437*95fce210SBarry Smith static PetscErrorCode PetscSFBasicPackGetReqs(PetscSF sf,PetscSFBasicPack link,MPI_Request **rootreqs,MPI_Request **leafreqs)
438*95fce210SBarry Smith {
439*95fce210SBarry Smith   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
440*95fce210SBarry Smith 
441*95fce210SBarry Smith   PetscFunctionBegin;
442*95fce210SBarry Smith   if (rootreqs) *rootreqs = link->requests;
443*95fce210SBarry Smith   if (leafreqs) *leafreqs = link->requests + bas->niranks;
444*95fce210SBarry Smith   PetscFunctionReturn(0);
445*95fce210SBarry Smith }
446*95fce210SBarry Smith 
447*95fce210SBarry Smith #undef __FUNCT__
448*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicPackWaitall"
449*95fce210SBarry Smith static PetscErrorCode PetscSFBasicPackWaitall(PetscSF sf,PetscSFBasicPack link)
450*95fce210SBarry Smith {
451*95fce210SBarry Smith   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
452*95fce210SBarry Smith   PetscErrorCode ierr;
453*95fce210SBarry Smith 
454*95fce210SBarry Smith   PetscFunctionBegin;
455*95fce210SBarry Smith   ierr = MPI_Waitall(bas->niranks+sf->nranks,link->requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
456*95fce210SBarry Smith   PetscFunctionReturn(0);
457*95fce210SBarry Smith }
458*95fce210SBarry Smith 
459*95fce210SBarry Smith #undef __FUNCT__
460*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicGetRootInfo"
461*95fce210SBarry Smith static PetscErrorCode PetscSFBasicGetRootInfo(PetscSF sf,PetscInt *nrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc)
462*95fce210SBarry Smith {
463*95fce210SBarry Smith   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
464*95fce210SBarry Smith 
465*95fce210SBarry Smith   PetscFunctionBegin;
466*95fce210SBarry Smith   if (nrootranks) *nrootranks = bas->niranks;
467*95fce210SBarry Smith   if (rootranks)  *rootranks  = bas->iranks;
468*95fce210SBarry Smith   if (rootoffset) *rootoffset = bas->ioffset;
469*95fce210SBarry Smith   if (rootloc)    *rootloc    = bas->irootloc;
470*95fce210SBarry Smith   PetscFunctionReturn(0);
471*95fce210SBarry Smith }
472*95fce210SBarry Smith 
473*95fce210SBarry Smith #undef __FUNCT__
474*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicGetLeafInfo"
475*95fce210SBarry Smith static PetscErrorCode PetscSFBasicGetLeafInfo(PetscSF sf,PetscInt *nleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc)
476*95fce210SBarry Smith {
477*95fce210SBarry Smith   PetscFunctionBegin;
478*95fce210SBarry Smith   if (nleafranks) *nleafranks = sf->nranks;
479*95fce210SBarry Smith   if (leafranks)  *leafranks  = sf->ranks;
480*95fce210SBarry Smith   if (leafoffset) *leafoffset = sf->roffset;
481*95fce210SBarry Smith   if (leafloc)    *leafloc    = sf->rmine;
482*95fce210SBarry Smith   PetscFunctionReturn(0);
483*95fce210SBarry Smith }
484*95fce210SBarry Smith 
485*95fce210SBarry Smith #undef __FUNCT__
486*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicGetPack"
487*95fce210SBarry Smith static PetscErrorCode PetscSFBasicGetPack(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFBasicPack *mylink)
488*95fce210SBarry Smith {
489*95fce210SBarry Smith   PetscSF_Basic    *bas = (PetscSF_Basic*)sf->data;
490*95fce210SBarry Smith   PetscErrorCode   ierr;
491*95fce210SBarry Smith   PetscSFBasicPack link,*p;
492*95fce210SBarry Smith   PetscInt         nrootranks,nleafranks;
493*95fce210SBarry Smith   const PetscInt   *rootoffset,*leafoffset;
494*95fce210SBarry Smith 
495*95fce210SBarry Smith   PetscFunctionBegin;
496*95fce210SBarry Smith   /* Look for types in cache */
497*95fce210SBarry Smith   for (p=&bas->avail; (link=*p); p=&link->next) {
498*95fce210SBarry Smith     PetscBool match;
499*95fce210SBarry Smith     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
500*95fce210SBarry Smith     if (match) {
501*95fce210SBarry Smith       *p = link->next;          /* Remove from available list */
502*95fce210SBarry Smith       goto found;
503*95fce210SBarry Smith     }
504*95fce210SBarry Smith   }
505*95fce210SBarry Smith 
506*95fce210SBarry Smith   /* Create new composite types for each send rank */
507*95fce210SBarry Smith   ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
508*95fce210SBarry Smith   ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,NULL,&leafoffset,NULL);CHKERRQ(ierr);
509*95fce210SBarry Smith   ierr = PetscNew(struct _n_PetscSFBasicPack,&link);CHKERRQ(ierr);
510*95fce210SBarry Smith   ierr = PetscSFBasicPackTypeSetup(link,unit);CHKERRQ(ierr);
511*95fce210SBarry Smith   ierr = PetscMalloc2(rootoffset[nrootranks]*link->unitbytes,char,&link->root,leafoffset[nleafranks]*link->unitbytes,char,&link->leaf);CHKERRQ(ierr);
512*95fce210SBarry Smith   ierr = PetscMalloc((nrootranks+nleafranks)*sizeof(MPI_Request),&link->requests);CHKERRQ(ierr);
513*95fce210SBarry Smith 
514*95fce210SBarry Smith found:
515*95fce210SBarry Smith   link->key  = key;
516*95fce210SBarry Smith   link->next = bas->inuse;
517*95fce210SBarry Smith   bas->inuse = link;
518*95fce210SBarry Smith 
519*95fce210SBarry Smith   *mylink = link;
520*95fce210SBarry Smith   PetscFunctionReturn(0);
521*95fce210SBarry Smith }
522*95fce210SBarry Smith 
523*95fce210SBarry Smith #undef __FUNCT__
524*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicGetPackInUse"
525*95fce210SBarry Smith static PetscErrorCode PetscSFBasicGetPackInUse(PetscSF sf,MPI_Datatype unit,const void *key,PetscCopyMode cmode,PetscSFBasicPack *mylink)
526*95fce210SBarry Smith {
527*95fce210SBarry Smith   PetscSF_Basic    *bas = (PetscSF_Basic*)sf->data;
528*95fce210SBarry Smith   PetscErrorCode   ierr;
529*95fce210SBarry Smith   PetscSFBasicPack link,*p;
530*95fce210SBarry Smith 
531*95fce210SBarry Smith   PetscFunctionBegin;
532*95fce210SBarry Smith   /* Look for types in cache */
533*95fce210SBarry Smith   for (p=&bas->inuse; (link=*p); p=&link->next) {
534*95fce210SBarry Smith     PetscBool match;
535*95fce210SBarry Smith     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
536*95fce210SBarry Smith     if (match) {
537*95fce210SBarry Smith       switch (cmode) {
538*95fce210SBarry Smith       case PETSC_OWN_POINTER: *p = link->next; break; /* Remove from inuse list */
539*95fce210SBarry Smith       case PETSC_USE_POINTER: break;
540*95fce210SBarry Smith       default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"invalid cmode");
541*95fce210SBarry Smith       }
542*95fce210SBarry Smith       *mylink = link;
543*95fce210SBarry Smith       PetscFunctionReturn(0);
544*95fce210SBarry Smith     }
545*95fce210SBarry Smith   }
546*95fce210SBarry Smith   SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Could not find pack");
547*95fce210SBarry Smith   PetscFunctionReturn(0);
548*95fce210SBarry Smith }
549*95fce210SBarry Smith 
550*95fce210SBarry Smith #undef __FUNCT__
551*95fce210SBarry Smith #define __FUNCT__ "PetscSFBasicReclaimPack"
552*95fce210SBarry Smith static PetscErrorCode PetscSFBasicReclaimPack(PetscSF sf,PetscSFBasicPack *link)
553*95fce210SBarry Smith {
554*95fce210SBarry Smith   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
555*95fce210SBarry Smith 
556*95fce210SBarry Smith   PetscFunctionBegin;
557*95fce210SBarry Smith   (*link)->key  = NULL;
558*95fce210SBarry Smith   (*link)->next = bas->avail;
559*95fce210SBarry Smith   bas->avail    = *link;
560*95fce210SBarry Smith   *link         = NULL;
561*95fce210SBarry Smith   PetscFunctionReturn(0);
562*95fce210SBarry Smith }
563*95fce210SBarry Smith 
564*95fce210SBarry Smith #undef __FUNCT__
565*95fce210SBarry Smith #define __FUNCT__ "PetscSFSetFromOptions_Basic"
566*95fce210SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscSF sf)
567*95fce210SBarry Smith {
568*95fce210SBarry Smith   PetscErrorCode ierr;
569*95fce210SBarry Smith 
570*95fce210SBarry Smith   PetscFunctionBegin;
571*95fce210SBarry Smith   ierr = PetscOptionsHead("PetscSF Basic options");CHKERRQ(ierr);
572*95fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
573*95fce210SBarry Smith   PetscFunctionReturn(0);
574*95fce210SBarry Smith }
575*95fce210SBarry Smith 
576*95fce210SBarry Smith #undef __FUNCT__
577*95fce210SBarry Smith #define __FUNCT__ "PetscSFReset_Basic"
578*95fce210SBarry Smith static PetscErrorCode PetscSFReset_Basic(PetscSF sf)
579*95fce210SBarry Smith {
580*95fce210SBarry Smith   PetscSF_Basic    *bas = (PetscSF_Basic*)sf->data;
581*95fce210SBarry Smith   PetscErrorCode   ierr;
582*95fce210SBarry Smith   PetscSFBasicPack link,next;
583*95fce210SBarry Smith 
584*95fce210SBarry Smith   PetscFunctionBegin;
585*95fce210SBarry Smith   ierr = PetscFree(bas->iranks);CHKERRQ(ierr);
586*95fce210SBarry Smith   ierr = PetscFree2(bas->ioffset,bas->irootloc);CHKERRQ(ierr);
587*95fce210SBarry Smith   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
588*95fce210SBarry Smith   for (link=bas->avail; link; link=next) {
589*95fce210SBarry Smith     next = link->next;
590*95fce210SBarry Smith     ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);
591*95fce210SBarry Smith     ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
592*95fce210SBarry Smith     ierr = PetscFree(link->requests);CHKERRQ(ierr);
593*95fce210SBarry Smith     ierr = PetscFree(link);CHKERRQ(ierr);
594*95fce210SBarry Smith   }
595*95fce210SBarry Smith   bas->avail = NULL;
596*95fce210SBarry Smith   PetscFunctionReturn(0);
597*95fce210SBarry Smith }
598*95fce210SBarry Smith 
599*95fce210SBarry Smith #undef __FUNCT__
600*95fce210SBarry Smith #define __FUNCT__ "PetscSFDestroy_Basic"
601*95fce210SBarry Smith static PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
602*95fce210SBarry Smith {
603*95fce210SBarry Smith   PetscErrorCode ierr;
604*95fce210SBarry Smith 
605*95fce210SBarry Smith   PetscFunctionBegin;
606*95fce210SBarry Smith   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
607*95fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
608*95fce210SBarry Smith   PetscFunctionReturn(0);
609*95fce210SBarry Smith }
610*95fce210SBarry Smith 
611*95fce210SBarry Smith #undef __FUNCT__
612*95fce210SBarry Smith #define __FUNCT__ "PetscSFView_Basic"
613*95fce210SBarry Smith static PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
614*95fce210SBarry Smith {
615*95fce210SBarry Smith   /* PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; */
616*95fce210SBarry Smith   PetscErrorCode ierr;
617*95fce210SBarry Smith   PetscBool      iascii;
618*95fce210SBarry Smith 
619*95fce210SBarry Smith   PetscFunctionBegin;
620*95fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
621*95fce210SBarry Smith   if (iascii) {
622*95fce210SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
623*95fce210SBarry Smith   }
624*95fce210SBarry Smith   PetscFunctionReturn(0);
625*95fce210SBarry Smith }
626*95fce210SBarry Smith 
627*95fce210SBarry Smith #undef __FUNCT__
628*95fce210SBarry Smith #define __FUNCT__ "PetscSFBcastBegin_Basic"
629*95fce210SBarry Smith /* Send from roots to leaves */
630*95fce210SBarry Smith static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
631*95fce210SBarry Smith {
632*95fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
633*95fce210SBarry Smith   PetscErrorCode    ierr;
634*95fce210SBarry Smith   PetscSFBasicPack  link;
635*95fce210SBarry Smith   PetscInt          i,nrootranks,nleafranks;
636*95fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
637*95fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
638*95fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
639*95fce210SBarry Smith   size_t            unitbytes;
640*95fce210SBarry Smith 
641*95fce210SBarry Smith   PetscFunctionBegin;
642*95fce210SBarry Smith   ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
643*95fce210SBarry Smith   ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr);
644*95fce210SBarry Smith   ierr = PetscSFBasicGetPack(sf,unit,rootdata,&link);CHKERRQ(ierr);
645*95fce210SBarry Smith 
646*95fce210SBarry Smith   unitbytes = link->unitbytes;
647*95fce210SBarry Smith 
648*95fce210SBarry Smith   ierr = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr);
649*95fce210SBarry Smith   /* Eagerly post leaf receives */
650*95fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
651*95fce210SBarry Smith     PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
652*95fce210SBarry Smith     ierr = MPI_Irecv(link->leaf+leafoffset[i]*unitbytes,n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i]);CHKERRQ(ierr);
653*95fce210SBarry Smith   }
654*95fce210SBarry Smith   /* Pack and send root data */
655*95fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
656*95fce210SBarry Smith     PetscMPIInt n          = rootoffset[i+1] - rootoffset[i];
657*95fce210SBarry Smith     void        *packstart = link->root+rootoffset[i]*unitbytes;
658*95fce210SBarry Smith     (*link->Pack)(n,rootloc+rootoffset[i],rootdata,packstart);
659*95fce210SBarry Smith     ierr = MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i]);CHKERRQ(ierr);
660*95fce210SBarry Smith   }
661*95fce210SBarry Smith   PetscFunctionReturn(0);
662*95fce210SBarry Smith }
663*95fce210SBarry Smith 
664*95fce210SBarry Smith #undef __FUNCT__
665*95fce210SBarry Smith #define __FUNCT__ "PetscSFBcastEnd_Basic"
666*95fce210SBarry Smith PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
667*95fce210SBarry Smith {
668*95fce210SBarry Smith   PetscErrorCode   ierr;
669*95fce210SBarry Smith   PetscSFBasicPack link;
670*95fce210SBarry Smith   PetscInt         i,nleafranks;
671*95fce210SBarry Smith   const PetscInt   *leafoffset,*leafloc;
672*95fce210SBarry Smith 
673*95fce210SBarry Smith   PetscFunctionBegin;
674*95fce210SBarry Smith   ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
675*95fce210SBarry Smith   ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr);
676*95fce210SBarry Smith   ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,NULL,&leafoffset,&leafloc);CHKERRQ(ierr);
677*95fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
678*95fce210SBarry Smith     PetscMPIInt n          = leafoffset[i+1] - leafoffset[i];
679*95fce210SBarry Smith     const void  *packstart = link->leaf+leafoffset[i]*link->unitbytes;
680*95fce210SBarry Smith     (*link->UnpackInsert)(n,leafloc+leafoffset[i],leafdata,packstart);
681*95fce210SBarry Smith   }
682*95fce210SBarry Smith   ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr);
683*95fce210SBarry Smith   PetscFunctionReturn(0);
684*95fce210SBarry Smith }
685*95fce210SBarry Smith 
686*95fce210SBarry Smith #undef __FUNCT__
687*95fce210SBarry Smith #define __FUNCT__ "PetscSFReduceBegin_Basic"
688*95fce210SBarry Smith /* leaf -> root with reduction */
689*95fce210SBarry Smith PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
690*95fce210SBarry Smith {
691*95fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
692*95fce210SBarry Smith   PetscSFBasicPack  link;
693*95fce210SBarry Smith   PetscErrorCode    ierr;
694*95fce210SBarry Smith   PetscInt          i,nrootranks,nleafranks;
695*95fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
696*95fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
697*95fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
698*95fce210SBarry Smith   size_t            unitbytes;
699*95fce210SBarry Smith 
700*95fce210SBarry Smith   PetscFunctionBegin;
701*95fce210SBarry Smith   ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
702*95fce210SBarry Smith   ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr);
703*95fce210SBarry Smith   ierr = PetscSFBasicGetPack(sf,unit,rootdata,&link);CHKERRQ(ierr);
704*95fce210SBarry Smith 
705*95fce210SBarry Smith   unitbytes = link->unitbytes;
706*95fce210SBarry Smith 
707*95fce210SBarry Smith   ierr = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr);
708*95fce210SBarry Smith   /* Eagerly post root receives */
709*95fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
710*95fce210SBarry Smith     PetscMPIInt n = rootoffset[i+1] - rootoffset[i];
711*95fce210SBarry Smith     ierr = MPI_Irecv(link->root+rootoffset[i]*unitbytes,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i]);CHKERRQ(ierr);
712*95fce210SBarry Smith   }
713*95fce210SBarry Smith   /* Pack and send leaf data */
714*95fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
715*95fce210SBarry Smith     PetscMPIInt n          = leafoffset[i+1] - leafoffset[i];
716*95fce210SBarry Smith     void        *packstart = link->leaf+leafoffset[i]*unitbytes;
717*95fce210SBarry Smith     (*link->Pack)(n,leafloc+leafoffset[i],leafdata,packstart);
718*95fce210SBarry Smith     ierr = MPI_Isend(packstart,n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i]);CHKERRQ(ierr);
719*95fce210SBarry Smith   }
720*95fce210SBarry Smith   PetscFunctionReturn(0);
721*95fce210SBarry Smith }
722*95fce210SBarry Smith 
723*95fce210SBarry Smith #undef __FUNCT__
724*95fce210SBarry Smith #define __FUNCT__ "PetscSFReduceEnd_Basic"
725*95fce210SBarry Smith static PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
726*95fce210SBarry Smith {
727*95fce210SBarry Smith   void             (*UnpackOp)(PetscInt,const PetscInt*,void*,const void*);
728*95fce210SBarry Smith   PetscErrorCode   ierr;
729*95fce210SBarry Smith   PetscSFBasicPack link;
730*95fce210SBarry Smith   PetscInt         i,nrootranks;
731*95fce210SBarry Smith   const PetscInt   *rootoffset,*rootloc;
732*95fce210SBarry Smith 
733*95fce210SBarry Smith   PetscFunctionBegin;
734*95fce210SBarry Smith   ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
735*95fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
736*95fce210SBarry Smith   ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr);
737*95fce210SBarry Smith   ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
738*95fce210SBarry Smith   ierr = PetscSFBasicPackGetUnpackOp(sf,link,op,&UnpackOp);CHKERRQ(ierr);
739*95fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
740*95fce210SBarry Smith     PetscMPIInt n          = rootoffset[i+1] - rootoffset[i];
741*95fce210SBarry Smith     const void  *packstart = link->root+rootoffset[i]*link->unitbytes;
742*95fce210SBarry Smith 
743*95fce210SBarry Smith     (*UnpackOp)(n,rootloc+rootoffset[i],rootdata,packstart);
744*95fce210SBarry Smith   }
745*95fce210SBarry Smith   ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr);
746*95fce210SBarry Smith   PetscFunctionReturn(0);
747*95fce210SBarry Smith }
748*95fce210SBarry Smith 
749*95fce210SBarry Smith #undef __FUNCT__
750*95fce210SBarry Smith #define __FUNCT__ "PetscSFFetchAndOpBegin_Basic"
751*95fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
752*95fce210SBarry Smith {
753*95fce210SBarry Smith   PetscErrorCode ierr;
754*95fce210SBarry Smith 
755*95fce210SBarry Smith   PetscFunctionBegin;
756*95fce210SBarry Smith   ierr = PetscSFReduceBegin_Basic(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
757*95fce210SBarry Smith   PetscFunctionReturn(0);
758*95fce210SBarry Smith }
759*95fce210SBarry Smith 
760*95fce210SBarry Smith #undef __FUNCT__
761*95fce210SBarry Smith #define __FUNCT__ "PetscSFFetchAndOpEnd_Basic"
762*95fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
763*95fce210SBarry Smith {
764*95fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
765*95fce210SBarry Smith   void              (*FetchAndOp)(PetscInt,const PetscInt*,void*,void*);
766*95fce210SBarry Smith   PetscErrorCode    ierr;
767*95fce210SBarry Smith   PetscSFBasicPack  link;
768*95fce210SBarry Smith   PetscInt          i,nrootranks,nleafranks;
769*95fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
770*95fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
771*95fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
772*95fce210SBarry Smith   size_t            unitbytes;
773*95fce210SBarry Smith 
774*95fce210SBarry Smith   PetscFunctionBegin;
775*95fce210SBarry Smith   ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
776*95fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
777*95fce210SBarry Smith   ierr      = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr);
778*95fce210SBarry Smith   unitbytes = link->unitbytes;
779*95fce210SBarry Smith   ierr      = PetscSFBasicGetRootInfo(sf,&nrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
780*95fce210SBarry Smith   ierr      = PetscSFBasicGetLeafInfo(sf,&nleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr);
781*95fce210SBarry Smith   ierr      = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr);
782*95fce210SBarry Smith   /* Post leaf receives */
783*95fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
784*95fce210SBarry Smith     PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
785*95fce210SBarry Smith     ierr = MPI_Irecv(link->leaf+leafoffset[i]*unitbytes,n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i]);CHKERRQ(ierr);
786*95fce210SBarry Smith   }
787*95fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
788*95fce210SBarry Smith   ierr = PetscSFBasicPackGetFetchAndOp(sf,link,op,&FetchAndOp);CHKERRQ(ierr);
789*95fce210SBarry Smith   for (i=0; i<nrootranks; i++) {
790*95fce210SBarry Smith     PetscMPIInt n          = rootoffset[i+1] - rootoffset[i];
791*95fce210SBarry Smith     void        *packstart = link->root+rootoffset[i]*unitbytes;
792*95fce210SBarry Smith 
793*95fce210SBarry Smith     (*FetchAndOp)(n,rootloc+rootoffset[i],rootdata,packstart);
794*95fce210SBarry Smith     ierr = MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i]);CHKERRQ(ierr);
795*95fce210SBarry Smith   }
796*95fce210SBarry Smith   ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr);
797*95fce210SBarry Smith   for (i=0; i<nleafranks; i++) {
798*95fce210SBarry Smith     PetscMPIInt n          = leafoffset[i+1] - leafoffset[i];
799*95fce210SBarry Smith     const void  *packstart = link->leaf+leafoffset[i]*unitbytes;
800*95fce210SBarry Smith     (*link->UnpackInsert)(n,leafloc+leafoffset[i],leafupdate,packstart);
801*95fce210SBarry Smith   }
802*95fce210SBarry Smith   ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr);
803*95fce210SBarry Smith   PetscFunctionReturn(0);
804*95fce210SBarry Smith }
805*95fce210SBarry Smith 
806*95fce210SBarry Smith #undef __FUNCT__
807*95fce210SBarry Smith #define __FUNCT__ "PetscSFCreate_Basic"
808*95fce210SBarry Smith PETSC_EXTERN_C PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
809*95fce210SBarry Smith {
810*95fce210SBarry Smith   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
811*95fce210SBarry Smith   PetscErrorCode ierr;
812*95fce210SBarry Smith 
813*95fce210SBarry Smith   PetscFunctionBegin;
814*95fce210SBarry Smith   sf->ops->SetUp           = PetscSFSetUp_Basic;
815*95fce210SBarry Smith   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Basic;
816*95fce210SBarry Smith   sf->ops->Reset           = PetscSFReset_Basic;
817*95fce210SBarry Smith   sf->ops->Destroy         = PetscSFDestroy_Basic;
818*95fce210SBarry Smith   sf->ops->View            = PetscSFView_Basic;
819*95fce210SBarry Smith   sf->ops->BcastBegin      = PetscSFBcastBegin_Basic;
820*95fce210SBarry Smith   sf->ops->BcastEnd        = PetscSFBcastEnd_Basic;
821*95fce210SBarry Smith   sf->ops->ReduceBegin     = PetscSFReduceBegin_Basic;
822*95fce210SBarry Smith   sf->ops->ReduceEnd       = PetscSFReduceEnd_Basic;
823*95fce210SBarry Smith   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic;
824*95fce210SBarry Smith   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Basic;
825*95fce210SBarry Smith 
826*95fce210SBarry Smith   ierr     = PetscNewLog(sf,PetscSF_Basic,&bas);CHKERRQ(ierr);
827*95fce210SBarry Smith   sf->data = (void*)bas;
828*95fce210SBarry Smith   PetscFunctionReturn(0);
829*95fce210SBarry Smith }
830