xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 70baa948e42e6bb1660071283af10f413c9269b6)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0, and then the
message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data has different lengths.
All data gets jammed into a single array. Need to "jam" data into the correct locations.
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 + Constructor
51 DataExCreate()
52 + Phase A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 + Phase B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 + Phase C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 +Phase D
68 DataExBegin()
69  ... perform any calculations ...
70 DataExEnd()
71 
72 ... user calls any getters here ...
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
/* Human-readable names indexed by the DEOBJECT_* state values.
   NOTE(review): assumes the enum in data_ex.h orders the states as
   initialized, finalized, unknown — confirm against the header. */
const char *status_names[] = {"initialized", "finalized", "unknown"};

/* Log events registered elsewhere; used to time topology setup and the
   begin/end halves of the exchange. */
PetscLogEvent PTATIN_DataExchangerTopologySetup;
PetscLogEvent PTATIN_DataExchangerBegin;
PetscLogEvent PTATIN_DataExchangerEnd;
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "DataExCreate"
91 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
92 {
93   PetscErrorCode ierr;
94   DataEx         d;
95 
96   PetscFunctionBegin;
97   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
98   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
99   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
100   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
101 
102   d->instance = count;
103 
104   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
105   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
106   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
107   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
108 
109   d->n_neighbour_procs = -1;
110   d->neighbour_procs   = NULL;
111 
112   d->messages_to_be_sent      = NULL;
113   d->message_offsets          = NULL;
114   d->messages_to_be_recvieved = NULL;
115 
116   d->unit_message_size   = -1;
117   d->send_message        = NULL;
118   d->send_message_length = -1;
119   d->recv_message        = NULL;
120   d->recv_message_length = -1;
121   d->total_pack_cnt      = -1;
122   d->pack_cnt            = NULL;
123 
124   d->send_tags = NULL;
125   d->recv_tags = NULL;
126 
127   d->_stats    = NULL;
128   d->_requests = NULL;
129   *ex = d;
130   PetscFunctionReturn(0);
131 }
132 
#undef __FUNCT__
#define __FUNCT__ "DataExView"
/*
 Prints a summary of the exchanger: instance id, the status of each of the
 four phases, and — once the corresponding phase is finalized — the
 per-rank neighbour list and send/recv message lengths with their tags.
 Per-rank lines use PETSC_COMM_SELF, so output from different ranks may
 interleave arbitrarily.
*/
PetscErrorCode DataExView(DataEx d)
{
  PetscMPIInt p;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
  /* NOTE(review): "status status" below is a typo in the output string; left as-is to preserve behavior */
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);

  if (d->topology_status == DEOBJECT_FINALIZED) {
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
    }
  }
  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
  }
  /* nothing extra is reported for the packing / communication phases */
  if (d->packer_status == DEOBJECT_FINALIZED) {}
  if (d->communication_status == DEOBJECT_FINALIZED) {}
  PetscFunctionReturn(0);
}
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "DataExDestroy"
171 PetscErrorCode DataExDestroy(DataEx d)
172 {
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
177   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
178   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
179   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
180   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
181   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
182   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
183   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
184   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
185   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
186   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
187   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
188   ierr = PetscFree(d);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 /* === Phase A === */
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "DataExTopologyInitialize"
196 PetscErrorCode DataExTopologyInitialize(DataEx d)
197 {
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   d->topology_status = DEOBJECT_INITIALIZED;
202   d->n_neighbour_procs = 0;
203   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
204   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
205   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
206   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
207   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
208   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
209   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "DataExTopologyAddNeighbour"
215 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
216 {
217   PetscMPIInt    n,found;
218   PetscMPIInt    nproc;
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
223   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
224 
225   /* error on negative entries */
226   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
227   /* error on ranks larger than number of procs in communicator */
228   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
229   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
230   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
231   /* check for proc_id */
232   found = 0;
233   for (n = 0; n < d->n_neighbour_procs; n++) {
234     if (d->neighbour_procs[n] == proc_id) {
235       found  = 1;
236     }
237   }
238   if (found == 0) { /* add it to list */
239     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
240     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
241     d->n_neighbour_procs++;
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 /*
247 counter: the index of the communication object
248 N: the number of processors
249 r0: rank of sender
250 r1: rank of receiver
251 
252 procs = { 0, 1, 2, 3 }
253 
254 0 ==> 0		e=0
255 0 ==> 1		e=1
256 0 ==> 2		e=2
257 0 ==> 3		e=3
258 
259 1 ==> 0		e=4
260 1 ==> 1		e=5
261 1 ==> 2		e=6
262 1 ==> 3		e=7
263 
264 2 ==> 0		e=8
265 2 ==> 1		e=9
266 2 ==> 2		e=10
267 2 ==> 3		e=11
268 
269 3 ==> 0		e=12
270 3 ==> 1		e=13
271 3 ==> 2		e=14
272 3 ==> 3		e=15
273 
274 If we require that proc A sends to proc B, then the SEND tag index will be given by
275   N * rank(A) + rank(B) + offset
276 If we require that proc A will receive from proc B, then the RECV tag index will be given by
277   N * rank(B) + rank(A) + offset
278 
279 */
280 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
281 {
282   PetscMPIInt st,rt;
283 
284   st = N*r0 + r1   +   N*N*counter;
285   rt = N*r1 + r0   +   N*N*counter;
286   *_st = st;
287   *_rt = rt;
288 }
289 
/*
Makes the communication map symmetric: if rank A listed B as a neighbour but
B did not list A, B gains A as a neighbour. Implemented by assembling a
size x size AIJ matrix whose row r holds the neighbours of rank r, inserting
both (rank, neighbour) and (neighbour, rank) entries, then reading back this
rank's row. On output (when requested) *proc_neighbours_new is a fresh
allocation the caller owns and must free.
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap"
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size,  rank_i;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the PetscMPIInt neighbour list to PetscInt for MatSetValues */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  /* adjacency matrix over ranks; new-nonzero errors disabled since the
     symmetrizing inserts below may create entries outside the preallocation */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  /* flush (not final) assembly so the transpose entries can still be inserted */
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* the nonzero columns of my row are my symmetrized neighbour set */
    ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "DataExTopologyFinalize"
365 PetscErrorCode DataExTopologyFinalize(DataEx d)
366 {
367   PetscMPIInt    symm_nn;
368   PetscMPIInt   *symm_procs;
369   PetscMPIInt    r0,n,st,rt;
370   PetscMPIInt    nprocs;
371   PetscErrorCode ierr;
372 
373   PetscFunctionBegin;
374   if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
375 
376   ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
377   /* given infomation about all my neighbours, make map symmetric */
378   ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
379   /* update my arrays */
380   ierr = PetscFree(d->neighbour_procs);
381   d->n_neighbour_procs = symm_nn;
382   d->neighbour_procs   = symm_procs;
383   /* allocates memory */
384   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
385   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
386   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
387   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
388   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
389   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
390   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
391   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
392   /* compute message tags */
393   ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
394   r0 = d->rank;
395   for (n = 0; n < d->n_neighbour_procs; ++n) {
396     PetscMPIInt r1 = d->neighbour_procs[n];
397 
398     _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
399     d->send_tags[n] = (int)st;
400     d->recv_tags[n] = (int)rt;
401   }
402   d->topology_status = DEOBJECT_FINALIZED;
403   ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
404   PetscFunctionReturn(0);
405 }
406 
407 /* === Phase B === */
408 #undef __FUNCT__
409 #define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
410 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
411 {
412   PetscMPIInt i,np;
413 
414   PetscFunctionBegin;
415   np = de->n_neighbour_procs;
416   *local = -1;
417   for (i = 0; i < np; ++i) {
418     if (proc_id == de->neighbour_procs[i]) {
419       *local = i;
420       break;
421     }
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "DataExInitializeSendCount"
428 PetscErrorCode DataExInitializeSendCount(DataEx de)
429 {
430   PetscMPIInt i;
431 
432   PetscFunctionBegin;
433   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
434   de->message_lengths_status = DEOBJECT_INITIALIZED;
435   for (i = 0; i < de->n_neighbour_procs; ++i) {
436     de->messages_to_be_sent[i] = 0;
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 /*
442 1) only allows counters to be set on neighbouring cpus
443 */
444 #undef __FUNCT__
445 #define __FUNCT__ "DataExAddToSendCount"
446 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
447 {
448   PetscMPIInt    local_val;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
453   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
454 
455   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
456   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
457 
458   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
459   PetscFunctionReturn(0);
460 }
461 
#undef __FUNCT__
#define __FUNCT__ "DataExFinalizeSendCount"
/*
 Completes Phase B: freezes the per-neighbour send counts so that
 DataExPackInitialize() may size the send buffer from them.
*/
PetscErrorCode DataExFinalizeSendCount(DataEx de)
{
  PetscFunctionBegin;
  if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );

  de->message_lengths_status = DEOBJECT_FINALIZED;
  PetscFunctionReturn(0);
}
472 
473 /* === Phase C === */
474 /*
475  * zero out all send counts
476  * free send and recv buffers
477  * zeros out message length
478  * zeros out all counters
479  * zero out packed data counters
480 */
481 #undef __FUNCT__
482 #define __FUNCT__ "_DataExInitializeTmpStorage"
483 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
484 {
485   PetscMPIInt    i, np;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
490   */
491   /*
492   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
493   */
494   np = de->n_neighbour_procs;
495   for (i = 0; i < np; ++i) {
496     /*	de->messages_to_be_sent[i] = -1; */
497     de->messages_to_be_recvieved[i] = -1;
498   }
499   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
500   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
/*
*) Zeros out pack data counters
*) Ensures message length is set
*) Checks send counts properly initialized
*) allocates space for pack data
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackInitialize"
PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  de->packer_status = DEOBJECT_INITIALIZED;
  ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* every packed item is unit_message_size bytes; total units across all neighbours */
  de->unit_message_size = unit_message_size;
  total = 0;
  for (i = 0; i < np; ++i) {
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent (+1 unit avoids a zero-length allocation) */
  ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to send */
  de->send_message_length = total;
  /* prefix-sum of send counts: message_offsets[i] = first unit slot for neighbour i */
  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) {
    de->pack_cnt[i] = 0;
  }
  PetscFunctionReturn(0);
}
552 
/*
*) Ensures data is packed appropriately and no overlaps occur
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackData"
/*
 Phase C. Copies `n` units (each de->unit_message_size bytes) from `data`
 into the slice of the send buffer reserved for neighbour `proc_id`.
 Errors if the cumulative packed count would exceed the count declared
 during Phase B for that neighbour.
*/
PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
{
  PetscMPIInt    local;
  PetscInt       insert_location;
  void           *dest;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );

  if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
  if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
  /* overflow guard: never pack past the space declared for this neighbour */
  if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
              (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );

  /* copy memory: destination = neighbour's base offset + units already packed */
  insert_location = de->message_offsets[local] + de->pack_cnt[local];
  dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
  ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
  /* increment counter */
  de->pack_cnt[local] = de->pack_cnt[local] + n;
  PetscFunctionReturn(0);
}
583 
/*
*) Ensures all data has been packed
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackFinalize"
/*
 Completes Phase C. Verifies every neighbour's slice was fully packed, then
 exchanges the per-neighbour send counts (non-blocking send/recv of one
 MPIU_INT per edge, completed with Waitall) to learn how much will be
 received, and allocates + zeroes the receive buffer accordingly.
 On success the packer is finalized and the communication phase is
 initialized, ready for DataExBegin().
*/
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here: requests [0,np) are sends, [np,2np) are recvs */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be recvieved (+1 unit avoids a zero-length allocation) */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to recieve */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscFunctionReturn(0);
}
628 
/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExEnd"
/*
 Phase D, first half. Posts the non-blocking sends of the packed payload
 (one MPI_Isend of raw bytes per neighbour, requests [0,np)). The matching
 receives are posted and all requests completed in DataExEnd(); the caller
 may overlap computation between the two calls.
*/
PetscErrorCode DataExBegin(DataEx de)
{
  PetscMPIInt i,np;
  void       *dest;
  PetscInt    length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
  if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* length is in bytes; data sent as MPI_CHAR from this neighbour's slice of send_message */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
656 
657 /* do the actual message passing now */
658 #undef __FUNCT__
659 #define __FUNCT__ "DataExEnd"
660 PetscErrorCode DataExEnd(DataEx de)
661 {
662   PetscMPIInt  i,np;
663   PetscInt     total;
664   PetscInt    *message_recv_offsets;
665   void        *dest;
666   PetscInt     length;
667   PetscErrorCode ierr;
668 
669   PetscFunctionBegin;
670   if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
671   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
672   ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
673   np = de->n_neighbour_procs;
674   ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
675   message_recv_offsets[0] = 0;
676   total = de->messages_to_be_recvieved[0];
677   for (i = 1; i < np; ++i) {
678     message_recv_offsets[i] = total;
679     total = total + de->messages_to_be_recvieved[i];
680   }
681   /* == NON BLOCKING == */
682   for (i = 0; i < np; ++i) {
683     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
684     dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
685     ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
686   }
687   ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
688   ierr = PetscFree(message_recv_offsets);
689   de->communication_status = DEOBJECT_FINALIZED;
690   ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
#undef __FUNCT__
#define __FUNCT__ "DataExGetSendData"
/*
 Accessor. Returns the packed send buffer and its length in units (not
 bytes). Valid only after DataExPackFinalize(). The buffer is owned by the
 exchanger — callers must not free it.
*/
PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
{
  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
  *length = de->send_message_length;
  *send   = de->send_message;
  PetscFunctionReturn(0);
}
704 
#undef __FUNCT__
#define __FUNCT__ "DataExGetRecvData"
/*
 Accessor. Returns the receive buffer and its length in units (not bytes).
 Valid only after DataExEnd() has completed the exchange. The caller must
 cast the buffer to the appropriate element type; the exchanger retains
 ownership of the memory.
*/
PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
{
  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
  *length = de->recv_message_length;
  *recv   = de->recv_message;
  PetscFunctionReturn(0);
}
715 
#undef __FUNCT__
#define __FUNCT__ "DataExTopologyGetNeighbours"
/*
 Accessor. Returns the neighbour count and/or the neighbour rank array;
 either output may be NULL if not wanted. The returned array is owned by
 the exchanger. NOTE(review): no state check here — before
 DataExTopologyFinalize() the values reflect the unsymmetrized topology.
*/
PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
{
  PetscFunctionBegin;
  if (n)     {*n     = de->n_neighbour_procs;}
  if (neigh) {*neigh = de->neighbour_procs;}
  PetscFunctionReturn(0);
}
725 
726