xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0, and then the
message id comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
All data gets jammed into a single array, so we need to "jam" the data into the correct locations.
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 + Constructor
51 DataExCreate()
52 + Phase A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 + Phase B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 + Phase C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 +Phase D
68 DataExBegin()
69  ... perform any calculations ...
70 DataExEnd()
71 
72 ... user calls any getters here ...
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
83 const char *status_names[] = {"initialized", "finalized", "unknown"};
84 
85 PetscLogEvent PTATIN_DataExchangerTopologySetup;
86 PetscLogEvent PTATIN_DataExchangerBegin;
87 PetscLogEvent PTATIN_DataExchangerEnd;
88 
89 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
90 {
91   PetscErrorCode ierr;
92   DataEx         d;
93 
94   PetscFunctionBegin;
95   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
96   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
97   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
98   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
99 
100   d->instance = count;
101 
102   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
103   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
104   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
105   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
106 
107   d->n_neighbour_procs = -1;
108   d->neighbour_procs   = NULL;
109 
110   d->messages_to_be_sent      = NULL;
111   d->message_offsets          = NULL;
112   d->messages_to_be_recvieved = NULL;
113 
114   d->unit_message_size   = -1;
115   d->send_message        = NULL;
116   d->send_message_length = -1;
117   d->recv_message        = NULL;
118   d->recv_message_length = -1;
119   d->total_pack_cnt      = -1;
120   d->pack_cnt            = NULL;
121 
122   d->send_tags = NULL;
123   d->recv_tags = NULL;
124 
125   d->_stats    = NULL;
126   d->_requests = NULL;
127   *ex = d;
128   PetscFunctionReturn(0);
129 }
130 
131 PetscErrorCode DataExView(DataEx d)
132 {
133   PetscMPIInt p;
134   PetscErrorCode ierr;
135 
136   PetscFunctionBegin;
137   ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
138   ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
139   ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
140   ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
141   ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);
142 
143   if (d->topology_status == DEOBJECT_FINALIZED) {
144     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
145     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
146     for (p=0; p<d->n_neighbour_procs; p++) {
147       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
148     }
149   }
150   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
151     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
152     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
153     for (p=0; p<d->n_neighbour_procs; p++) {
154       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
155     }
156     for (p=0; p<d->n_neighbour_procs; p++) {
157       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
158     }
159   }
160   if (d->packer_status == DEOBJECT_FINALIZED) {}
161   if (d->communication_status == DEOBJECT_FINALIZED) {}
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode DataExDestroy(DataEx d)
166 {
167   PetscErrorCode ierr;
168 
169   PetscFunctionBegin;
170   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
171   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
172   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
173   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
174   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
175   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
176   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
177   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
178   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
179   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
180   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
181   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
182   ierr = PetscFree(d);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 /* === Phase A === */
187 
188 PetscErrorCode DataExTopologyInitialize(DataEx d)
189 {
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   d->topology_status = DEOBJECT_INITIALIZED;
194   d->n_neighbour_procs = 0;
195   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
196   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
197   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
198   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
199   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
200   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
201   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
206 {
207   PetscMPIInt    n,found;
208   PetscMPIInt    nproc;
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
213   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
214 
215   /* error on negative entries */
216   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
217   /* error on ranks larger than number of procs in communicator */
218   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
219   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
220   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
221   /* check for proc_id */
222   found = 0;
223   for (n = 0; n < d->n_neighbour_procs; n++) {
224     if (d->neighbour_procs[n] == proc_id) {
225       found  = 1;
226     }
227   }
228   if (found == 0) { /* add it to list */
229     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
230     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
231     d->n_neighbour_procs++;
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 /*
237 counter: the index of the communication object
238 N: the number of processors
239 r0: rank of sender
240 r1: rank of receiver
241 
242 procs = { 0, 1, 2, 3 }
243 
244 0 ==> 0		e=0
245 0 ==> 1		e=1
246 0 ==> 2		e=2
247 0 ==> 3		e=3
248 
249 1 ==> 0		e=4
250 1 ==> 1		e=5
251 1 ==> 2		e=6
252 1 ==> 3		e=7
253 
254 2 ==> 0		e=8
255 2 ==> 1		e=9
256 2 ==> 2		e=10
257 2 ==> 3		e=11
258 
259 3 ==> 0		e=12
260 3 ==> 1		e=13
261 3 ==> 2		e=14
262 3 ==> 3		e=15
263 
264 If we require that proc A sends to proc B, then the SEND tag index will be given by
265   N * rank(A) + rank(B) + offset
266 If we require that proc A will receive from proc B, then the RECV tag index will be given by
267   N * rank(B) + rank(A) + offset
268 
269 */
270 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
271 {
272   PetscMPIInt st,rt;
273 
274   st = N*r0 + r1   +   N*N*counter;
275   rt = N*r1 + r0   +   N*N*counter;
276   *_st = st;
277   *_rt = rt;
278 }
279 
280 /*
281 Makes the communication map symmetric
282 */
283 PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
284 {
285   Mat               A;
286   PetscInt          i,j,nc;
287   PetscInt          n_, *proc_neighbours_;
288   PetscInt          rank_i_;
289   PetscMPIInt       size,  rank_i;
290   PetscScalar       *vals;
291   const PetscInt    *cols;
292   const PetscScalar *red_vals;
293   PetscMPIInt       _n_new, *_proc_neighbours_new;
294   PetscErrorCode    ierr;
295 
296   PetscFunctionBegin;
297   n_ = n;
298   ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
299   for (i = 0; i < n_; ++i) {
300     proc_neighbours_[i] = proc_neighbours[i];
301   }
302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
303   ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
304   rank_i_ = rank_i;
305 
306   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
307   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
308   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
309   ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
310   ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
311   ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
312   /* Build original map */
313   ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
314   for (i = 0; i < n_; ++i) {
315     vals[i] = 1.0;
316   }
317   ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
318   ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
319   ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
320   /* Now force all other connections if they are not already there */
321   /* It's more efficient to do them all at once */
322   for (i = 0; i < n_; ++i) {
323     vals[i] = 2.0;
324   }
325   ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
326   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
327   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
328 /*
329   ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
330   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
331   ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
332 */
333   if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
334     ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
335     _n_new = (PetscMPIInt) nc;
336     ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
337     for (j = 0; j < nc; ++j) {
338       _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
339     }
340     ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
341     *n_new               = (PetscMPIInt)_n_new;
342     *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
343   }
344   ierr = MatDestroy(&A);CHKERRQ(ierr);
345   ierr = PetscFree(vals);CHKERRQ(ierr);
346   ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
347   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 PetscErrorCode DataExTopologyFinalize(DataEx d)
352 {
353   PetscMPIInt    symm_nn;
354   PetscMPIInt   *symm_procs;
355   PetscMPIInt    r0,n,st,rt;
356   PetscMPIInt    nprocs;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
361 
362   ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
363   /* given infomation about all my neighbours, make map symmetric */
364   ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
365   /* update my arrays */
366   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
367   d->n_neighbour_procs = symm_nn;
368   d->neighbour_procs   = symm_procs;
369   /* allocates memory */
370   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
371   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
372   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
373   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
374   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
375   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
376   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
377   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
378   /* compute message tags */
379   ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
380   r0 = d->rank;
381   for (n = 0; n < d->n_neighbour_procs; ++n) {
382     PetscMPIInt r1 = d->neighbour_procs[n];
383 
384     _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
385     d->send_tags[n] = (int)st;
386     d->recv_tags[n] = (int)rt;
387   }
388   d->topology_status = DEOBJECT_FINALIZED;
389   ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 /* === Phase B === */
394 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
395 {
396   PetscMPIInt i,np;
397 
398   PetscFunctionBegin;
399   np = de->n_neighbour_procs;
400   *local = -1;
401   for (i = 0; i < np; ++i) {
402     if (proc_id == de->neighbour_procs[i]) {
403       *local = i;
404       break;
405     }
406   }
407   PetscFunctionReturn(0);
408 }
409 
410 PetscErrorCode DataExInitializeSendCount(DataEx de)
411 {
412   PetscMPIInt i;
413 
414   PetscFunctionBegin;
415   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
416   de->message_lengths_status = DEOBJECT_INITIALIZED;
417   for (i = 0; i < de->n_neighbour_procs; ++i) {
418     de->messages_to_be_sent[i] = 0;
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 /*
424 1) only allows counters to be set on neighbouring cpus
425 */
426 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
427 {
428   PetscMPIInt    local_val;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
433   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
434 
435   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
436   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
437 
438   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
439   PetscFunctionReturn(0);
440 }
441 
442 PetscErrorCode DataExFinalizeSendCount(DataEx de)
443 {
444   PetscFunctionBegin;
445   if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
446 
447   de->message_lengths_status = DEOBJECT_FINALIZED;
448   PetscFunctionReturn(0);
449 }
450 
451 /* === Phase C === */
452 /*
453  * zero out all send counts
454  * free send and recv buffers
455  * zeros out message length
456  * zeros out all counters
457  * zero out packed data counters
458 */
459 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
460 {
461   PetscMPIInt    i, np;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
466   */
467   /*
468   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
469   */
470   np = de->n_neighbour_procs;
471   for (i = 0; i < np; ++i) {
472     /*	de->messages_to_be_sent[i] = -1; */
473     de->messages_to_be_recvieved[i] = -1;
474   }
475   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
476   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
477   PetscFunctionReturn(0);
478 }
479 
480 /*
481 *) Zeros out pack data counters
482 *) Ensures mesaage length is set
483 *) Checks send counts properly initialized
484 *) allocates space for pack data
485 */
486 PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
487 {
488   PetscMPIInt    i,np;
489   PetscInt       total;
490   PetscErrorCode ierr;
491 
492   PetscFunctionBegin;
493   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
494   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
495   de->packer_status = DEOBJECT_INITIALIZED;
496   ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
497   np = de->n_neighbour_procs;
498   de->unit_message_size = unit_message_size;
499   total = 0;
500   for (i = 0; i < np; ++i) {
501     if (de->messages_to_be_sent[i] == -1) {
502       PetscMPIInt proc_neighour = de->neighbour_procs[i];
503       SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
504     }
505     total = total + de->messages_to_be_sent[i];
506   }
507   /* create space for the data to be sent */
508   ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
509   /* initialize memory */
510   ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
511   /* set total items to send */
512   de->send_message_length = total;
513   de->message_offsets[0] = 0;
514   total = de->messages_to_be_sent[0];
515   for (i = 1; i < np; ++i) {
516     de->message_offsets[i] = total;
517     total = total + de->messages_to_be_sent[i];
518   }
519   /* init the packer counters */
520   de->total_pack_cnt = 0;
521   for (i = 0; i < np; ++i) {
522     de->pack_cnt[i] = 0;
523   }
524   PetscFunctionReturn(0);
525 }
526 
527 /*
528 *) Ensures data gets been packed appropriately and no overlaps occur
529 */
530 PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
531 {
532   PetscMPIInt    local;
533   PetscInt       insert_location;
534   void           *dest;
535   PetscErrorCode ierr;
536 
537   PetscFunctionBegin;
538   if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
539   else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
540 
541   if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
542   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
543   if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
544   if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
545               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
546 
547   /* copy memory */
548   insert_location = de->message_offsets[local] + de->pack_cnt[local];
549   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
550   ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
551   /* increment counter */
552   de->pack_cnt[local] = de->pack_cnt[local] + n;
553   PetscFunctionReturn(0);
554 }
555 
556 /*
557 *) Ensures all data has been packed
558 */
559 PetscErrorCode DataExPackFinalize(DataEx de)
560 {
561   PetscMPIInt i,np;
562   PetscInt    total;
563   PetscErrorCode ierr;
564 
565   PetscFunctionBegin;
566   if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
567   np = de->n_neighbour_procs;
568   for (i = 0; i < np; ++i) {
569     if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
570                 (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
571   }
572   /* init */
573   for (i = 0; i < np; ++i) {
574     de->messages_to_be_recvieved[i] = -1;
575   }
576   /* figure out the recv counts here */
577   for (i = 0; i < np; ++i) {
578     ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
579   }
580   for (i = 0; i < np; ++i) {
581     ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
582   }
583   ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
584   /* create space for the data to be recvieved */
585   total = 0;
586   for (i = 0; i < np; ++i) {
587     total = total + de->messages_to_be_recvieved[i];
588   }
589   ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
590   /* initialize memory */
591   ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
592   /* set total items to recieve */
593   de->recv_message_length = total;
594   de->packer_status = DEOBJECT_FINALIZED;
595   de->communication_status = DEOBJECT_INITIALIZED;
596   PetscFunctionReturn(0);
597 }
598 
599 /* do the actual message passing now */
600 PetscErrorCode DataExBegin(DataEx de)
601 {
602   PetscMPIInt i,np;
603   void       *dest;
604   PetscInt    length;
605   PetscErrorCode ierr;
606 
607   PetscFunctionBegin;
608   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
609   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
610   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
611   if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
612   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
613   ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
614   np = de->n_neighbour_procs;
615   /* == NON BLOCKING == */
616   for (i = 0; i < np; ++i) {
617     length = de->messages_to_be_sent[i] * de->unit_message_size;
618     dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
619     ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
620   }
621   ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 /* do the actual message passing now */
626 PetscErrorCode DataExEnd(DataEx de)
627 {
628   PetscMPIInt  i,np;
629   PetscInt     total;
630   PetscInt    *message_recv_offsets;
631   void        *dest;
632   PetscInt     length;
633   PetscErrorCode ierr;
634 
635   PetscFunctionBegin;
636   if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
637   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
638   ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
639   np = de->n_neighbour_procs;
640   ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
641   message_recv_offsets[0] = 0;
642   total = de->messages_to_be_recvieved[0];
643   for (i = 1; i < np; ++i) {
644     message_recv_offsets[i] = total;
645     total = total + de->messages_to_be_recvieved[i];
646   }
647   /* == NON BLOCKING == */
648   for (i = 0; i < np; ++i) {
649     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
650     dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
651     ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
652   }
653   ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
654   ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
655   de->communication_status = DEOBJECT_FINALIZED;
656   ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
661 {
662   PetscFunctionBegin;
663   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
664   *length = de->send_message_length;
665   *send   = de->send_message;
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
670 {
671   PetscFunctionBegin;
672   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
673   *length = de->recv_message_length;
674   *recv   = de->recv_message;
675   PetscFunctionReturn(0);
676 }
677 
678 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
679 {
680   PetscFunctionBegin;
681   if (n)     {*n     = de->n_neighbour_procs;}
682   if (neigh) {*neigh = de->neighbour_procs;}
683   PetscFunctionReturn(0);
684 }
685 
686