xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 279f676c5290b2d70df3ad4ba443571d8a6951be)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
17 Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
26 On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into a single array. Need to "jam" data into the correct locations.
28 The Packer knows how much is going to each processor and keeps track of the inserts
29 so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 + Constructor
51 DMSwarmDataExCreate()
52 + Phase A
53 DMSwarmDataExTopologyInitialize()
54 DMSwarmDataExTopologyAddNeighbour()
55 DMSwarmDataExTopologyAddNeighbour()
56 DMSwarmDataExTopologyFinalize()
57 + Phase B
58 DMSwarmDataExZeroAllSendCount()
59 DMSwarmDataExAddToSendCount()
60 DMSwarmDataExAddToSendCount()
61 DMSwarmDataExAddToSendCount()
62 + Phase C
63 DMSwarmDataExPackInitialize()
64 DMSwarmDataExPackData()
65 DMSwarmDataExPackData()
66 DMSwarmDataExPackFinalize()
67 +Phase D
68 DMSwarmDataExBegin()
69  ... perform any calculations ...
70 DMSwarmDataExEnd()
71 
72 ... user calls any getters here ...
73 
74 
75 */
76 #include <petscvec.h>
77 #include <petscmat.h>
78 
79 #include "../src/dm/impls/swarm/data_ex.h"
80 
/* Human-readable phase-state names, indexed by the DEOBJECT_* state values
   (assumes DEOBJECT_INITIALIZED=0, DEOBJECT_FINALIZED=1, DEOBJECT_STATE_UNKNOWN=2
   -- confirm against the enum in data_ex.h) */
const char *status_names[] = {"initialized", "finalized", "unknown"};
82 
83 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
84 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
85 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
86 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
87 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
88 
89 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm,const PetscInt count, DMSwarmDataEx *ex)
90 {
91   PetscErrorCode ierr;
92   DMSwarmDataEx         d;
93 
94   PetscFunctionBegin;
95   ierr = PetscMalloc(sizeof(struct _p_DMSwarmDataEx), &d);CHKERRQ(ierr);
96   ierr = PetscMemzero(d, sizeof(struct _p_DMSwarmDataEx));CHKERRQ(ierr);
97   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
98   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
99 
100   d->instance = count;
101 
102   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
103   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
104   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
105   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
106 
107   d->n_neighbour_procs = -1;
108   d->neighbour_procs   = NULL;
109 
110   d->messages_to_be_sent      = NULL;
111   d->message_offsets          = NULL;
112   d->messages_to_be_recvieved = NULL;
113 
114   d->unit_message_size   = -1;
115   d->send_message        = NULL;
116   d->send_message_length = -1;
117   d->recv_message        = NULL;
118   d->recv_message_length = -1;
119   d->total_pack_cnt      = -1;
120   d->pack_cnt            = NULL;
121 
122   d->send_tags = NULL;
123   d->recv_tags = NULL;
124 
125   d->_stats    = NULL;
126   d->_requests = NULL;
127   *ex = d;
128   PetscFunctionReturn(0);
129 }
130 
131 /*
132     This code is horrible, who let it get into master.
133 
134     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
135 
136 */
137 PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
138 {
139   PetscMPIInt    p;
140   PetscErrorCode ierr;
141 
142   PetscFunctionBegin;
143   ierr = PetscPrintf( PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
144   ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
145   ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
146   ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
147   ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);
148 
149   if (d->topology_status == DEOBJECT_FINALIZED) {
150     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
151     ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs );CHKERRQ(ierr);
152     for (p=0; p<d->n_neighbour_procs; p++) {
153       ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]);CHKERRQ(ierr);
154     }
155     ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout);CHKERRQ(ierr);
156   }
157 
158   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
159     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
160     ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
161     for (p=0; p<d->n_neighbour_procs; p++) {
162       ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p] );CHKERRQ(ierr);
163     }
164     for (p=0; p<d->n_neighbour_procs; p++) {
165       ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p] );CHKERRQ(ierr);
166     }
167     ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout);CHKERRQ(ierr);
168   }
169   if (d->packer_status == DEOBJECT_FINALIZED) {}
170   if (d->communication_status == DEOBJECT_FINALIZED) {}
171   PetscFunctionReturn(0);
172 }
173 
174 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
175 {
176   PetscErrorCode ierr;
177 
178   PetscFunctionBegin;
179   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
180   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
181   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
182   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
183   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
184   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
185   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
186   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
187   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
188   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
189   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
190   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
191   ierr = PetscFree(d);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 /* === Phase A === */
196 
/*
 Begin (or restart) phase A: topology definition.

 Marks the topology as initialized, zeroes the neighbour count, and releases
 every array whose size depends on the old neighbour list, so that neighbours
 can be (re)registered via DMSwarmDataExTopologyAddNeighbour().
*/
PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  d->topology_status = DEOBJECT_INITIALIZED;
  d->n_neighbour_procs = 0;
  /* PetscFree() accepts NULL, so these are safe on a freshly created exchanger */
  ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
  ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
  ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
  ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
  ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
213 
214 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d,const PetscMPIInt proc_id)
215 {
216   PetscMPIInt    n,found;
217   PetscMPIInt    size;
218   PetscErrorCode ierr;
219 
220   PetscFunctionBegin;
221   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
222   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DMSwarmDataExTopologyInitialize() first");
223 
224   /* error on negative entries */
225   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
226   /* error on ranks larger than number of procs in communicator */
227   ierr = MPI_Comm_size(d->comm,&size);CHKERRQ(ierr);
228   if (proc_id >= size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour %d with a rank >= size %d",proc_id,size);
229   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
230   /* check for proc_id */
231   found = 0;
232   for (n = 0; n < d->n_neighbour_procs; n++) {
233     if (d->neighbour_procs[n] == proc_id) {
234       found  = 1;
235     }
236   }
237   if (found == 0) { /* add it to list */
238     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
239     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
240     d->n_neighbour_procs++;
241   }
242   PetscFunctionReturn(0);
243 }
244 
245 /*
246 counter: the index of the communication object
247 N: the number of processors
248 r0: rank of sender
249 r1: rank of receiver
250 
251 procs = { 0, 1, 2, 3 }
252 
253 0 ==> 0		e=0
254 0 ==> 1		e=1
255 0 ==> 2		e=2
256 0 ==> 3		e=3
257 
258 1 ==> 0		e=4
259 1 ==> 1		e=5
260 1 ==> 2		e=6
261 1 ==> 3		e=7
262 
263 2 ==> 0		e=8
264 2 ==> 1		e=9
265 2 ==> 2		e=10
266 2 ==> 3		e=11
267 
268 3 ==> 0		e=12
269 3 ==> 1		e=13
270 3 ==> 2		e=14
271 3 ==> 3		e=15
272 
273 If we require that proc A sends to proc B, then the SEND tag index will be given by
274   N * rank(A) + rank(B) + offset
275 If we require that proc A will receive from proc B, then the RECV tag index will be given by
276   N * rank(B) + rank(A) + offset
277 
278 */
279 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
280 {
281   PetscMPIInt st,rt;
282 
283   st = N*r0 + r1   +   N*N*counter;
284   rt = N*r1 + r0   +   N*N*counter;
285   *_st = st;
286   *_rt = rt;
287 }
288 
/*
 Makes the communication map symmetric: after this call, if any rank lists B
 as a neighbour then B also lists that rank.

 Implementation: assemble a size x size AIJ adjacency matrix in which row r
 holds rank r's declared neighbours, additionally insert the transpose
 entries, then read back this rank's (now symmetrized) row. Collective on comm.

 n_new / proc_neighbours_new may both be NULL if the result is not wanted;
 otherwise *proc_neighbours_new is allocated here and ownership passes to the
 caller (freed with PetscFree()).
*/
PetscErrorCode _DMSwarmDataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_;
  PetscMPIInt       size,  rank;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  rank_ = rank;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  /* transpose entries inserted below may land outside the preallocation */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* read back the symmetrized row belonging to this rank */
    ierr = MatGetRow(A, rank_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
359 
360 PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
361 {
362   PetscMPIInt    symm_nn;
363   PetscMPIInt   *symm_procs;
364   PetscMPIInt    r0,n,st,rt;
365   PetscMPIInt    size;
366   PetscErrorCode ierr;
367 
368   PetscFunctionBegin;
369   if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DMSwarmDataExTopologyInitialize() first");
370 
371   ierr = PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
372   /* given infomation about all my neighbours, make map symmetric */
373   ierr = _DMSwarmDataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
374   /* update my arrays */
375   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
376   d->n_neighbour_procs = symm_nn;
377   d->neighbour_procs   = symm_procs;
378   /* allocates memory */
379   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
380   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
381   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
382   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
383   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
384   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
385   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
386   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
387   /* compute message tags */
388   ierr = MPI_Comm_size(d->comm,&size);CHKERRQ(ierr);
389   r0 = d->rank;
390   for (n = 0; n < d->n_neighbour_procs; ++n) {
391     PetscMPIInt r1 = d->neighbour_procs[n];
392 
393     _get_tags( d->instance, size, r0,r1, &st, &rt );
394     d->send_tags[n] = (int)st;
395     d->recv_tags[n] = (int)rt;
396   }
397   d->topology_status = DEOBJECT_FINALIZED;
398   ierr = PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /* === Phase B === */
403 PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
404 {
405   PetscMPIInt i,np;
406 
407   PetscFunctionBegin;
408   np = de->n_neighbour_procs;
409   *local = -1;
410   for (i = 0; i < np; ++i) {
411     if (proc_id == de->neighbour_procs[i]) {
412       *local = i;
413       break;
414     }
415   }
416   PetscFunctionReturn(0);
417 }
418 
419 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
420 {
421   PetscMPIInt    i;
422   PetscErrorCode ierr;
423 
424   PetscFunctionBegin;
425   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
426   ierr = PetscLogEventBegin(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
427   de->message_lengths_status = DEOBJECT_INITIALIZED;
428   for (i = 0; i < de->n_neighbour_procs; ++i) {
429     de->messages_to_be_sent[i] = 0;
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 /*
435 1) only allows counters to be set on neighbouring cpus
436 */
437 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de,const PetscMPIInt proc_id,const PetscInt count)
438 {
439   PetscMPIInt    local_val;
440   PetscErrorCode ierr;
441 
442   PetscFunctionBegin;
443   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first" );
444   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first" );
445 
446   ierr = _DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
447   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
448 
449   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
450   PetscFunctionReturn(0);
451 }
452 
/*
 End phase B: freeze the per-neighbour send counts.
 Also closes the logging event opened in DMSwarmDataExInitializeSendCount().
*/
PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first" );

  de->message_lengths_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
464 
465 /* === Phase C === */
466 /*
467  * zero out all send counts
468  * free send and recv buffers
469  * zeros out message length
470  * zeros out all counters
471  * zero out packed data counters
472 */
473 PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
474 {
475   PetscMPIInt    i, np;
476   PetscErrorCode ierr;
477 
478   PetscFunctionBegin;
479   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
480   */
481   /*
482   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
483   */
484   np = de->n_neighbour_procs;
485   for (i = 0; i < np; ++i) {
486     /*	de->messages_to_be_sent[i] = -1; */
487     de->messages_to_be_recvieved[i] = -1;
488   }
489   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
490   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 
494 /*
495 *) Zeros out pack data counters
496 *) Ensures mesaage length is set
497 *) Checks send counts properly initialized
498 *) allocates space for pack data
499 */
500 PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de,size_t unit_message_size)
501 {
502   PetscMPIInt    i,np;
503   PetscInt       total;
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
508   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
509   ierr = PetscLogEventBegin(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
510   de->packer_status = DEOBJECT_INITIALIZED;
511   ierr = _DMSwarmDataExInitializeTmpStorage(de);CHKERRQ(ierr);
512   np = de->n_neighbour_procs;
513   de->unit_message_size = unit_message_size;
514   total = 0;
515   for (i = 0; i < np; ++i) {
516     if (de->messages_to_be_sent[i] == -1) {
517       PetscMPIInt proc_neighour = de->neighbour_procs[i];
518       SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", (int)proc_neighour );
519     }
520     total = total + de->messages_to_be_sent[i];
521   }
522   /* create space for the data to be sent */
523   ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
524   /* initialize memory */
525   ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
526   /* set total items to send */
527   de->send_message_length = total;
528   de->message_offsets[0] = 0;
529   total = de->messages_to_be_sent[0];
530   for (i = 1; i < np; ++i) {
531     de->message_offsets[i] = total;
532     total = total + de->messages_to_be_sent[i];
533   }
534   /* init the packer counters */
535   de->total_pack_cnt = 0;
536   for (i = 0; i < np; ++i) {
537     de->pack_cnt[i] = 0;
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 /*
543 *) Ensures data gets been packed appropriately and no overlaps occur
544 */
545 PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
546 {
547   PetscMPIInt    local;
548   PetscInt       insert_location;
549   void           *dest;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first" );
554   else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first" );
555 
556   if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first" );
557   ierr = _DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
558   if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
559   if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
560               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
561 
562   /* copy memory */
563   insert_location = de->message_offsets[local] + de->pack_cnt[local];
564   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
565   ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
566   /* increment counter */
567   de->pack_cnt[local] = de->pack_cnt[local] + n;
568   PetscFunctionReturn(0);
569 }
570 
/*
 End phase C:
 *) Ensures all data has been packed
 *) Exchanges the per-neighbour unit counts so the receive side knows how
    much to expect, and allocates/zeroes the receive buffer.
 Collective over the exchanger's communicator.
*/
PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  /* every neighbour's window must be completely filled before sending */
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here: exchange the unit counts with every
     neighbour using non-blocking sends/receives, then wait on all of them */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be recvieved; +1 guards against a zero-size allocation */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to recieve */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
614 
615 /* do the actual message passing now */
616 PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
617 {
618   PetscMPIInt i,np;
619   void       *dest;
620   PetscInt    length;
621   PetscErrorCode ierr;
622 
623   PetscFunctionBegin;
624   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
625   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
626   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
627   if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first." );
628   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first" );
629   ierr = PetscLogEventBegin(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
630   np = de->n_neighbour_procs;
631   /* == NON BLOCKING == */
632   for (i = 0; i < np; ++i) {
633     length = de->messages_to_be_sent[i] * de->unit_message_size;
634     dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
635     ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
636   }
637   ierr = PetscLogEventEnd(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
638   PetscFunctionReturn(0);
639 }
640 
641 /* do the actual message passing now */
642 PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
643 {
644   PetscMPIInt  i,np;
645   PetscInt     total;
646   PetscInt    *message_recv_offsets;
647   void        *dest;
648   PetscInt     length;
649   PetscErrorCode ierr;
650 
651   PetscFunctionBegin;
652   if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first." );
653   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first" );
654   ierr = PetscLogEventBegin(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
655   np = de->n_neighbour_procs;
656   ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
657   message_recv_offsets[0] = 0;
658   total = de->messages_to_be_recvieved[0];
659   for (i = 1; i < np; ++i) {
660     message_recv_offsets[i] = total;
661     total = total + de->messages_to_be_recvieved[i];
662   }
663   /* == NON BLOCKING == */
664   for (i = 0; i < np; ++i) {
665     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
666     dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
667     ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
668   }
669   ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
670   ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
671   de->communication_status = DEOBJECT_FINALIZED;
672   ierr = PetscLogEventEnd(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de,PetscInt *length,void **send)
677 {
678   PetscFunctionBegin;
679   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
680   *length = de->send_message_length;
681   *send   = de->send_message;
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de,PetscInt *length,void **recv)
686 {
687   PetscFunctionBegin;
688   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
689   *length = de->recv_message_length;
690   *recv   = de->recv_message;
691   PetscFunctionReturn(0);
692 }
693 
694 PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
695 {
696   PetscFunctionBegin;
697   if (n)     {*n     = de->n_neighbour_procs;}
698   if (neigh) {*neigh = de->neighbour_procs;}
699   PetscFunctionReturn(0);
700 }
701 
702