xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
17 Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
26 On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
28 The Packer knows how much is going to each processor and keeps track of the inserts
29 so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 User has to unpack the message themselves. I can get you the pointer for each
36 entry, but you'll have to cast it to the appropriate data type.
37 
38 Phase A: Build topology
39 
40 Phase B: Define message lengths
41 
42 Phase C: Pack data
43 
44 Phase D: Send data
45 
46 + Constructor
47 DMSwarmDataExCreate()
48 + Phase A
49 DMSwarmDataExTopologyInitialize()
50 DMSwarmDataExTopologyAddNeighbour()
51 DMSwarmDataExTopologyAddNeighbour()
52 DMSwarmDataExTopologyFinalize()
53 + Phase B
54 DMSwarmDataExZeroAllSendCount()
55 DMSwarmDataExAddToSendCount()
56 DMSwarmDataExAddToSendCount()
57 DMSwarmDataExAddToSendCount()
58 + Phase C
59 DMSwarmDataExPackInitialize()
60 DMSwarmDataExPackData()
61 DMSwarmDataExPackData()
62 DMSwarmDataExPackFinalize()
63 +Phase D
64 DMSwarmDataExBegin()
65  ... perform any calculations ...
66 DMSwarmDataExEnd()
67 
68 ... user calls any getters here ...
69 
70 */
71 #include <petscvec.h>
72 #include <petscmat.h>
73 
74 #include "../src/dm/impls/swarm/data_ex.h"
75 
76 const char *status_names[] = {"initialized", "finalized", "unknown"};
77 
78 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
79 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
80 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
81 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
82 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
83 
84 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm,const PetscInt count, DMSwarmDataEx *ex)
85 {
86   PetscErrorCode ierr;
87   DMSwarmDataEx  d;
88 
89   PetscFunctionBegin;
90   ierr = PetscMalloc(sizeof(struct _p_DMSwarmDataEx), &d);CHKERRQ(ierr);
91   ierr = PetscMemzero(d, sizeof(struct _p_DMSwarmDataEx));CHKERRQ(ierr);
92   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRMPI(ierr);
93   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRMPI(ierr);
94 
95   d->instance = count;
96 
97   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
98   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
99   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
100   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
101 
102   d->n_neighbour_procs = -1;
103   d->neighbour_procs   = NULL;
104 
105   d->messages_to_be_sent      = NULL;
106   d->message_offsets          = NULL;
107   d->messages_to_be_recvieved = NULL;
108 
109   d->unit_message_size   = (size_t)-1;
110   d->send_message        = NULL;
111   d->send_message_length = -1;
112   d->recv_message        = NULL;
113   d->recv_message_length = -1;
114   d->total_pack_cnt      = -1;
115   d->pack_cnt            = NULL;
116 
117   d->send_tags = NULL;
118   d->recv_tags = NULL;
119 
120   d->_stats    = NULL;
121   d->_requests = NULL;
122   *ex = d;
123   PetscFunctionReturn(0);
124 }
125 
126 /*
127     This code is horrible, who let it get into main.
128 
129     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
130 
131 */
/*
 Prints a human-readable summary of the exchanger: the four phase states and,
 once finalized, the neighbour topology and per-neighbour message lengths.

 NOTE(review): output goes to PETSC_COMM_WORLD rather than d->comm (see the
 comment preceding this function); the synchronized prints therefore assume
 every rank of PETSC_COMM_WORLD calls this routine — confirm before reusing
 on a sub-communicator.
*/
PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
{
  PetscMPIInt    p;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscPrintf( PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status]);CHKERRQ(ierr);

  if (d->topology_status == DEOBJECT_FINALIZED) {
    /* one synchronized line per rank, then one per neighbour of that rank */
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
    ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs);CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]);CHKERRQ(ierr);
    }
    ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout);CHKERRQ(ierr);
  }

  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    /* unit size plus the send (>>>>>) and receive (<<<<<) counts and tags per neighbour */
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
    ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size);CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] >>>>> ( %D units :: tag = %d) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p]);CHKERRQ(ierr);
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] <<<<< ( %D units :: tag = %d) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p]);CHKERRQ(ierr);
    }
    ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout);CHKERRQ(ierr);
  }
  if (d->packer_status == DEOBJECT_FINALIZED) {} /* no packer view implemented yet */
  if (d->communication_status == DEOBJECT_FINALIZED) {} /* no communication view implemented yet */
  PetscFunctionReturn(0);
}
168 
169 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
170 {
171   PetscErrorCode ierr;
172 
173   PetscFunctionBegin;
174   ierr = MPI_Comm_free(&d->comm);CHKERRMPI(ierr);
175   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
176   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
177   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
178   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
179   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
180   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
181   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
182   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
183   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
184   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
185   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
186   ierr = PetscFree(d);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 /* === Phase A === */
191 
192 PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
193 {
194   PetscErrorCode ierr;
195 
196   PetscFunctionBegin;
197   d->topology_status = DEOBJECT_INITIALIZED;
198   d->n_neighbour_procs = 0;
199   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
200   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
201   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
202   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
203   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
204   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
205   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d,const PetscMPIInt proc_id)
210 {
211   PetscMPIInt    n,found;
212   PetscMPIInt    size;
213   PetscErrorCode ierr;
214 
215   PetscFunctionBegin;
216   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
217   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
218 
219   /* error on negative entries */
220   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
221   /* error on ranks larger than number of procs in communicator */
222   ierr = MPI_Comm_size(d->comm,&size);CHKERRMPI(ierr);
223   if (proc_id >= size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour %d with a rank >= size %d",proc_id,size);
224   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
225   /* check for proc_id */
226   found = 0;
227   for (n = 0; n < d->n_neighbour_procs; n++) {
228     if (d->neighbour_procs[n] == proc_id) {
229       found  = 1;
230     }
231   }
232   if (found == 0) { /* add it to list */
233     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
234     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
235     d->n_neighbour_procs++;
236   }
237   PetscFunctionReturn(0);
238 }
239 
240 /*
241 counter: the index of the communication object
242 N: the number of processors
243 r0: rank of sender
244 r1: rank of receiver
245 
246 procs = { 0, 1, 2, 3 }
247 
248 0 ==> 0         e=0
249 0 ==> 1         e=1
250 0 ==> 2         e=2
251 0 ==> 3         e=3
252 
253 1 ==> 0         e=4
254 1 ==> 1         e=5
255 1 ==> 2         e=6
256 1 ==> 3         e=7
257 
258 2 ==> 0         e=8
259 2 ==> 1         e=9
260 2 ==> 2         e=10
261 2 ==> 3         e=11
262 
263 3 ==> 0         e=12
264 3 ==> 1         e=13
265 3 ==> 2         e=14
266 3 ==> 3         e=15
267 
268 If we require that proc A sends to proc B, then the SEND tag index will be given by
269   N * rank(A) + rank(B) + offset
270 If we require that proc A will receive from proc B, then the RECV tag index will be given by
271   N * rank(B) + rank(A) + offset
272 
273 */
274 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
275 {
276   PetscMPIInt st,rt;
277 
278   st = N*r0 + r1   +   N*N*counter;
279   rt = N*r1 + r0   +   N*N*counter;
280   *_st = st;
281   *_rt = rt;
282 }
283 
284 /*
285 Makes the communication map symmetric
286 */
/*
 Symmetrizes the neighbour map across comm: on return, rank p lists rank q as
 a neighbour iff q lists p (or either already listed the other).

 Implementation: assemble a size x size AIJ matrix where a nonzero (i,j)
 means "i communicates with j". Each rank inserts its own row (its declared
 neighbours) and its own column (forcing the reverse edges); row `rank` of
 the assembled matrix is then the symmetrized neighbour set.

 Output (only written when both n_new and proc_neighbours_new are non-NULL):
   n_new               - number of symmetrized neighbours
   proc_neighbours_new - newly allocated list; caller owns and must PetscFree() it
*/
PetscErrorCode _DMSwarmDataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_;
  PetscMPIInt       size,  rank;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the PetscMPIInt neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  rank_ = rank;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  /* extra entries may appear when the map is not already symmetric;
     allow allocation beyond the preallocated pattern */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_, n_,proc_neighbours_, vals, INSERT_VALUES);CHKERRQ(ierr);
  /* FLUSH (not FINAL) assembly: more insertions follow below */
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_, vals, INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* the column indices of row `rank` are exactly the symmetrized neighbours */
    ierr = MatGetRow(A, rank_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  /* NOTE(review): barrier looks unnecessary (MAT_FINAL_ASSEMBLY is already
     collective) — kept to preserve existing synchronization behavior */
  ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}
354 
355 PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
356 {
357   PetscMPIInt    symm_nn;
358   PetscMPIInt   *symm_procs;
359   PetscMPIInt    r0,n,st,rt;
360   PetscMPIInt    size;
361   PetscErrorCode ierr;
362 
363   PetscFunctionBegin;
364   if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
365 
366   ierr = PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
367   /* given infomation about all my neighbours, make map symmetric */
368   ierr = _DMSwarmDataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs);CHKERRQ(ierr);
369   /* update my arrays */
370   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
371   d->n_neighbour_procs = symm_nn;
372   d->neighbour_procs   = symm_procs;
373   /* allocates memory */
374   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
375   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
376   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
377   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
378   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
379   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
380   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
381   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
382   /* compute message tags */
383   ierr = MPI_Comm_size(d->comm,&size);CHKERRMPI(ierr);
384   r0 = d->rank;
385   for (n = 0; n < d->n_neighbour_procs; ++n) {
386     PetscMPIInt r1 = d->neighbour_procs[n];
387 
388     _get_tags( d->instance, size, r0,r1, &st, &rt);
389     d->send_tags[n] = (int)st;
390     d->recv_tags[n] = (int)rt;
391   }
392   d->topology_status = DEOBJECT_FINALIZED;
393   ierr = PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /* === Phase B === */
398 PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
399 {
400   PetscMPIInt i,np;
401 
402   PetscFunctionBegin;
403   np = de->n_neighbour_procs;
404   *local = -1;
405   for (i = 0; i < np; ++i) {
406     if (proc_id == de->neighbour_procs[i]) {
407       *local = i;
408       break;
409     }
410   }
411   PetscFunctionReturn(0);
412 }
413 
414 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
415 {
416   PetscMPIInt    i;
417   PetscErrorCode ierr;
418 
419   PetscFunctionBegin;
420   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
421   ierr = PetscLogEventBegin(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
422   de->message_lengths_status = DEOBJECT_INITIALIZED;
423   for (i = 0; i < de->n_neighbour_procs; ++i) {
424     de->messages_to_be_sent[i] = 0;
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 /*
430 1) only allows counters to be set on neighbouring cpus
431 */
432 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de,const PetscMPIInt proc_id,const PetscInt count)
433 {
434   PetscMPIInt    local_val;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first");
439   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
440 
441   ierr = _DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local_val);CHKERRQ(ierr);
442   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id);
443 
444   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
445   PetscFunctionReturn(0);
446 }
447 
448 PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
449 {
450   PetscErrorCode ierr;
451 
452   PetscFunctionBegin;
453   if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
454 
455   de->message_lengths_status = DEOBJECT_FINALIZED;
456   ierr = PetscLogEventEnd(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 /* === Phase C === */
461 /*
462  * zero out all send counts
463  * free send and recv buffers
464  * zeros out message length
465  * zeros out all counters
466  * zero out packed data counters
467 */
468 PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
469 {
470   PetscMPIInt    i, np;
471   PetscErrorCode ierr;
472 
473   PetscFunctionBegin;
474   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
475   */
476   /*
477   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL");
478   */
479   np = de->n_neighbour_procs;
480   for (i = 0; i < np; ++i) {
481     /*  de->messages_to_be_sent[i] = -1; */
482     de->messages_to_be_recvieved[i] = -1;
483   }
484   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
485   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 /*
490  *) Zeros out pack data counters
491  *) Ensures message length is set
492  *) Checks send counts properly initialized
493  *) allocates space for pack data
494 */
/*
 Begins the packing phase (Phase C):
   - records the per-unit message size
   - validates every send count was set
   - allocates and zeroes the send buffer
   - computes per-neighbour insertion offsets and zeroes the pack counters
*/
PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  /* Pack log event is ended in DMSwarmDataExPackFinalize() */
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  de->packer_status = DEOBJECT_INITIALIZED;
  /* reset recv counts and free buffers from any previous exchange */
  ierr = _DMSwarmDataExInitializeTmpStorage(de);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total = 0;
  for (i = 0; i < np; ++i) {
    /* -1 is the "never set" sentinel written by _DMSwarmDataExInitializeTmpStorage() */
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", (int)proc_neighour);
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent; +1 unit avoids a zero-byte allocation */
  ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to send */
  de->send_message_length = total;
  /* prefix-sum of send counts: message_offsets[i] is where neighbour i's
     data starts (in units) within send_message */
  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters (units packed so far per neighbour) */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) {
    de->pack_cnt[i] = 0;
  }
  PetscFunctionReturn(0);
}
536 
537 /*
538 *) Ensures data gets been packed appropriately and no overlaps occur
539 */
/*
 Copies n units of `data` into the send buffer slot reserved for proc_id.
 Guards against overfilling a neighbour's slot (which would corrupt the
 adjacent neighbour's region). May be called repeatedly per neighbour; the
 per-neighbour pack counter tracks how much of the slot is used.
*/
PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
{
  PetscMPIInt    local;
  PetscInt       insert_location;
  void           *dest;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
  else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");

  if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first");
  ierr = _DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local);CHKERRQ(ierr);
  if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id);
  /* overflow guard: this insert must fit within the count declared via
     DMSwarmDataExAddToSendCount() */
  if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
              (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local]);

  /* copy memory: destination = slot start (message_offsets) + units already packed */
  insert_location = de->message_offsets[local] + de->pack_cnt[local];
  dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
  ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
  /* increment counter */
  de->pack_cnt[local] = de->pack_cnt[local] + n;
  PetscFunctionReturn(0);
}
565 
566 /*
567 *) Ensures all data has been packed
568 */
/*
 Ends the packing phase (Phase C):
   - verifies every neighbour's slot was filled exactly
   - exchanges send counts with all neighbours (non-blocking, then Waitall)
     so each rank learns how many units it will receive
   - allocates and zeroes the receive buffer sized from those counts
 On success the packer is FINALIZED and communication is INITIALIZED,
 enabling DMSwarmDataExBegin()/DMSwarmDataExEnd().
*/
PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first.");
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i]);
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here: one MPIU_INT per neighbour, using the
     same tag scheme as the data exchange; requests [0,np) are sends,
     [np,2*np) are receives */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRMPI(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRMPI(ierr);
  /* create space for the data to be recvieved; +1 unit avoids a zero-byte allocation */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to receive */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
609 
610 /* do the actual message passing now */
/*
 Begins the data exchange (Phase D): posts one non-blocking send per
 neighbour from the packed send buffer. Only the sends are posted here; the
 matching receives and the MPI_Waitall() are performed in DMSwarmDataExEnd(),
 so the caller may overlap computation between the two calls.
*/
PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
{
  PetscMPIInt i,np;
  void       *dest;
  PetscInt    length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized");
  if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first.");
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* message length in bytes; NOTE(review): assumes it fits in MPI's int
       count — confirm for very large exchanges */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    /* requests [0,np) hold the sends; DMSwarmDataExEnd() fills [np,2*np) with receives */
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRMPI(ierr);
  }
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
635 
636 /* do the actual message passing now */
/*
 Completes the data exchange (Phase D): posts the receives matching the
 sends issued in DMSwarmDataExBegin(), then waits on all 2*np requests.
 Received data lands contiguously in recv_message, ordered by neighbour
 index via a locally computed prefix-sum of the receive counts.
*/
PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
{
  PetscMPIInt  i,np;
  PetscInt     total;
  PetscInt    *message_recv_offsets;
  void        *dest;
  PetscInt     length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first.");
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* prefix-sum of receive counts: offset (in units) of each neighbour's
     region within recv_message */
  ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* byte length per neighbour; receives fill request slots [np,2*np) */
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRMPI(ierr);
  }
  /* wait on BOTH the sends posted in DMSwarmDataExBegin() and the receives above */
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats);CHKERRMPI(ierr);
  ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
  de->communication_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
670 
671 PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de,PetscInt *length,void **send)
672 {
673   PetscFunctionBegin;
674   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed.");
675   *length = de->send_message_length;
676   *send   = de->send_message;
677   PetscFunctionReturn(0);
678 }
679 
680 PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de,PetscInt *length,void **recv)
681 {
682   PetscFunctionBegin;
683   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent.");
684   *length = de->recv_message_length;
685   *recv   = de->recv_message;
686   PetscFunctionReturn(0);
687 }
688 
689 PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
690 {
691   PetscFunctionBegin;
692   if (n)     {*n     = de->n_neighbour_procs;}
693   if (neigh) {*neigh = de->neighbour_procs;}
694   PetscFunctionReturn(0);
695 }
696