xref: /petsc/src/dm/impls/swarm/data_ex.c (revision dfd676b1a855b7f967ece75a22ee7f6626d10f89)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data has different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
User has to unpack the message themselves. We can provide the pointer for each
entry, but you'll have to cast it to the appropriate data type.
37 
38 Phase A: Build topology
39 
40 Phase B: Define message lengths
41 
42 Phase C: Pack data
43 
44 Phase D: Send data
45 
46 + Constructor
47 DMSwarmDataExCreate()
48 + Phase A
49 DMSwarmDataExTopologyInitialize()
50 DMSwarmDataExTopologyAddNeighbour()
51 DMSwarmDataExTopologyAddNeighbour()
52 DMSwarmDataExTopologyFinalize()
53 + Phase B
54 DMSwarmDataExZeroAllSendCount()
55 DMSwarmDataExAddToSendCount()
56 DMSwarmDataExAddToSendCount()
57 DMSwarmDataExAddToSendCount()
58 + Phase C
59 DMSwarmDataExPackInitialize()
60 DMSwarmDataExPackData()
61 DMSwarmDataExPackData()
62 DMSwarmDataExPackFinalize()
63 +Phase D
64 DMSwarmDataExBegin()
65  ... perform any calculations ...
66 DMSwarmDataExEnd()
67 
68 ... user calls any getters here ...
69 
70 */
71 #include <petscvec.h>
72 #include <petscmat.h>
73 
74 #include "../src/dm/impls/swarm/data_ex.h"
75 
/* Human-readable names for the phase states, indexed by the DEObjectState value.
   NOTE(review): assumes the enum orders initialized, finalized, unknown as 0,1,2 —
   confirm against data_ex.h. */
const char *status_names[] = {"initialized", "finalized", "unknown"};

/* Logging events declared here; presumably defined/registered elsewhere in the
   swarm implementation — verify against the DMSwarm registration code. */
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
83 
84 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm,const PetscInt count, DMSwarmDataEx *ex)
85 {
86   DMSwarmDataEx  d;
87 
88   PetscFunctionBegin;
89   PetscCall(PetscNew(&d));
90   PetscCallMPI(MPI_Comm_dup(comm,&d->comm));
91   PetscCallMPI(MPI_Comm_rank(d->comm,&d->rank));
92 
93   d->instance = count;
94 
95   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
96   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
97   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
98   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
99 
100   d->n_neighbour_procs = -1;
101   d->neighbour_procs   = NULL;
102 
103   d->messages_to_be_sent      = NULL;
104   d->message_offsets          = NULL;
105   d->messages_to_be_recvieved = NULL;
106 
107   d->unit_message_size   = (size_t)-1;
108   d->send_message        = NULL;
109   d->send_message_length = -1;
110   d->recv_message        = NULL;
111   d->recv_message_length = -1;
112   d->total_pack_cnt      = -1;
113   d->pack_cnt            = NULL;
114 
115   d->send_tags = NULL;
116   d->recv_tags = NULL;
117 
118   d->_stats    = NULL;
119   d->_requests = NULL;
120   *ex = d;
121   PetscFunctionReturn(0);
122 }
123 
124 /*
125     This code is horrible, who let it get into main.
126 
127     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
128 
129 */
/*
  Print the state of each phase and, once the corresponding phase is finalized,
  the symmetrized neighbour topology and per-neighbour message lengths/tags.
  NOTE(review): prints to PETSC_COMM_WORLD rather than d->comm and writes to
  stdout instead of a PetscViewer (see the note above); behaviour kept as-is.
*/
PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
{
  PetscMPIInt    p;

  PetscFunctionBegin;
  /* one-line summary of each phase's state */
  PetscCall(PetscPrintf( PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%" PetscInt_FMT "\n",d->instance));
  PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]));
  PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status]));
  PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status]));
  PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status]));

  if (d->topology_status == DEOBJECT_FINALIZED) {
    /* per-rank neighbour lists, printed in rank order */
    PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n"));
    PetscCall(PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs));
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscCall(PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]));
    }
    PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout));
  }

  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    /* per-neighbour send (>>>>>) and receive (<<<<<) counts with their tags */
    PetscCall(PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n"));
    PetscCall(PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size));
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscCall(PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] >>>>> ( %" PetscInt_FMT " units :: tag = %d) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p]));
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscCall(PetscSynchronizedPrintf( PETSC_COMM_WORLD, "    [%d] <<<<< ( %" PetscInt_FMT " units :: tag = %d) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p]));
    }
    PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD,stdout));
  }
  /* nothing useful to display for these phases yet */
  if (d->packer_status == DEOBJECT_FINALIZED) {}
  if (d->communication_status == DEOBJECT_FINALIZED) {}
  PetscFunctionReturn(0);
}
165 
166 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
167 {
168   PetscFunctionBegin;
169   PetscCallMPI(MPI_Comm_free(&d->comm));
170   if (d->neighbour_procs) PetscCall(PetscFree(d->neighbour_procs));
171   if (d->messages_to_be_sent) PetscCall(PetscFree(d->messages_to_be_sent));
172   if (d->message_offsets) PetscCall(PetscFree(d->message_offsets));
173   if (d->messages_to_be_recvieved) PetscCall(PetscFree(d->messages_to_be_recvieved));
174   if (d->send_message) PetscCall(PetscFree(d->send_message));
175   if (d->recv_message) PetscCall(PetscFree(d->recv_message));
176   if (d->pack_cnt) PetscCall(PetscFree(d->pack_cnt));
177   if (d->send_tags) PetscCall(PetscFree(d->send_tags));
178   if (d->recv_tags) PetscCall(PetscFree(d->recv_tags));
179   if (d->_stats) PetscCall(PetscFree(d->_stats));
180   if (d->_requests) PetscCall(PetscFree(d->_requests));
181   PetscCall(PetscFree(d));
182   PetscFunctionReturn(0);
183 }
184 
185 /* === Phase A === */
186 
187 PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
188 {
189   PetscFunctionBegin;
190   d->topology_status = DEOBJECT_INITIALIZED;
191   d->n_neighbour_procs = 0;
192   PetscCall(PetscFree(d->neighbour_procs));
193   PetscCall(PetscFree(d->messages_to_be_sent));
194   PetscCall(PetscFree(d->message_offsets));
195   PetscCall(PetscFree(d->messages_to_be_recvieved));
196   PetscCall(PetscFree(d->pack_cnt));
197   PetscCall(PetscFree(d->send_tags));
198   PetscCall(PetscFree(d->recv_tags));
199   PetscFunctionReturn(0);
200 }
201 
202 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d,const PetscMPIInt proc_id)
203 {
204   PetscMPIInt    n,found;
205   PetscMPIInt    size;
206 
207   PetscFunctionBegin;
208   PetscCheck(d->topology_status != DEOBJECT_FINALIZED,d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
209   else PetscCheck(d->topology_status == DEOBJECT_INITIALIZED,d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
210 
211   /* error on negative entries */
212   PetscCheck(proc_id >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
213   /* error on ranks larger than number of procs in communicator */
214   PetscCallMPI(MPI_Comm_size(d->comm,&size));
215   PetscCheck(proc_id < size,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour %d with a rank >= size %d",proc_id,size);
216   if (d->n_neighbour_procs == 0) PetscCall(PetscMalloc1(1, &d->neighbour_procs));
217   /* check for proc_id */
218   found = 0;
219   for (n = 0; n < d->n_neighbour_procs; n++) {
220     if (d->neighbour_procs[n] == proc_id) {
221       found  = 1;
222     }
223   }
224   if (found == 0) { /* add it to list */
225     PetscCall(PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs));
226     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
227     d->n_neighbour_procs++;
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233 counter: the index of the communication object
234 N: the number of processors
235 r0: rank of sender
236 r1: rank of receiver
237 
238 procs = { 0, 1, 2, 3 }
239 
240 0 ==> 0         e=0
241 0 ==> 1         e=1
242 0 ==> 2         e=2
243 0 ==> 3         e=3
244 
245 1 ==> 0         e=4
246 1 ==> 1         e=5
247 1 ==> 2         e=6
248 1 ==> 3         e=7
249 
250 2 ==> 0         e=8
251 2 ==> 1         e=9
252 2 ==> 2         e=10
253 2 ==> 3         e=11
254 
255 3 ==> 0         e=12
256 3 ==> 1         e=13
257 3 ==> 2         e=14
258 3 ==> 3         e=15
259 
260 If we require that proc A sends to proc B, then the SEND tag index will be given by
261   N * rank(A) + rank(B) + offset
262 If we require that proc A will receive from proc B, then the RECV tag index will be given by
263   N * rank(B) + rank(A) + offset
264 
265 */
266 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
267 {
268   PetscMPIInt st,rt;
269 
270   st = N*r0 + r1   +   N*N*counter;
271   rt = N*r1 + r0   +   N*N*counter;
272   *_st = st;
273   *_rt = rt;
274 }
275 
/*
Makes the communication map symmetric: on return, if rank A listed B as a
neighbour then B also lists A.

Implemented by assembling a size x size AIJ matrix in which row r holds the
neighbours declared by rank r: each rank inserts its own row (values 1.0),
then inserts into its neighbours' rows (values 2.0) to force the transpose
entries, and finally reads back its own (now symmetrized) row sparsity
pattern as the new neighbour list. The result arrays are allocated here and
owned by the caller.
*/
PetscErrorCode _DMSwarmDataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_;
  PetscMPIInt       size,  rank;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;

  PetscFunctionBegin;
  /* widen the PetscMPIInt neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  PetscCall(PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_));
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  rank_ = rank;

  PetscCall(MatCreate(comm,&A));
  PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size));
  PetscCall(MatSetType(A,MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A,1,NULL));
  PetscCall(MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL));
  /* off-neighbour inserts below may exceed the preallocation; allow it */
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
  /* Build original map */
  PetscCall(PetscMalloc1(n_, &vals));
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  PetscCall(MatSetValues( A, 1,&rank_, n_,proc_neighbours_, vals, INSERT_VALUES));
  /* flush (not final) so the transpose inserts below can still add nonzeros */
  PetscCall(MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY));
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  PetscCall(MatSetValues( A, n_,proc_neighbours_, 1,&rank_, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
/*
  PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO));
  PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* the column indices of my row are the symmetrized neighbour ranks */
    PetscCall(MatGetRow(A, rank_, &nc, &cols, &red_vals));
    _n_new = (PetscMPIInt) nc;
    PetscCall(PetscMalloc1(_n_new, &_proc_neighbours_new));
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    PetscCall(MatRestoreRow( A, rank_, &nc, &cols, &red_vals));
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFree(vals));
  PetscCall(PetscFree(proc_neighbours_));
  PetscCallMPI(MPI_Barrier(comm));
  PetscFunctionReturn(0);
}
345 
346 PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
347 {
348   PetscMPIInt    symm_nn, *symm_procs, r0,n,st,rt, size;
349 
350   PetscFunctionBegin;
351   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED,d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
352 
353   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup,0,0,0,0));
354   /* given information about all my neighbours, make map symmetric */
355   PetscCall(_DMSwarmDataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs));
356   /* update my arrays */
357   PetscCall(PetscFree(d->neighbour_procs));
358   d->n_neighbour_procs = symm_nn;
359   d->neighbour_procs   = symm_procs;
360   /* allocates memory */
361   if (!d->messages_to_be_sent) PetscCall(PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent));
362   if (!d->message_offsets) PetscCall(PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets));
363   if (!d->messages_to_be_recvieved) PetscCall(PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved));
364   if (!d->pack_cnt) PetscCall(PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt));
365   if (!d->_stats) PetscCall(PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats));
366   if (!d->_requests) PetscCall(PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests));
367   if (!d->send_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags));
368   if (!d->recv_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags));
369   /* compute message tags */
370   PetscCallMPI(MPI_Comm_size(d->comm,&size));
371   r0 = d->rank;
372   for (n = 0; n < d->n_neighbour_procs; ++n) {
373     PetscMPIInt r1 = d->neighbour_procs[n];
374 
375     _get_tags( d->instance, size, r0,r1, &st, &rt);
376     d->send_tags[n] = (int)st;
377     d->recv_tags[n] = (int)rt;
378   }
379   d->topology_status = DEOBJECT_FINALIZED;
380   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup,0,0,0,0));
381   PetscFunctionReturn(0);
382 }
383 
384 /* === Phase B === */
385 PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
386 {
387   PetscMPIInt i,np;
388 
389   PetscFunctionBegin;
390   np = de->n_neighbour_procs;
391   *local = -1;
392   for (i = 0; i < np; ++i) {
393     if (proc_id == de->neighbour_procs[i]) {
394       *local = i;
395       break;
396     }
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
402 {
403   PetscMPIInt    i;
404 
405   PetscFunctionBegin;
406   PetscCheck(de->topology_status == DEOBJECT_FINALIZED,de->comm, PETSC_ERR_ORDER, "Topology not finalized");
407   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerSendCount,0,0,0,0));
408   de->message_lengths_status = DEOBJECT_INITIALIZED;
409   for (i = 0; i < de->n_neighbour_procs; ++i) {
410     de->messages_to_be_sent[i] = 0;
411   }
412   PetscFunctionReturn(0);
413 }
414 
415 /*
416 1) only allows counters to be set on neighbouring cpus
417 */
418 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de,const PetscMPIInt proc_id,const PetscInt count)
419 {
420   PetscMPIInt    local_val;
421 
422   PetscFunctionBegin;
423   PetscCheck(de->message_lengths_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first");
424   else PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
425 
426   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local_val));
427   PetscCheck(local_val != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id);
428 
429   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
430   PetscFunctionReturn(0);
431 }
432 
433 PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
434 {
435   PetscFunctionBegin;
436   PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
437 
438   de->message_lengths_status = DEOBJECT_FINALIZED;
439   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerSendCount,0,0,0,0));
440   PetscFunctionReturn(0);
441 }
442 
443 /* === Phase C === */
/*
  Reset temporary per-exchange storage:
  - marks every receive count as uninitialised (-1)
  - frees any previous send and recv buffers
  (send counts are left untouched; they are defined during Phase B)
*/
451 PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
452 {
453   PetscMPIInt    i, np;
454 
455   PetscFunctionBegin;
456   np = de->n_neighbour_procs;
457   for (i = 0; i < np; ++i) {
458     /*  de->messages_to_be_sent[i] = -1; */
459     de->messages_to_be_recvieved[i] = -1;
460   }
461   PetscCall(PetscFree(de->send_message));
462   PetscCall(PetscFree(de->recv_message));
463   PetscFunctionReturn(0);
464 }
465 
466 /*
467    Zeros out pack data counters
   Ensures message length is set
469    Checks send counts properly initialized
470    allocates space for pack data
471 */
472 PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de,size_t unit_message_size)
473 {
474   PetscMPIInt    i,np;
475   PetscInt       total;
476 
477   PetscFunctionBegin;
478   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
479   PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
480   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerPack,0,0,0,0));
481   de->packer_status = DEOBJECT_INITIALIZED;
482   PetscCall(_DMSwarmDataExInitializeTmpStorage(de));
483   np = de->n_neighbour_procs;
484   de->unit_message_size = unit_message_size;
485   total = 0;
486   for (i = 0; i < np; ++i) {
487     if (de->messages_to_be_sent[i] == -1) {
488       PetscMPIInt proc_neighour = de->neighbour_procs[i];
489       SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", (int)proc_neighour);
490     }
491     total = total + de->messages_to_be_sent[i];
492   }
493   /* create space for the data to be sent */
494   PetscCall(PetscMalloc(unit_message_size * (total + 1), &de->send_message));
495   /* initialize memory */
496   PetscCall(PetscMemzero(de->send_message, unit_message_size * (total + 1)));
497   /* set total items to send */
498   de->send_message_length = total;
499   de->message_offsets[0] = 0;
500   total = de->messages_to_be_sent[0];
501   for (i = 1; i < np; ++i) {
502     de->message_offsets[i] = total;
503     total = total + de->messages_to_be_sent[i];
504   }
505   /* init the packer counters */
506   de->total_pack_cnt = 0;
507   for (i = 0; i < np; ++i) {
508     de->pack_cnt[i] = 0;
509   }
510   PetscFunctionReturn(0);
511 }
512 
513 /*
    Ensures data has been packed appropriately and no overlaps occur
515 */
516 PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
517 {
518   PetscMPIInt    local;
519   PetscInt       insert_location;
520   void           *dest;
521 
522   PetscFunctionBegin;
523   PetscCheck(de->packer_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
524   else PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
525 
526   PetscCheck(de->send_message, de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first");
527   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex( de, proc_id, &local));
528   PetscCheck(local != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id);
529   PetscCheck(n+de->pack_cnt[local] <= de->messages_to_be_sent[local], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %" PetscInt_FMT ": Attempt to insert %" PetscInt_FMT,
530               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local]);
531 
532   /* copy memory */
533   insert_location = de->message_offsets[local] + de->pack_cnt[local];
534   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
535   PetscCall(PetscMemcpy(dest, data, de->unit_message_size * n));
536   /* increment counter */
537   de->pack_cnt[local] = de->pack_cnt[local] + n;
538   PetscFunctionReturn(0);
539 }
540 
541 /*
542 *) Ensures all data has been packed
543 */
/*
  End Phase C: verify every neighbour's segment was packed exactly full,
  exchange the send counts with all neighbours (non-blocking sends/recvs
  using the precomputed tags), then allocate and zero the receive buffer.
  On success the packer is FINALIZED and communication becomes INITIALIZED.
*/
PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
{
  PetscMPIInt    i,np;
  PetscInt       total;

  PetscFunctionBegin;
  PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first.");
  np = de->n_neighbour_procs;
  /* every slot must be filled exactly — partial packs are an error */
  for (i = 0; i < np; ++i) {
    PetscCheck(de->pack_cnt[i] == de->messages_to_be_sent[i], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %" PetscInt_FMT " : Inserted %" PetscInt_FMT,
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i]);
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* exchange the counts: the recv tag on this side matches the sender's send
     tag (twin edge, see _get_tags()), so each pair matches uniquely */
  for (i = 0; i < np; ++i) {
    PetscCallMPI(MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  }
  for (i = 0; i < np; ++i) {
    PetscCallMPI(MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]));
  }
  PetscCallMPI(MPI_Waitall(2*np, de->_requests, de->_stats));
  /* create space for the data to be received (+1 unit so a zero-length
     exchange still yields a valid allocation) */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  PetscCall(PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message));
  /* initialize memory */
  PetscCall(PetscMemzero(de->recv_message, de->unit_message_size * (total + 1)));
  /* set total items to receive */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerPack,0,0,0,0));
  PetscFunctionReturn(0);
}
583 
584 /* do the actual message passing */
/*
  Begin Phase D: post the non-blocking sends of the packed data to every
  neighbour. The matching receives and the Waitall happen in
  DMSwarmDataExEnd(), so the caller may overlap computation in between.
*/
PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
{
  PetscMPIInt    i,np;
  void           *dest;
  PetscInt       length;

  PetscFunctionBegin;
  PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packer not finalized");
  PetscCheck(de->communication_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerBegin,0,0,0,0));
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* message length in bytes; sent as MPI_CHAR. NOTE(review): length is a
       PetscInt but MPI_Isend takes an int count — could overflow for very
       large messages; confirm expected message sizes */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    PetscCallMPI(MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  }
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerBegin,0,0,0,0));
  PetscFunctionReturn(0);
}
608 
609 /* do the actual message passing now */
/*
  End Phase D: compute per-neighbour offsets into the receive buffer, post
  the non-blocking receives matching the sends from DMSwarmDataExBegin(),
  and wait for all 2*np requests (sends posted in Begin occupy requests
  [0,np); receives use [np,2*np)). Marks communication FINALIZED so the
  getters may be called.
*/
PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscInt       *message_recv_offsets;
  void           *dest;
  PetscInt       length;

  PetscFunctionBegin;
  PetscCheck(de->communication_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerEnd,0,0,0,0));
  np = de->n_neighbour_procs;
  /* prefix-sum of the receive counts gives each neighbour's offset */
  PetscCall(PetscMalloc1(np+1, &message_recv_offsets));
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    PetscCallMPI(MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]));
  }
  /* completes both the sends posted in DMSwarmDataExBegin() and these recvs */
  PetscCallMPI(MPI_Waitall( 2*np, de->_requests, de->_stats));
  PetscCall(PetscFree(message_recv_offsets));
  de->communication_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerEnd,0,0,0,0));
  PetscFunctionReturn(0);
}
642 
643 PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de,PetscInt *length,void **send)
644 {
645   PetscFunctionBegin;
646   PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed.");
647   *length = de->send_message_length;
648   *send   = de->send_message;
649   PetscFunctionReturn(0);
650 }
651 
652 PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de,PetscInt *length,void **recv)
653 {
654   PetscFunctionBegin;
655   PetscCheck(de->communication_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent.");
656   *length = de->recv_message_length;
657   *recv   = de->recv_message;
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
662 {
663   PetscFunctionBegin;
664   if (n)     {*n     = de->n_neighbour_procs;}
665   if (neigh) {*neigh = de->neighbour_procs;}
666   PetscFunctionReturn(0);
667 }
668