xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0) !
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
E.g.: proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0; the message id
then comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On proc 1 we send data to procs 0, 2 and 3. The data are of different lengths.
All data gets jammed into a single array, so each piece must be "jammed" into the correct location.
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 User has to unpack message themselves. I can get you the pointer for each i
36 entry, but you'll have to cast it to the appropriate data type.
37 
38 Phase A: Build topology
39 
40 Phase B: Define message lengths
41 
42 Phase C: Pack data
43 
44 Phase D: Send data
45 
46 + Constructor
47 DMSwarmDataExCreate()
48 + Phase A
49 DMSwarmDataExTopologyInitialize()
50 DMSwarmDataExTopologyAddNeighbour()
51 DMSwarmDataExTopologyAddNeighbour()
52 DMSwarmDataExTopologyFinalize()
53 + Phase B
DMSwarmDataExInitializeSendCount()
DMSwarmDataExAddToSendCount()
DMSwarmDataExAddToSendCount()
DMSwarmDataExAddToSendCount()
DMSwarmDataExFinalizeSendCount()
58 + Phase C
59 DMSwarmDataExPackInitialize()
60 DMSwarmDataExPackData()
61 DMSwarmDataExPackData()
62 DMSwarmDataExPackFinalize()
63 +Phase D
64 DMSwarmDataExBegin()
65  ... perform any calculations ...
66 DMSwarmDataExEnd()
67 
68 ... user calls any getters here ...
69 
70 */
71 #include <petscvec.h>
72 #include <petscmat.h>
73 
74 #include "../src/dm/impls/swarm/data_ex.h"
75 
76 const char *status_names[] = {"initialized", "finalized", "unknown"};
77 
78 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
79 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
80 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
81 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
82 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
83 
84 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm, const PetscInt count, DMSwarmDataEx *ex)
85 {
86   DMSwarmDataEx d;
87 
88   PetscFunctionBegin;
89   PetscCall(PetscNew(&d));
90   PetscCallMPI(MPI_Comm_dup(comm, &d->comm));
91   PetscCallMPI(MPI_Comm_rank(d->comm, &d->rank));
92 
93   d->instance = count;
94 
95   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
96   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
97   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
98   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
99 
100   d->n_neighbour_procs = -1;
101   d->neighbour_procs   = NULL;
102 
103   d->messages_to_be_sent      = NULL;
104   d->message_offsets          = NULL;
105   d->messages_to_be_recvieved = NULL;
106 
107   d->unit_message_size   = (size_t)-1;
108   d->send_message        = NULL;
109   d->send_message_length = -1;
110   d->recv_message        = NULL;
111   d->recv_message_length = -1;
112   d->total_pack_cnt      = -1;
113   d->pack_cnt            = NULL;
114 
115   d->send_tags = NULL;
116   d->recv_tags = NULL;
117 
118   d->_stats    = NULL;
119   d->_requests = NULL;
120   *ex          = d;
121   PetscFunctionReturn(PETSC_SUCCESS);
122 }
123 
124 /*
125     This code is horrible, who let it get into main.
126 
127     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
128 
129 */
130 PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
131 {
132   PetscMPIInt p;
133 
134   PetscFunctionBegin;
135   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%" PetscInt_FMT "\n", d->instance));
136   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]));
137   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status]));
138   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status]));
139   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status]));
140 
141   if (d->topology_status == DEOBJECT_FINALIZED) {
142     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Topology:\n"));
143     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs));
144     for (p = 0; p < d->n_neighbour_procs; p++) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]));
145     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
146   }
147 
148   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
149     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Message lengths:\n"));
150     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size));
151     for (p = 0; p < d->n_neighbour_procs; p++) {
152       PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] >>>>> ( %" PetscInt_FMT " units :: tag = %d) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p]));
153     }
154     for (p = 0; p < d->n_neighbour_procs; p++) {
155       PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] <<<<< ( %" PetscInt_FMT " units :: tag = %d) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p]));
156     }
157     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
158   }
159   if (d->packer_status == DEOBJECT_FINALIZED) { }
160   if (d->communication_status == DEOBJECT_FINALIZED) { }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
163 
164 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
165 {
166   PetscFunctionBegin;
167   PetscCallMPI(MPI_Comm_free(&d->comm));
168   if (d->neighbour_procs) PetscCall(PetscFree(d->neighbour_procs));
169   if (d->messages_to_be_sent) PetscCall(PetscFree(d->messages_to_be_sent));
170   if (d->message_offsets) PetscCall(PetscFree(d->message_offsets));
171   if (d->messages_to_be_recvieved) PetscCall(PetscFree(d->messages_to_be_recvieved));
172   if (d->send_message) PetscCall(PetscFree(d->send_message));
173   if (d->recv_message) PetscCall(PetscFree(d->recv_message));
174   if (d->pack_cnt) PetscCall(PetscFree(d->pack_cnt));
175   if (d->send_tags) PetscCall(PetscFree(d->send_tags));
176   if (d->recv_tags) PetscCall(PetscFree(d->recv_tags));
177   if (d->_stats) PetscCall(PetscFree(d->_stats));
178   if (d->_requests) PetscCall(PetscFree(d->_requests));
179   PetscCall(PetscFree(d));
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 /* === Phase A === */
184 
185 PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
186 {
187   PetscFunctionBegin;
188   d->topology_status   = DEOBJECT_INITIALIZED;
189   d->n_neighbour_procs = 0;
190   PetscCall(PetscFree(d->neighbour_procs));
191   PetscCall(PetscFree(d->messages_to_be_sent));
192   PetscCall(PetscFree(d->message_offsets));
193   PetscCall(PetscFree(d->messages_to_be_recvieved));
194   PetscCall(PetscFree(d->pack_cnt));
195   PetscCall(PetscFree(d->send_tags));
196   PetscCall(PetscFree(d->recv_tags));
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d, const PetscMPIInt proc_id)
201 {
202   PetscMPIInt n, found;
203   PetscMPIInt size;
204 
205   PetscFunctionBegin;
206   PetscCheck(d->topology_status != DEOBJECT_FINALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
207   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
208 
209   /* error on negative entries */
210   PetscCheck(proc_id >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour with a rank < 0");
211   /* error on ranks larger than number of procs in communicator */
212   PetscCallMPI(MPI_Comm_size(d->comm, &size));
213   PetscCheck(proc_id < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour %d with a rank >= size %d", proc_id, size);
214   if (d->n_neighbour_procs == 0) PetscCall(PetscMalloc1(1, &d->neighbour_procs));
215   /* check for proc_id */
216   found = 0;
217   for (n = 0; n < d->n_neighbour_procs; n++) {
218     if (d->neighbour_procs[n] == proc_id) found = 1;
219   }
220   if (found == 0) { /* add it to list */
221     PetscCall(PetscRealloc(sizeof(PetscMPIInt) * (d->n_neighbour_procs + 1), &d->neighbour_procs));
222     d->neighbour_procs[d->n_neighbour_procs] = proc_id;
223     d->n_neighbour_procs++;
224   }
225   PetscFunctionReturn(PETSC_SUCCESS);
226 }
227 
228 /*
229 counter: the index of the communication object
230 N: the number of processors
231 r0: rank of sender
232 r1: rank of receiver
233 
234 procs = { 0, 1, 2, 3 }
235 
236 0 ==> 0         e=0
237 0 ==> 1         e=1
238 0 ==> 2         e=2
239 0 ==> 3         e=3
240 
241 1 ==> 0         e=4
242 1 ==> 1         e=5
243 1 ==> 2         e=6
244 1 ==> 3         e=7
245 
246 2 ==> 0         e=8
247 2 ==> 1         e=9
248 2 ==> 2         e=10
249 2 ==> 3         e=11
250 
251 3 ==> 0         e=12
252 3 ==> 1         e=13
253 3 ==> 2         e=14
254 3 ==> 3         e=15
255 
256 If we require that proc A sends to proc B, then the SEND tag index will be given by
257   N * rank(A) + rank(B) + offset
258 If we require that proc A will receive from proc B, then the RECV tag index will be given by
259   N * rank(B) + rank(A) + offset
260 
261 */
262 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0, PetscMPIInt r1, PetscMPIInt maxtag, PetscMPIInt *_st, PetscMPIInt *_rt)
263 {
264   PetscMPIInt st, rt;
265 
266   st   = (N * r0 + r1 + N * N * counter) % maxtag;
267   rt   = (N * r1 + r0 + N * N * counter) % maxtag;
268   *_st = st;
269   *_rt = rt;
270 }
271 
272 /*
273 Makes the communication map symmetric
274 */
275 PetscErrorCode _DMSwarmDataExCompleteCommunicationMap(MPI_Comm comm, PetscMPIInt n, PetscMPIInt proc_neighbours[], PetscMPIInt *n_new, PetscMPIInt **proc_neighbours_new)
276 {
277   Mat                A;
278   PetscInt           i, j, nc;
279   PetscInt           n_, *proc_neighbours_;
280   PetscInt           rank_;
281   PetscMPIInt        size, rank;
282   PetscScalar       *vals;
283   const PetscInt    *cols;
284   const PetscScalar *red_vals;
285   PetscMPIInt        _n_new, *_proc_neighbours_new;
286 
287   PetscFunctionBegin;
288   n_ = n;
289   PetscCall(PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_));
290   for (i = 0; i < n_; ++i) proc_neighbours_[i] = proc_neighbours[i];
291   PetscCallMPI(MPI_Comm_size(comm, &size));
292   PetscCallMPI(MPI_Comm_rank(comm, &rank));
293   rank_ = rank;
294 
295   PetscCall(MatCreate(comm, &A));
296   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, size, size));
297   PetscCall(MatSetType(A, MATAIJ));
298   PetscCall(MatSeqAIJSetPreallocation(A, 1, NULL));
299   PetscCall(MatMPIAIJSetPreallocation(A, n_, NULL, n_, NULL));
300   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
301   /* Build original map */
302   PetscCall(PetscMalloc1(n_, &vals));
303   for (i = 0; i < n_; ++i) vals[i] = 1.0;
304   PetscCall(MatSetValues(A, 1, &rank_, n_, proc_neighbours_, vals, INSERT_VALUES));
305   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
306   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
307   /* Now force all other connections if they are not already there */
308   /* It's more efficient to do them all at once */
309   for (i = 0; i < n_; ++i) vals[i] = 2.0;
310   PetscCall(MatSetValues(A, n_, proc_neighbours_, 1, &rank_, vals, INSERT_VALUES));
311   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
312   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
313   /*
314   PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO));
315   PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
316   PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
317 */
318   if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
319     PetscCall(MatGetRow(A, rank_, &nc, &cols, &red_vals));
320     _n_new = (PetscMPIInt)nc;
321     PetscCall(PetscMalloc1(_n_new, &_proc_neighbours_new));
322     for (j = 0; j < nc; ++j) _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
323     PetscCall(MatRestoreRow(A, rank_, &nc, &cols, &red_vals));
324     *n_new               = (PetscMPIInt)_n_new;
325     *proc_neighbours_new = (PetscMPIInt *)_proc_neighbours_new;
326   }
327   PetscCall(MatDestroy(&A));
328   PetscCall(PetscFree(vals));
329   PetscCall(PetscFree(proc_neighbours_));
330   PetscCallMPI(MPI_Barrier(comm));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
335 {
336   PetscMPIInt symm_nn, *symm_procs, r0, n, st, rt, size, *maxtag, flg;
337 
338   PetscFunctionBegin;
339   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
340 
341   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
342   /* given information about all my neighbours, make map symmetric */
343   PetscCall(_DMSwarmDataExCompleteCommunicationMap(d->comm, d->n_neighbour_procs, d->neighbour_procs, &symm_nn, &symm_procs));
344   /* update my arrays */
345   PetscCall(PetscFree(d->neighbour_procs));
346   d->n_neighbour_procs = symm_nn;
347   d->neighbour_procs   = symm_procs;
348   /* allocates memory */
349   if (!d->messages_to_be_sent) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_sent));
350   if (!d->message_offsets) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->message_offsets));
351   if (!d->messages_to_be_recvieved) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_recvieved));
352   if (!d->pack_cnt) PetscCall(PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt));
353   if (!d->_stats) PetscCall(PetscMalloc(sizeof(MPI_Status) * 2 * d->n_neighbour_procs, &d->_stats));
354   if (!d->_requests) PetscCall(PetscMalloc(sizeof(MPI_Request) * 2 * d->n_neighbour_procs, &d->_requests));
355   if (!d->send_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags));
356   if (!d->recv_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags));
357   /* compute message tags */
358   PetscCallMPI(MPI_Comm_size(d->comm, &size));
359   PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxtag, &flg));
360   PetscCheck(flg, d->comm, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
361   r0 = d->rank;
362   for (n = 0; n < d->n_neighbour_procs; ++n) {
363     PetscMPIInt r1 = d->neighbour_procs[n];
364 
365     _get_tags(d->instance, size, r0, r1, *maxtag, &st, &rt);
366     d->send_tags[n] = (int)st;
367     d->recv_tags[n] = (int)rt;
368   }
369   d->topology_status = DEOBJECT_FINALIZED;
370   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
371   PetscFunctionReturn(PETSC_SUCCESS);
372 }
373 
374 /* === Phase B === */
375 PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de, PetscMPIInt proc_id, PetscMPIInt *local)
376 {
377   PetscMPIInt i, np;
378 
379   PetscFunctionBegin;
380   np     = de->n_neighbour_procs;
381   *local = -1;
382   for (i = 0; i < np; ++i) {
383     if (proc_id == de->neighbour_procs[i]) {
384       *local = i;
385       break;
386     }
387   }
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
392 {
393   PetscMPIInt i;
394 
395   PetscFunctionBegin;
396   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
397   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
398   de->message_lengths_status = DEOBJECT_INITIALIZED;
399   for (i = 0; i < de->n_neighbour_procs; ++i) de->messages_to_be_sent[i] = 0;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404 1) only allows counters to be set on neighbouring cpus
405 */
406 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de, const PetscMPIInt proc_id, const PetscInt count)
407 {
408   PetscMPIInt local_val;
409 
410   PetscFunctionBegin;
411   PetscCheck(de->message_lengths_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first");
412   PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
413 
414   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local_val));
415   PetscCheck(local_val != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Proc %d is not a valid neighbour rank", (int)proc_id);
416 
417   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
422 {
423   PetscFunctionBegin;
424   PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
425 
426   de->message_lengths_status = DEOBJECT_FINALIZED;
427   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
431 /* === Phase C === */
432 /*
433   zero out all send counts
434   free send and recv buffers
435   zeros out message length
436   zeros out all counters
437   zero out packed data counters
438 */
439 PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
440 {
441   PetscMPIInt i, np;
442 
443   PetscFunctionBegin;
444   np = de->n_neighbour_procs;
445   for (i = 0; i < np; ++i) {
446     /*  de->messages_to_be_sent[i] = -1; */
447     de->messages_to_be_recvieved[i] = -1;
448   }
449   PetscCall(PetscFree(de->send_message));
450   PetscCall(PetscFree(de->recv_message));
451   PetscFunctionReturn(PETSC_SUCCESS);
452 }
453 
454 /*
455    Zeros out pack data counters
456    Ensures mesaage length is set
457    Checks send counts properly initialized
458    allocates space for pack data
459 */
460 PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de, size_t unit_message_size)
461 {
462   PetscMPIInt i, np;
463   PetscInt    total;
464 
465   PetscFunctionBegin;
466   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
467   PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
468   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerPack, 0, 0, 0, 0));
469   de->packer_status = DEOBJECT_INITIALIZED;
470   PetscCall(_DMSwarmDataExInitializeTmpStorage(de));
471   np                    = de->n_neighbour_procs;
472   de->unit_message_size = unit_message_size;
473   total                 = 0;
474   for (i = 0; i < np; ++i) {
475     if (de->messages_to_be_sent[i] == -1) {
476       PetscMPIInt proc_neighour = de->neighbour_procs[i];
477       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", (int)proc_neighour);
478     }
479     total = total + de->messages_to_be_sent[i];
480   }
481   /* create space for the data to be sent */
482   PetscCall(PetscMalloc(unit_message_size * (total + 1), &de->send_message));
483   /* initialize memory */
484   PetscCall(PetscMemzero(de->send_message, unit_message_size * (total + 1)));
485   /* set total items to send */
486   de->send_message_length = total;
487   de->message_offsets[0]  = 0;
488   total                   = de->messages_to_be_sent[0];
489   for (i = 1; i < np; ++i) {
490     de->message_offsets[i] = total;
491     total                  = total + de->messages_to_be_sent[i];
492   }
493   /* init the packer counters */
494   de->total_pack_cnt = 0;
495   for (i = 0; i < np; ++i) de->pack_cnt[i] = 0;
496   PetscFunctionReturn(PETSC_SUCCESS);
497 }
498 
499 /*
500     Ensures data gets been packed appropriately and no overlaps occur
501 */
502 PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de, PetscMPIInt proc_id, PetscInt n, void *data)
503 {
504   PetscMPIInt local;
505   PetscInt    insert_location;
506   void       *dest;
507 
508   PetscFunctionBegin;
509   PetscCheck(de->packer_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
510   PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
511 
512   PetscCheck(de->send_message, de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first");
513   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local));
514   PetscCheck(local != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id);
515   PetscCheck(n + de->pack_cnt[local] <= de->messages_to_be_sent[local], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %" PetscInt_FMT ": Attempt to insert %" PetscInt_FMT, (int)proc_id, de->messages_to_be_sent[local], n + de->pack_cnt[local]);
516 
517   /* copy memory */
518   insert_location = de->message_offsets[local] + de->pack_cnt[local];
519   dest            = ((char *)de->send_message) + de->unit_message_size * insert_location;
520   PetscCall(PetscMemcpy(dest, data, de->unit_message_size * n));
521   /* increment counter */
522   de->pack_cnt[local] = de->pack_cnt[local] + n;
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
526 /*
527 *) Ensures all data has been packed
528 */
529 PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
530 {
531   PetscMPIInt i, np;
532   PetscInt    total;
533 
534   PetscFunctionBegin;
535   PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first.");
536   np = de->n_neighbour_procs;
537   for (i = 0; i < np; ++i) {
538     PetscCheck(de->pack_cnt[i] == de->messages_to_be_sent[i], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %" PetscInt_FMT " : Inserted %" PetscInt_FMT, (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i]);
539   }
540   /* init */
541   for (i = 0; i < np; ++i) de->messages_to_be_recvieved[i] = -1;
542   /* figure out the recv counts here */
543   for (i = 0; i < np; ++i) PetscCallMPI(MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
544   for (i = 0; i < np; ++i) PetscCallMPI(MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
545   PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
546   /* create space for the data to be recvieved */
547   total = 0;
548   for (i = 0; i < np; ++i) total = total + de->messages_to_be_recvieved[i];
549   PetscCall(PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message));
550   /* initialize memory */
551   PetscCall(PetscMemzero(de->recv_message, de->unit_message_size * (total + 1)));
552   /* set total items to receive */
553   de->recv_message_length  = total;
554   de->packer_status        = DEOBJECT_FINALIZED;
555   de->communication_status = DEOBJECT_INITIALIZED;
556   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerPack, 0, 0, 0, 0));
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 /* do the actual message passing */
561 PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
562 {
563   PetscMPIInt i, np;
564   void       *dest;
565   PetscInt    length;
566 
567   PetscFunctionBegin;
568   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
569   PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
570   PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packer not finalized");
571   PetscCheck(de->communication_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first.");
572   PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
573   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
574   np = de->n_neighbour_procs;
575   /* == NON BLOCKING == */
576   for (i = 0; i < np; ++i) {
577     length = de->messages_to_be_sent[i] * de->unit_message_size;
578     dest   = ((char *)de->send_message) + de->unit_message_size * de->message_offsets[i];
579     PetscCallMPI(MPI_Isend(dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
580   }
581   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
582   PetscFunctionReturn(PETSC_SUCCESS);
583 }
584 
585 /* do the actual message passing now */
586 PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
587 {
588   PetscMPIInt i, np;
589   PetscInt    total;
590   PetscInt   *message_recv_offsets;
591   void       *dest;
592   PetscInt    length;
593 
594   PetscFunctionBegin;
595   PetscCheck(de->communication_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first.");
596   PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
597   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
598   np = de->n_neighbour_procs;
599   PetscCall(PetscMalloc1(np + 1, &message_recv_offsets));
600   message_recv_offsets[0] = 0;
601   total                   = de->messages_to_be_recvieved[0];
602   for (i = 1; i < np; ++i) {
603     message_recv_offsets[i] = total;
604     total                   = total + de->messages_to_be_recvieved[i];
605   }
606   /* == NON BLOCKING == */
607   for (i = 0; i < np; ++i) {
608     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
609     dest   = ((char *)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
610     PetscCallMPI(MPI_Irecv(dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
611   }
612   PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
613   PetscCall(PetscFree(message_recv_offsets));
614   de->communication_status = DEOBJECT_FINALIZED;
615   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de, PetscInt *length, void **send)
620 {
621   PetscFunctionBegin;
622   PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed.");
623   *length = de->send_message_length;
624   *send   = de->send_message;
625   PetscFunctionReturn(PETSC_SUCCESS);
626 }
627 
628 PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de, PetscInt *length, void **recv)
629 {
630   PetscFunctionBegin;
631   PetscCheck(de->communication_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent.");
632   *length = de->recv_message_length;
633   *recv   = de->recv_message;
634   PetscFunctionReturn(PETSC_SUCCESS);
635 }
636 
637 PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de, PetscMPIInt *n, PetscMPIInt *neigh[])
638 {
639   PetscFunctionBegin;
640   if (n) *n = de->n_neighbour_procs;
641   if (neigh) *neigh = de->neighbour_procs;
642   PetscFunctionReturn(PETSC_SUCCESS);
643 }
644