xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 7a5338279d92d13360d231b9bd26d284f35eaa49)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0; the
message id then comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
From proc 1 we send data to procs 0, 2 and 3. The data has different lengths.
All the data gets jammed into a single array, so we need to "jam" each piece into the correct location.
The Packer knows how much is going to each processor and keeps track of the inserts,
so as to avoid ever packing TOO much into one slot and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
The user has to unpack the message themselves. I can get you the pointer for each
entry, but you'll have to cast it to the appropriate data type.
37 
38 Phase A: Build topology
39 
40 Phase B: Define message lengths
41 
42 Phase C: Pack data
43 
44 Phase D: Send data
45 
46 + Constructor
47 DMSwarmDataExCreate()
48 + Phase A
49 DMSwarmDataExTopologyInitialize()
50 DMSwarmDataExTopologyAddNeighbour()
51 DMSwarmDataExTopologyAddNeighbour()
52 DMSwarmDataExTopologyFinalize()
53 + Phase B
54 DMSwarmDataExZeroAllSendCount()
55 DMSwarmDataExAddToSendCount()
56 DMSwarmDataExAddToSendCount()
57 DMSwarmDataExAddToSendCount()
58 + Phase C
59 DMSwarmDataExPackInitialize()
60 DMSwarmDataExPackData()
61 DMSwarmDataExPackData()
62 DMSwarmDataExPackFinalize()
63 +Phase D
64 DMSwarmDataExBegin()
65  ... perform any calculations ...
66 DMSwarmDataExEnd()
67 
68 ... user calls any getters here ...
69 
70 */
71 #include <petscvec.h>
72 #include <petscmat.h>
73 #include <petsc/private/petscimpl.h>
74 
75 #include "../src/dm/impls/swarm/data_ex.h"
76 
77 const char *status_names[] = {"initialized", "finalized", "unknown"};
78 
79 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
80 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
81 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
82 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
83 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
84 
85 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm, const PetscInt count, DMSwarmDataEx *ex)
86 {
87   DMSwarmDataEx d;
88 
89   PetscFunctionBegin;
90   PetscCall(PetscNew(&d));
91   PetscCallMPI(MPI_Comm_dup(comm, &d->comm));
92   PetscCallMPI(MPI_Comm_rank(d->comm, &d->rank));
93 
94   d->instance = count;
95 
96   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
97   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
98   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
99   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
100 
101   d->n_neighbour_procs = -1;
102   d->neighbour_procs   = NULL;
103 
104   d->messages_to_be_sent      = NULL;
105   d->message_offsets          = NULL;
106   d->messages_to_be_recvieved = NULL;
107 
108   d->unit_message_size   = (size_t)-1;
109   d->send_message        = NULL;
110   d->send_message_length = -1;
111   d->recv_message        = NULL;
112   d->recv_message_length = -1;
113   d->total_pack_cnt      = -1;
114   d->pack_cnt            = NULL;
115 
116   d->send_tags = NULL;
117   d->recv_tags = NULL;
118 
119   d->_stats    = NULL;
120   d->_requests = NULL;
121   *ex          = d;
122   PetscFunctionReturn(PETSC_SUCCESS);
123 }
124 
125 /*
126     This code is horrible, who let it get into main.
127 
128     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
129 
130 */
/* Prints the exchanger state to stdout.
   NOTE(review): prints on PETSC_COMM_WORLD rather than d->comm or a
   PetscViewer (see the comment above) -- behaviour kept unchanged here. */
PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
{
  PetscMPIInt p;

  PetscFunctionBegin;
  /* phase states */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%" PetscInt_FMT "\n", d->instance));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status]));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status]));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status]));

  if (d->topology_status == DEOBJECT_FINALIZED) {
    /* per-rank neighbour lists (synchronized so ranks print in order) */
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Topology:\n"));
    PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs));
    for (p = 0; p < d->n_neighbour_procs; p++) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]));
    PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
  }

  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    /* per-neighbour unit counts and the tags each message travels with */
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Message lengths:\n"));
    PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size));
    for (p = 0; p < d->n_neighbour_procs; p++) {
      PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] >>>>> ( %" PetscInt_FMT " units :: tag = %d) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p]));
    }
    for (p = 0; p < d->n_neighbour_procs; p++) {
      PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] <<<<< ( %" PetscInt_FMT " units :: tag = %d) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p]));
    }
    PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
  }
  if (d->packer_status == DEOBJECT_FINALIZED) { /* nothing printed for this phase */ }
  if (d->communication_status == DEOBJECT_FINALIZED) { /* nothing printed for this phase */ }
  PetscFunctionReturn(PETSC_SUCCESS);
}
164 
165 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
166 {
167   PetscFunctionBegin;
168   PetscCallMPI(MPI_Comm_free(&d->comm));
169   if (d->neighbour_procs) PetscCall(PetscFree(d->neighbour_procs));
170   if (d->messages_to_be_sent) PetscCall(PetscFree(d->messages_to_be_sent));
171   if (d->message_offsets) PetscCall(PetscFree(d->message_offsets));
172   if (d->messages_to_be_recvieved) PetscCall(PetscFree(d->messages_to_be_recvieved));
173   if (d->send_message) PetscCall(PetscFree(d->send_message));
174   if (d->recv_message) PetscCall(PetscFree(d->recv_message));
175   if (d->pack_cnt) PetscCall(PetscFree(d->pack_cnt));
176   if (d->send_tags) PetscCall(PetscFree(d->send_tags));
177   if (d->recv_tags) PetscCall(PetscFree(d->recv_tags));
178   if (d->_stats) PetscCall(PetscFree(d->_stats));
179   if (d->_requests) PetscCall(PetscFree(d->_requests));
180   PetscCall(PetscFree(d));
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 /* === Phase A === */
185 
/*
 (Re)starts phase A: marks the topology as initialized, resets the
 neighbour count, and releases any arrays from a previous topology so it
 can be rebuilt from scratch. PetscFree() tolerates NULL and resets each
 pointer, so this is safe on a freshly created exchanger.
*/
PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
{
  PetscFunctionBegin;
  d->topology_status   = DEOBJECT_INITIALIZED;
  d->n_neighbour_procs = 0;
  PetscCall(PetscFree(d->neighbour_procs));
  PetscCall(PetscFree(d->messages_to_be_sent));
  PetscCall(PetscFree(d->message_offsets));
  PetscCall(PetscFree(d->messages_to_be_recvieved));
  PetscCall(PetscFree(d->pack_cnt));
  PetscCall(PetscFree(d->send_tags));
  PetscCall(PetscFree(d->recv_tags));
  PetscFunctionReturn(PETSC_SUCCESS);
}
200 
201 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d, const PetscMPIInt proc_id)
202 {
203   PetscMPIInt n, found;
204   PetscMPIInt size;
205 
206   PetscFunctionBegin;
207   PetscCheck(d->topology_status != DEOBJECT_FINALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
208   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
209 
210   /* error on negative entries */
211   PetscCheck(proc_id >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour with a rank < 0");
212   /* error on ranks larger than number of procs in communicator */
213   PetscCallMPI(MPI_Comm_size(d->comm, &size));
214   PetscCheck(proc_id < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour %d with a rank >= size %d", proc_id, size);
215   if (d->n_neighbour_procs == 0) PetscCall(PetscMalloc1(1, &d->neighbour_procs));
216   /* check for proc_id */
217   found = 0;
218   for (n = 0; n < d->n_neighbour_procs; n++) {
219     if (d->neighbour_procs[n] == proc_id) found = 1;
220   }
221   if (found == 0) { /* add it to list */
222     PetscCall(PetscRealloc(sizeof(PetscMPIInt) * (d->n_neighbour_procs + 1), &d->neighbour_procs));
223     d->neighbour_procs[d->n_neighbour_procs] = proc_id;
224     d->n_neighbour_procs++;
225   }
226   PetscFunctionReturn(PETSC_SUCCESS);
227 }
228 
229 /*
230 counter: the index of the communication object
231 N: the number of processors
232 r0: rank of sender
233 r1: rank of receiver
234 
235 procs = { 0, 1, 2, 3 }
236 
237 0 ==> 0         e=0
238 0 ==> 1         e=1
239 0 ==> 2         e=2
240 0 ==> 3         e=3
241 
242 1 ==> 0         e=4
243 1 ==> 1         e=5
244 1 ==> 2         e=6
245 1 ==> 3         e=7
246 
247 2 ==> 0         e=8
248 2 ==> 1         e=9
249 2 ==> 2         e=10
250 2 ==> 3         e=11
251 
252 3 ==> 0         e=12
253 3 ==> 1         e=13
254 3 ==> 2         e=14
255 3 ==> 3         e=15
256 
257 If we require that proc A sends to proc B, then the SEND tag index will be given by
258   N * rank(A) + rank(B) + offset
259 If we require that proc A will receive from proc B, then the RECV tag index will be given by
260   N * rank(B) + rank(A) + offset
261 
262 */
263 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0, PetscMPIInt r1, PetscMPIInt maxtag, PetscMPIInt *_st, PetscMPIInt *_rt)
264 {
265   PetscMPIInt st, rt;
266 
267   st   = (N * r0 + r1 + N * N * counter) % maxtag;
268   rt   = (N * r1 + r0 + N * N * counter) % maxtag;
269   *_st = st;
270   *_rt = rt;
271 }
272 
/*
 Makes the communication map symmetric: on return (when requested),
 this rank's neighbour list contains every rank it sends to plus every
 rank that sends to it.

 Implementation: a size x size AIJ matrix is used as a distributed
 adjacency structure, with row r holding the neighbour set of rank r.
 Each rank inserts its own row, then inserts itself into the rows of all
 its neighbours; after assembly, row `rank` is the symmetrized list.

 When n_new/proc_neighbours_new are both non-NULL, a freshly allocated
 list is returned; the caller owns (and must PetscFree) the allocation.
*/
static PetscErrorCode DMSwarmDataExCompleteCommunicationMap_Private(MPI_Comm comm, PetscMPIInt n, const PetscMPIInt proc_neighbours[], PetscMPIInt *n_new, PetscMPIInt **proc_neighbours_new)
{
  Mat                A;
  PetscInt           i, j, nc;
  PetscInt           n_, *proc_neighbours_;
  PetscInt           rank_;
  PetscMPIInt        size, rank;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt        _n_new, *_proc_neighbours_new;

  PetscFunctionBegin;
  /* widen the MPI-int neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  PetscCall(PetscMalloc1(n_, &proc_neighbours_));
  for (i = 0; i < n_; ++i) proc_neighbours_[i] = proc_neighbours[i];
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  rank_ = rank;

  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, size, size));
  PetscCall(MatSetType(A, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A, 1, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, n_, NULL, n_, NULL));
  /* off-process insertions below may exceed the preallocation; allow that */
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
  /* Build original map */
  PetscCall(PetscMalloc1(n_, &vals));
  for (i = 0; i < n_; ++i) vals[i] = 1.0;
  PetscCall(MatSetValues(A, 1, &rank_, n_, proc_neighbours_, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) vals[i] = 2.0;
  PetscCall(MatSetValues(A, n_, proc_neighbours_, 1, &rank_, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /*
  PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO));
  PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* row `rank` of the assembled matrix holds the symmetrized neighbour list */
    PetscCall(MatGetRow(A, rank_, &nc, &cols, &red_vals));
    PetscCall(PetscMPIIntCast(nc, &_n_new));
    PetscCall(PetscMalloc1(_n_new, &_proc_neighbours_new));
    for (j = 0; j < nc; ++j) PetscCall(PetscMPIIntCast(cols[j], &_proc_neighbours_new[j]));
    PetscCall(MatRestoreRow(A, rank_, &nc, &cols, &red_vals));
    *n_new               = _n_new;
    *proc_neighbours_new = _proc_neighbours_new;
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFree(vals));
  PetscCall(PetscFree(proc_neighbours_));
  PetscCallMPI(MPI_Barrier(comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
334 
335 PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
336 {
337   PetscMPIInt symm_nn = 0, *symm_procs = NULL, r0, n, st, rt, size, *maxtag, flg;
338 
339   PetscFunctionBegin;
340   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
341 
342   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
343   /* given information about all my neighbours, make map symmetric */
344   PetscCall(DMSwarmDataExCompleteCommunicationMap_Private(d->comm, d->n_neighbour_procs, d->neighbour_procs, &symm_nn, &symm_procs));
345   /* update my arrays */
346   PetscCall(PetscFree(d->neighbour_procs));
347   d->n_neighbour_procs = symm_nn;
348   d->neighbour_procs   = symm_procs;
349   /* allocates memory */
350   if (!d->messages_to_be_sent) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_sent));
351   if (!d->message_offsets) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->message_offsets));
352   if (!d->messages_to_be_recvieved) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_recvieved));
353   if (!d->pack_cnt) PetscCall(PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt));
354   if (!d->_stats) PetscCall(PetscMalloc(sizeof(MPI_Status) * 2 * d->n_neighbour_procs, &d->_stats));
355   if (!d->_requests) PetscCall(PetscMalloc(sizeof(MPI_Request) * 2 * d->n_neighbour_procs, &d->_requests));
356   if (!d->send_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags));
357   if (!d->recv_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags));
358   /* compute message tags */
359   PetscCallMPI(MPI_Comm_size(d->comm, &size));
360   PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxtag, &flg));
361   PetscCheck(flg, d->comm, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
362   r0 = d->rank;
363   for (n = 0; n < d->n_neighbour_procs; ++n) {
364     PetscMPIInt r1 = d->neighbour_procs[n];
365 
366     _get_tags(d->instance, size, r0, r1, *maxtag, &st, &rt);
367     d->send_tags[n] = st;
368     d->recv_tags[n] = rt;
369   }
370   d->topology_status = DEOBJECT_FINALIZED;
371   PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 /* === Phase B === */
376 static PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de, PetscMPIInt proc_id, PetscMPIInt *local)
377 {
378   PetscMPIInt i, np;
379 
380   PetscFunctionBegin;
381   np     = de->n_neighbour_procs;
382   *local = -1;
383   for (i = 0; i < np; ++i) {
384     if (proc_id == de->neighbour_procs[i]) {
385       *local = i;
386       break;
387     }
388   }
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
393 {
394   PetscMPIInt i;
395 
396   PetscFunctionBegin;
397   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
398   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
399   de->message_lengths_status = DEOBJECT_INITIALIZED;
400   for (i = 0; i < de->n_neighbour_procs; ++i) de->messages_to_be_sent[i] = 0;
401   PetscFunctionReturn(PETSC_SUCCESS);
402 }
403 
404 /*
405 1) only allows counters to be set on neighbouring cpus
406 */
407 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de, const PetscMPIInt proc_id, const PetscInt count)
408 {
409   PetscMPIInt local_val;
410 
411   PetscFunctionBegin;
412   PetscCheck(de->message_lengths_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first");
413   PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
414 
415   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local_val));
416   PetscCheck(local_val != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Proc %d is not a valid neighbour rank", (int)proc_id);
417 
418   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421 
/*
 Phase B completion: marks the message lengths as finalized and ends the
 send-count log event started in DMSwarmDataExInitializeSendCount().
*/
PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
{
  PetscFunctionBegin;
  PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");

  de->message_lengths_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
431 
432 /* === Phase C === */
433 /*
434   zero out all send counts
435   free send and recv buffers
436   zeros out message length
437   zeros out all counters
438   zero out packed data counters
439 */
440 static PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
441 {
442   PetscMPIInt i, np;
443 
444   PetscFunctionBegin;
445   np = de->n_neighbour_procs;
446   for (i = 0; i < np; ++i) {
447     /*  de->messages_to_be_sent[i] = -1; */
448     de->messages_to_be_recvieved[i] = -1;
449   }
450   PetscCall(PetscFree(de->send_message));
451   PetscCall(PetscFree(de->recv_message));
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
/*
   Phase C start:
   - zeros out the pack data counters
   - records the unit message size
   - checks the send counts were properly initialized
   - allocates and zeroes space for the packed send data
*/
PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de, size_t unit_message_size)
{
  PetscMPIInt i, np;
  PetscInt    total;

  PetscFunctionBegin;
  PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerPack, 0, 0, 0, 0)); /* ended in DMSwarmDataExPackFinalize() */
  de->packer_status = DEOBJECT_INITIALIZED;
  PetscCall(_DMSwarmDataExInitializeTmpStorage(de));
  np                    = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total                 = 0;
  for (i = 0; i < np; ++i) {
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", proc_neighour);
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent; +1 unit so the allocation is never zero-sized */
  PetscCall(PetscMalloc(unit_message_size * (total + 1), &de->send_message));
  /* initialize memory */
  PetscCall(PetscMemzero(de->send_message, unit_message_size * (total + 1)));
  /* set total items to send */
  de->send_message_length = total;
  /* prefix-sum the send counts: message_offsets[i] is where neighbour i's data starts */
  de->message_offsets[0]  = 0;
  total                   = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total                  = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) de->pack_cnt[i] = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
499 
500 /*
501     Ensures data gets been packed appropriately and no overlaps occur
502 */
503 PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de, PetscMPIInt proc_id, PetscInt n, void *data)
504 {
505   PetscMPIInt local;
506   PetscInt    insert_location;
507   void       *dest;
508 
509   PetscFunctionBegin;
510   PetscCheck(de->packer_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
511   PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
512 
513   PetscCheck(de->send_message, de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first");
514   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local));
515   PetscCheck(local != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", proc_id);
516   PetscCheck(n + de->pack_cnt[local] <= de->messages_to_be_sent[local], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %" PetscInt_FMT ": Attempt to insert %" PetscInt_FMT, proc_id, de->messages_to_be_sent[local], n + de->pack_cnt[local]);
517 
518   /* copy memory */
519   insert_location = de->message_offsets[local] + de->pack_cnt[local];
520   dest            = ((char *)de->send_message) + de->unit_message_size * insert_location;
521   PetscCall(PetscMemcpy(dest, data, de->unit_message_size * n));
522   /* increment counter */
523   de->pack_cnt[local] = de->pack_cnt[local] + n;
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
/*
 Phase C completion:
 - ensures every neighbour's region was packed exactly full
 - exchanges send counts with all neighbours to learn the receive counts
 - allocates and zeroes the receive buffer
*/
PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  PetscInt    total;

  PetscFunctionBegin;
  PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first.");
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    PetscCheck(de->pack_cnt[i] == de->messages_to_be_sent[i], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %" PetscInt_FMT " : Inserted %" PetscInt_FMT, de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i]);
  }
  /* init */
  for (i = 0; i < np; ++i) de->messages_to_be_recvieved[i] = -1;
  /* figure out the recv counts here: exchange one count with each neighbour
     (sends use request slots [0,np), recvs use [np,2*np)) */
  for (i = 0; i < np; ++i) PetscCallMPI(MPIU_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  for (i = 0; i < np; ++i) PetscCallMPI(MPIU_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
  PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
  /* create space for the data to be recvieved; +1 unit so the allocation is never zero-sized */
  total = 0;
  for (i = 0; i < np; ++i) total = total + de->messages_to_be_recvieved[i];
  PetscCall(PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message));
  /* initialize memory */
  PetscCall(PetscMemzero(de->recv_message, de->unit_message_size * (total + 1)));
  /* set total items to receive */
  de->recv_message_length  = total;
  de->packer_status        = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerPack, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
560 
/* Phase D: do the actual message passing -- posts the non-blocking sends.
   Only request slots [0, np) are filled here; the matching receives are
   posted, and all requests completed, in DMSwarmDataExEnd(). */
PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  void       *dest;

  PetscFunctionBegin;
  PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packer not finalized");
  PetscCheck(de->communication_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* each neighbour's message starts at its prefix-sum offset within the packed buffer */
    dest = ((char *)de->send_message) + de->unit_message_size * de->message_offsets[i];
    PetscCallMPI(MPIU_Isend(dest, de->messages_to_be_sent[i] * de->unit_message_size, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  }
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
583 
/* Phase D completion: do the actual message passing now -- posts the
   non-blocking receives into request slots [np, 2*np) and then waits for
   all sends (posted in DMSwarmDataExBegin()) and receives to complete. */
PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  PetscInt    total;
  PetscInt   *message_recv_offsets;
  void       *dest;

  PetscFunctionBegin;
  PetscCheck(de->communication_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
  np = de->n_neighbour_procs;
  /* prefix-sum the recv counts: offsets of each neighbour's data in the recv buffer */
  PetscCall(PetscMalloc1(np + 1, &message_recv_offsets));
  message_recv_offsets[0] = 0;
  total                   = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total                   = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    dest = ((char *)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    PetscCallMPI(MPIU_Irecv(dest, de->messages_to_be_recvieved[i] * de->unit_message_size, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
  }
  /* complete both the sends from DMSwarmDataExBegin() and the recvs posted above */
  PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
  PetscCall(PetscFree(message_recv_offsets));
  de->communication_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
615 
616 PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de, PetscInt *length, void **send)
617 {
618   PetscFunctionBegin;
619   PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed.");
620   *length = de->send_message_length;
621   *send   = de->send_message;
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de, PetscInt *length, void **recv)
626 {
627   PetscFunctionBegin;
628   PetscCheck(de->communication_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent.");
629   *length = de->recv_message_length;
630   *recv   = de->recv_message;
631   PetscFunctionReturn(PETSC_SUCCESS);
632 }
633 
634 PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de, PetscMPIInt *n, PetscMPIInt *neigh[])
635 {
636   PetscFunctionBegin;
637   if (n) *n = de->n_neighbour_procs;
638   if (neigh) *neigh = de->neighbour_procs;
639   PetscFunctionReturn(PETSC_SUCCESS);
640 }
641