xref: /petsc/src/dm/impls/swarm/data_ex.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
17 E.g.: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
26 On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
28 The Packer knows how much is going to each processor and keeps track of the inserts
29 so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 User has to unpack the message themselves. I can get you the pointer for each
36 entry, but you'll have to cast it to the appropriate data type.
37 
38 Phase A: Build topology
39 
40 Phase B: Define message lengths
41 
42 Phase C: Pack data
43 
44 Phase D: Send data
45 
46 + Constructor
47 DMSwarmDataExCreate()
48 + Phase A
49 DMSwarmDataExTopologyInitialize()
50 DMSwarmDataExTopologyAddNeighbour()
51 DMSwarmDataExTopologyAddNeighbour()
52 DMSwarmDataExTopologyFinalize()
53 + Phase B
54 DMSwarmDataExInitializeSendCount()
55 DMSwarmDataExAddToSendCount()
56 DMSwarmDataExAddToSendCount()
57 DMSwarmDataExAddToSendCount()
58 + Phase C
59 DMSwarmDataExPackInitialize()
60 DMSwarmDataExPackData()
61 DMSwarmDataExPackData()
62 DMSwarmDataExPackFinalize()
63 +Phase D
64 DMSwarmDataExBegin()
65  ... perform any calculations ...
66 DMSwarmDataExEnd()
67 
68 ... user calls any getters here ...
69 
70 */
71 #include <petscvec.h>
72 #include <petscmat.h>
73 
74 #include "../src/dm/impls/swarm/data_ex.h"
75 
/* Human-readable labels for the DEOBJECT_* state values printed by
   DMSwarmDataExView(). Indexed directly by the status enum, so the order here
   must match the enum order in data_ex.h — assumes DEOBJECT_INITIALIZED == 0,
   DEOBJECT_FINALIZED == 1, DEOBJECT_STATE_UNKNOWN == 2; TODO confirm against header. */
const char *status_names[] = {"initialized", "finalized", "unknown"};

/* Profiling events; registered elsewhere in the DMSwarm implementation. */
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
83 
84 PetscErrorCode DMSwarmDataExCreate(MPI_Comm comm, const PetscInt count, DMSwarmDataEx *ex)
85 {
86   DMSwarmDataEx d;
87 
88   PetscFunctionBegin;
89   PetscCall(PetscNew(&d));
90   PetscCallMPI(MPI_Comm_dup(comm, &d->comm));
91   PetscCallMPI(MPI_Comm_rank(d->comm, &d->rank));
92 
93   d->instance = count;
94 
95   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
96   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
97   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
98   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
99 
100   d->n_neighbour_procs = -1;
101   d->neighbour_procs   = NULL;
102 
103   d->messages_to_be_sent      = NULL;
104   d->message_offsets          = NULL;
105   d->messages_to_be_recvieved = NULL;
106 
107   d->unit_message_size   = (size_t)-1;
108   d->send_message        = NULL;
109   d->send_message_length = -1;
110   d->recv_message        = NULL;
111   d->recv_message_length = -1;
112   d->total_pack_cnt      = -1;
113   d->pack_cnt            = NULL;
114 
115   d->send_tags = NULL;
116   d->recv_tags = NULL;
117 
118   d->_stats    = NULL;
119   d->_requests = NULL;
120   *ex          = d;
121   PetscFunctionReturn(0);
122 }
123 
124 /*
125     This code is horrible, who let it get into main.
126 
127     Should be printing to a viewer, should not be using PETSC_COMM_WORLD
128 
129 */
130 PetscErrorCode DMSwarmDataExView(DMSwarmDataEx d)
131 {
132   PetscMPIInt p;
133 
134   PetscFunctionBegin;
135   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "DMSwarmDataEx: instance=%" PetscInt_FMT "\n", d->instance));
136   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]));
137   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status]));
138   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status]));
139   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status]));
140 
141   if (d->topology_status == DEOBJECT_FINALIZED) {
142     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Topology:\n"));
143     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] neighbours: %d \n", d->rank, d->n_neighbour_procs));
144     for (p = 0; p < d->n_neighbour_procs; p++) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d]   neighbour[%d] = %d \n", d->rank, p, d->neighbour_procs[p]));
145     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
146   }
147 
148   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
149     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Message lengths:\n"));
150     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] atomic size: %ld \n", d->rank, (long int)d->unit_message_size));
151     for (p = 0; p < d->n_neighbour_procs; p++) {
152       PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] >>>>> ( %" PetscInt_FMT " units :: tag = %d) >>>>> [%d] \n", d->rank, d->messages_to_be_sent[p], d->send_tags[p], d->neighbour_procs[p]));
153     }
154     for (p = 0; p < d->n_neighbour_procs; p++) {
155       PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "    [%d] <<<<< ( %" PetscInt_FMT " units :: tag = %d) <<<<< [%d] \n", d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], d->neighbour_procs[p]));
156     }
157     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, stdout));
158   }
159   if (d->packer_status == DEOBJECT_FINALIZED) { }
160   if (d->communication_status == DEOBJECT_FINALIZED) { }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode DMSwarmDataExDestroy(DMSwarmDataEx d)
165 {
166   PetscFunctionBegin;
167   PetscCallMPI(MPI_Comm_free(&d->comm));
168   if (d->neighbour_procs) PetscCall(PetscFree(d->neighbour_procs));
169   if (d->messages_to_be_sent) PetscCall(PetscFree(d->messages_to_be_sent));
170   if (d->message_offsets) PetscCall(PetscFree(d->message_offsets));
171   if (d->messages_to_be_recvieved) PetscCall(PetscFree(d->messages_to_be_recvieved));
172   if (d->send_message) PetscCall(PetscFree(d->send_message));
173   if (d->recv_message) PetscCall(PetscFree(d->recv_message));
174   if (d->pack_cnt) PetscCall(PetscFree(d->pack_cnt));
175   if (d->send_tags) PetscCall(PetscFree(d->send_tags));
176   if (d->recv_tags) PetscCall(PetscFree(d->recv_tags));
177   if (d->_stats) PetscCall(PetscFree(d->_stats));
178   if (d->_requests) PetscCall(PetscFree(d->_requests));
179   PetscCall(PetscFree(d));
180   PetscFunctionReturn(0);
181 }
182 
183 /* === Phase A === */
184 
185 PetscErrorCode DMSwarmDataExTopologyInitialize(DMSwarmDataEx d)
186 {
187   PetscFunctionBegin;
188   d->topology_status   = DEOBJECT_INITIALIZED;
189   d->n_neighbour_procs = 0;
190   PetscCall(PetscFree(d->neighbour_procs));
191   PetscCall(PetscFree(d->messages_to_be_sent));
192   PetscCall(PetscFree(d->message_offsets));
193   PetscCall(PetscFree(d->messages_to_be_recvieved));
194   PetscCall(PetscFree(d->pack_cnt));
195   PetscCall(PetscFree(d->send_tags));
196   PetscCall(PetscFree(d->recv_tags));
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode DMSwarmDataExTopologyAddNeighbour(DMSwarmDataEx d, const PetscMPIInt proc_id)
201 {
202   PetscMPIInt n, found;
203   PetscMPIInt size;
204 
205   PetscFunctionBegin;
206   PetscCheck(d->topology_status != DEOBJECT_FINALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DMSwarmDataExTopologyInitialize() first");
207   PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");
208 
209   /* error on negative entries */
210   PetscCheck(proc_id >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour with a rank < 0");
211   /* error on ranks larger than number of procs in communicator */
212   PetscCallMPI(MPI_Comm_size(d->comm, &size));
213   PetscCheck(proc_id < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to set proc neighbour %d with a rank >= size %d", proc_id, size);
214   if (d->n_neighbour_procs == 0) PetscCall(PetscMalloc1(1, &d->neighbour_procs));
215   /* check for proc_id */
216   found = 0;
217   for (n = 0; n < d->n_neighbour_procs; n++) {
218     if (d->neighbour_procs[n] == proc_id) found = 1;
219   }
220   if (found == 0) { /* add it to list */
221     PetscCall(PetscRealloc(sizeof(PetscMPIInt) * (d->n_neighbour_procs + 1), &d->neighbour_procs));
222     d->neighbour_procs[d->n_neighbour_procs] = proc_id;
223     d->n_neighbour_procs++;
224   }
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229 counter: the index of the communication object
230 N: the number of processors
231 r0: rank of sender
232 r1: rank of receiver
233 
234 procs = { 0, 1, 2, 3 }
235 
236 0 ==> 0         e=0
237 0 ==> 1         e=1
238 0 ==> 2         e=2
239 0 ==> 3         e=3
240 
241 1 ==> 0         e=4
242 1 ==> 1         e=5
243 1 ==> 2         e=6
244 1 ==> 3         e=7
245 
246 2 ==> 0         e=8
247 2 ==> 1         e=9
248 2 ==> 2         e=10
249 2 ==> 3         e=11
250 
251 3 ==> 0         e=12
252 3 ==> 1         e=13
253 3 ==> 2         e=14
254 3 ==> 3         e=15
255 
256 If we require that proc A sends to proc B, then the SEND tag index will be given by
257   N * rank(A) + rank(B) + offset
258 If we require that proc A will receive from proc B, then the RECV tag index will be given by
259   N * rank(B) + rank(A) + offset
260 
261 */
262 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0, PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
263 {
264   PetscMPIInt st, rt;
265 
266   st   = N * r0 + r1 + N * N * counter;
267   rt   = N * r1 + r0 + N * N * counter;
268   *_st = st;
269   *_rt = rt;
270 }
271 
272 /*
273 Makes the communication map symmetric
274 */
/*
Makes the communication map symmetric: on return (when n_new/proc_neighbours_new
are non-NULL) the output list contains rank B iff this rank listed B, or B
listed this rank.

Implementation: a size x size parallel AIJ matrix is used as a distributed
adjacency table. Each rank inserts its own row (rank -> neighbours), then the
transposed entries (neighbours -> rank); after assembly, the nonzero column
indices of row `rank` are exactly the symmetrized neighbour set.

The caller owns *proc_neighbours_new and frees it with PetscFree().
*/
PetscErrorCode _DMSwarmDataExCompleteCommunicationMap(MPI_Comm comm, PetscMPIInt n, PetscMPIInt proc_neighbours[], PetscMPIInt *n_new, PetscMPIInt **proc_neighbours_new)
{
  Mat                A;
  PetscInt           i, j, nc;
  PetscInt           n_, *proc_neighbours_;
  PetscInt           rank_;
  PetscMPIInt        size, rank;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt        _n_new, *_proc_neighbours_new;

  PetscFunctionBegin;
  /* widen the PetscMPIInt inputs to PetscInt for use as Mat row/column indices */
  n_ = n;
  PetscCall(PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_));
  for (i = 0; i < n_; ++i) proc_neighbours_[i] = proc_neighbours[i];
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  rank_ = rank;

  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, size, size));
  PetscCall(MatSetType(A, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A, 1, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, n_, NULL, n_, NULL));
  /* other ranks may insert into our row beyond the preallocation; permit that */
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
  /* Build original map */
  PetscCall(PetscMalloc1(n_, &vals));
  for (i = 0; i < n_; ++i) vals[i] = 1.0;
  PetscCall(MatSetValues(A, 1, &rank_, n_, proc_neighbours_, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) vals[i] = 2.0;
  PetscCall(MatSetValues(A, n_, proc_neighbours_, 1, &rank_, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /*
  PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO));
  PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* the nonzero columns of our assembled row are the symmetrized neighbour list */
    PetscCall(MatGetRow(A, rank_, &nc, &cols, &red_vals));
    _n_new = (PetscMPIInt)nc;
    PetscCall(PetscMalloc1(_n_new, &_proc_neighbours_new));
    for (j = 0; j < nc; ++j) _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    PetscCall(MatRestoreRow(A, rank_, &nc, &cols, &red_vals));
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt *)_proc_neighbours_new;
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFree(vals));
  PetscCall(PetscFree(proc_neighbours_));
  PetscCallMPI(MPI_Barrier(comm));
  PetscFunctionReturn(0);
}
333 
/*
 Closes phase A: symmetrizes the neighbour map across the communicator,
 allocates all neighbour-count-sized work arrays, and computes the matched
 send/recv tag pair for every neighbour edge.

 NOTE(review): tags grow as size^2 * instance (see _get_tags()); on large
 communicators or high instance counts this may exceed the MPI tag upper
 bound (MPI_TAG_UB) — confirm for target MPI implementations.
*/
PetscErrorCode DMSwarmDataExTopologyFinalize(DMSwarmDataEx d)
{
  PetscMPIInt symm_nn, *symm_procs, r0, n, st, rt, size;

  PetscFunctionBegin;
  PetscCheck(d->topology_status == DEOBJECT_INITIALIZED, d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DMSwarmDataExTopologyInitialize() first");

  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
  /* given information about all my neighbours, make map symmetric */
  PetscCall(_DMSwarmDataExCompleteCommunicationMap(d->comm, d->n_neighbour_procs, d->neighbour_procs, &symm_nn, &symm_procs));
  /* update my arrays: replace the one-sided list with the symmetrized one */
  PetscCall(PetscFree(d->neighbour_procs));
  d->n_neighbour_procs = symm_nn;
  d->neighbour_procs   = symm_procs;
  /* allocates memory — only when the pointer is NULL, i.e. on first finalize
     or after DMSwarmDataExTopologyInitialize() freed the arrays; this assumes
     a non-NULL array is already sized for the current neighbour count.
     The +1 over-allocation keeps index 0 valid when there are no neighbours. */
  if (!d->messages_to_be_sent) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_sent));
  if (!d->message_offsets) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->message_offsets));
  if (!d->messages_to_be_recvieved) PetscCall(PetscMalloc1(d->n_neighbour_procs + 1, &d->messages_to_be_recvieved));
  if (!d->pack_cnt) PetscCall(PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt));
  /* 2*np entries: slots [0,np) for sends, [np,2np) for receives (see Waitall users) */
  if (!d->_stats) PetscCall(PetscMalloc(sizeof(MPI_Status) * 2 * d->n_neighbour_procs, &d->_stats));
  if (!d->_requests) PetscCall(PetscMalloc(sizeof(MPI_Request) * 2 * d->n_neighbour_procs, &d->_requests));
  if (!d->send_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags));
  if (!d->recv_tags) PetscCall(PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags));
  /* compute message tags: my send tag for edge (r0 -> r1) equals r1's recv tag for the same edge */
  PetscCallMPI(MPI_Comm_size(d->comm, &size));
  r0 = d->rank;
  for (n = 0; n < d->n_neighbour_procs; ++n) {
    PetscMPIInt r1 = d->neighbour_procs[n];

    _get_tags(d->instance, size, r0, r1, &st, &rt);
    d->send_tags[n] = (int)st;
    d->recv_tags[n] = (int)rt;
  }
  d->topology_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}
371 
372 /* === Phase B === */
373 PetscErrorCode _DMSwarmDataExConvertProcIdToLocalIndex(DMSwarmDataEx de, PetscMPIInt proc_id, PetscMPIInt *local)
374 {
375   PetscMPIInt i, np;
376 
377   PetscFunctionBegin;
378   np     = de->n_neighbour_procs;
379   *local = -1;
380   for (i = 0; i < np; ++i) {
381     if (proc_id == de->neighbour_procs[i]) {
382       *local = i;
383       break;
384     }
385   }
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode DMSwarmDataExInitializeSendCount(DMSwarmDataEx de)
390 {
391   PetscMPIInt i;
392 
393   PetscFunctionBegin;
394   PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
395   PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
396   de->message_lengths_status = DEOBJECT_INITIALIZED;
397   for (i = 0; i < de->n_neighbour_procs; ++i) de->messages_to_be_sent[i] = 0;
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402 1) only allows counters to be set on neighbouring cpus
403 */
404 PetscErrorCode DMSwarmDataExAddToSendCount(DMSwarmDataEx de, const PetscMPIInt proc_id, const PetscInt count)
405 {
406   PetscMPIInt local_val;
407 
408   PetscFunctionBegin;
409   PetscCheck(de->message_lengths_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DMSwarmDataExInitializeSendCount() first");
410   PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");
411 
412   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local_val));
413   PetscCheck(local_val != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Proc %d is not a valid neighbour rank", (int)proc_id);
414 
415   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
416   PetscFunctionReturn(0);
417 }
418 
/*
 Closes phase B: locks the per-neighbour send counts and ends the log event
 opened by DMSwarmDataExInitializeSendCount().
*/
PetscErrorCode DMSwarmDataExFinalizeSendCount(DMSwarmDataEx de)
{
  PetscFunctionBegin;
  PetscCheck(de->message_lengths_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DMSwarmDataExInitializeSendCount() first");

  de->message_lengths_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerSendCount, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}
428 
429 /* === Phase C === */
430 /*
431   zero out all send counts
432   free send and recv buffers
433   zeros out message length
434   zeros out all counters
435   zero out packed data counters
436 */
437 PetscErrorCode _DMSwarmDataExInitializeTmpStorage(DMSwarmDataEx de)
438 {
439   PetscMPIInt i, np;
440 
441   PetscFunctionBegin;
442   np = de->n_neighbour_procs;
443   for (i = 0; i < np; ++i) {
444     /*  de->messages_to_be_sent[i] = -1; */
445     de->messages_to_be_recvieved[i] = -1;
446   }
447   PetscCall(PetscFree(de->send_message));
448   PetscCall(PetscFree(de->recv_message));
449   PetscFunctionReturn(0);
450 }
451 
452 /*
453    Zeros out pack data counters
454    Ensures message length is set
455    Checks send counts properly initialized
456    allocates space for pack data
457 */
/*
 Opens phase C: records the per-unit message size, allocates and zeroes the
 send buffer sized from the finalized send counts, computes each neighbour's
 offset into that buffer, and resets the per-neighbour pack counters.
*/
PetscErrorCode DMSwarmDataExPackInitialize(DMSwarmDataEx de, size_t unit_message_size)
{
  PetscMPIInt i, np;
  PetscInt    total;

  PetscFunctionBegin;
  PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerPack, 0, 0, 0, 0));
  de->packer_status = DEOBJECT_INITIALIZED;
  /* reset recv counts and drop buffers from any previous exchange */
  PetscCall(_DMSwarmDataExInitializeTmpStorage(de));
  np                    = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total                 = 0;
  for (i = 0; i < np; ++i) {
    /* -1 is the "never set" sentinel from _DMSwarmDataExInitializeTmpStorage() */
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DMSwarmDataExSetSendCount() first", (int)proc_neighour);
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent; +1 unit keeps the allocation
     non-empty even when there is nothing to send */
  PetscCall(PetscMalloc(unit_message_size * (total + 1), &de->send_message));
  /* initialize memory */
  PetscCall(PetscMemzero(de->send_message, unit_message_size * (total + 1)));
  /* set total items to send */
  de->send_message_length = total;
  /* exclusive prefix sum of send counts: message_offsets[i] is neighbour i's
     starting unit index within send_message */
  de->message_offsets[0]  = 0;
  total                   = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total                  = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) de->pack_cnt[i] = 0;
  PetscFunctionReturn(0);
}
496 
497 /*
498     Ensures data has been packed appropriately and no overlaps occur
499 */
500 PetscErrorCode DMSwarmDataExPackData(DMSwarmDataEx de, PetscMPIInt proc_id, PetscInt n, void *data)
501 {
502   PetscMPIInt local;
503   PetscInt    insert_location;
504   void       *dest;
505 
506   PetscFunctionBegin;
507   PetscCheck(de->packer_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
508   PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DMSwarmDataExInitializeSendCount(), DMSwarmDataExAddToSendCount(), DMSwarmDataExPackInitialize() first");
509 
510   PetscCheck(de->send_message, de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DMSwarmDataExPackInitialize() first");
511   PetscCall(_DMSwarmDataExConvertProcIdToLocalIndex(de, proc_id, &local));
512   PetscCheck(local != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id);
513   PetscCheck(n + de->pack_cnt[local] <= de->messages_to_be_sent[local], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %" PetscInt_FMT ": Attempt to insert %" PetscInt_FMT, (int)proc_id, de->messages_to_be_sent[local], n + de->pack_cnt[local]);
514 
515   /* copy memory */
516   insert_location = de->message_offsets[local] + de->pack_cnt[local];
517   dest            = ((char *)de->send_message) + de->unit_message_size * insert_location;
518   PetscCall(PetscMemcpy(dest, data, de->unit_message_size * n));
519   /* increment counter */
520   de->pack_cnt[local] = de->pack_cnt[local] + n;
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525 *) Ensures all data has been packed
526 */
/*
 Closes phase C: verifies every declared unit was packed, exchanges the
 per-neighbour message lengths (one MPIU_INT each, nonblocking, then a
 Waitall), and allocates/zeroes the receive buffer sized from the counts
 obtained. Leaves the exchanger ready for DMSwarmDataExBegin().
*/
PetscErrorCode DMSwarmDataExPackFinalize(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  PetscInt    total;

  PetscFunctionBegin;
  PetscCheck(de->packer_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DMSwarmDataExPackInitialize() first.");
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    PetscCheck(de->pack_cnt[i] == de->messages_to_be_sent[i], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %" PetscInt_FMT " : Inserted %" PetscInt_FMT, (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i]);
  }
  /* init */
  for (i = 0; i < np; ++i) de->messages_to_be_recvieved[i] = -1;
  /* figure out the recv counts here: requests [0,np) carry sends, [np,2np) receives */
  for (i = 0; i < np; ++i) PetscCallMPI(MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  for (i = 0; i < np; ++i) PetscCallMPI(MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
  PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
  /* create space for the data to be recvieved; +1 unit keeps the allocation
     non-empty even when nothing is incoming */
  total = 0;
  for (i = 0; i < np; ++i) total = total + de->messages_to_be_recvieved[i];
  PetscCall(PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message));
  /* initialize memory */
  PetscCall(PetscMemzero(de->recv_message, de->unit_message_size * (total + 1)));
  /* set total items to receive */
  de->recv_message_length  = total;
  de->packer_status        = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerPack, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}
557 
558 /* do the actual message passing */
/*
 Starts phase D: posts one nonblocking send per neighbour from the packed
 send buffer. The matching receives are posted (and everything completed) in
 DMSwarmDataExEnd(); the caller may compute in between.

 NOTE(review): length = units * unit_message_size is a PetscInt passed as the
 MPI count argument (an int); for very large messages this could overflow —
 confirm against expected message sizes.
*/
PetscErrorCode DMSwarmDataExBegin(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  void       *dest;
  PetscInt    length;

  PetscFunctionBegin;
  PetscCheck(de->topology_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  PetscCheck(de->message_lengths_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Message lengths not finalized");
  PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Packer not finalized");
  PetscCheck(de->communication_status != DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* byte count and starting address of neighbour i's slice of send_message */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest   = ((char *)de->send_message) + de->unit_message_size * de->message_offsets[i];
    PetscCallMPI(MPI_Isend(dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]));
  }
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerBegin, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}
582 
583 /* do the actual message passing now */
/*
 Completes phase D: computes receive offsets (exclusive prefix sum of the
 negotiated receive counts), posts one nonblocking receive per neighbour into
 the receive buffer, then waits on all 2*np requests — the np sends posted by
 DMSwarmDataExBegin() plus the np receives posted here.
*/
PetscErrorCode DMSwarmDataExEnd(DMSwarmDataEx de)
{
  PetscMPIInt i, np;
  PetscInt    total;
  PetscInt   *message_recv_offsets;
  void       *dest;
  PetscInt    length;

  PetscFunctionBegin;
  PetscCheck(de->communication_status == DEOBJECT_INITIALIZED, de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DMSwarmDataExInitialize() first.");
  PetscCheck(de->recv_message, de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DMSwarmDataExPackFinalize() first");
  PetscCall(PetscLogEventBegin(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
  np = de->n_neighbour_procs;
  /* exclusive prefix sum: where each neighbour's data lands in recv_message */
  PetscCall(PetscMalloc1(np + 1, &message_recv_offsets));
  message_recv_offsets[0] = 0;
  total                   = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total                   = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest   = ((char *)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    /* recv requests occupy slots [np, 2np); sends from DMSwarmDataExBegin() are in [0, np) */
    PetscCallMPI(MPI_Irecv(dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np + i]));
  }
  PetscCallMPI(MPI_Waitall(2 * np, de->_requests, de->_stats));
  PetscCall(PetscFree(message_recv_offsets));
  de->communication_status = DEOBJECT_FINALIZED;
  PetscCall(PetscLogEventEnd(DMSWARM_DataExchangerEnd, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}
616 
/*
 Returns the packed send buffer and its length in units. The buffer is owned
 by the exchanger (do not free); only valid once packing is finalized.
*/
PetscErrorCode DMSwarmDataExGetSendData(DMSwarmDataEx de, PetscInt *length, void **send)
{
  PetscFunctionBegin;
  PetscCheck(de->packer_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed.");
  *length = de->send_message_length;
  *send   = de->send_message;
  PetscFunctionReturn(0);
}
625 
/*
 Returns the received-data buffer and its length in units. The buffer is
 owned by the exchanger (do not free); only valid after DMSwarmDataExEnd().
 Callers must cast the entries to the packed data type themselves.
*/
PetscErrorCode DMSwarmDataExGetRecvData(DMSwarmDataEx de, PetscInt *length, void **recv)
{
  PetscFunctionBegin;
  PetscCheck(de->communication_status == DEOBJECT_FINALIZED, de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent.");
  *length = de->recv_message_length;
  *recv   = de->recv_message;
  PetscFunctionReturn(0);
}
634 
/*
 Returns (optionally) the neighbour count and a borrowed pointer to the
 neighbour-rank array; either output may be NULL. The array is owned by the
 exchanger — do not free it.
*/
PetscErrorCode DMSwarmDataExTopologyGetNeighbours(DMSwarmDataEx de, PetscMPIInt *n, PetscMPIInt *neigh[])
{
  PetscFunctionBegin;
  if (n) *n = de->n_neighbour_procs;
  if (neigh) *neigh = de->neighbour_procs;
  PetscFunctionReturn(0);
}
642