xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 3d96e9964ff330fd2a9eee374bcd4376da7efe60)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0, and then the
message id comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data has different lengths.
All data gets jammed into a single array. Need to "jam" data into the correct locations.
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 + Constructor
51 DataExCreate()
52 + Phase A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 + Phase B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 + Phase C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 +Phase D
68 DataExBegin()
69  ... perform any calculations ...
70 DataExEnd()
71 
72 ... user calls any getters here ...
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
/* Human-readable names for the exchanger state values printed by DataExView().
   NOTE(review): assumes the DEOBJECT_* state values index this array as
   initialized=0, finalized=1, unknown=2 -- confirm against data_ex.h. */
const char *status_names[] = {"initialized", "finalized", "unknown"};

/* Profiling log events; registered elsewhere (declarations only) */
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
90 
91 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
92 {
93   PetscErrorCode ierr;
94   DataEx         d;
95 
96   PetscFunctionBegin;
97   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
98   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
99   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
100   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
101 
102   d->instance = count;
103 
104   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
105   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
106   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
107   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
108 
109   d->n_neighbour_procs = -1;
110   d->neighbour_procs   = NULL;
111 
112   d->messages_to_be_sent      = NULL;
113   d->message_offsets          = NULL;
114   d->messages_to_be_recvieved = NULL;
115 
116   d->unit_message_size   = -1;
117   d->send_message        = NULL;
118   d->send_message_length = -1;
119   d->recv_message        = NULL;
120   d->recv_message_length = -1;
121   d->total_pack_cnt      = -1;
122   d->pack_cnt            = NULL;
123 
124   d->send_tags = NULL;
125   d->recv_tags = NULL;
126 
127   d->_stats    = NULL;
128   d->_requests = NULL;
129   *ex = d;
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode DataExView(DataEx d)
134 {
135   PetscMPIInt p;
136   PetscErrorCode ierr;
137 
138   PetscFunctionBegin;
139   ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
140   ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
141   ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
142   ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
143   ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);
144 
145   if (d->topology_status == DEOBJECT_FINALIZED) {
146     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
147     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
148     for (p=0; p<d->n_neighbour_procs; p++) {
149       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
150     }
151   }
152   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
153     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
154     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
155     for (p=0; p<d->n_neighbour_procs; p++) {
156       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
157     }
158     for (p=0; p<d->n_neighbour_procs; p++) {
159       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
160     }
161   }
162   if (d->packer_status == DEOBJECT_FINALIZED) {}
163   if (d->communication_status == DEOBJECT_FINALIZED) {}
164   PetscFunctionReturn(0);
165 }
166 
167 PetscErrorCode DataExDestroy(DataEx d)
168 {
169   PetscErrorCode ierr;
170 
171   PetscFunctionBegin;
172   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
173   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
174   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
175   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
176   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
177   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
178   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
179   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
180   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
181   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
182   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
183   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
184   ierr = PetscFree(d);CHKERRQ(ierr);
185   PetscFunctionReturn(0);
186 }
187 
188 /* === Phase A === */
189 
/* Phase A entry point: (re)start topology definition.
   Clears the neighbour count and frees every array sized by the old
   neighbour list so the topology can be rebuilt from scratch. */
PetscErrorCode DataExTopologyInitialize(DataEx d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  d->topology_status = DEOBJECT_INITIALIZED;
  d->n_neighbour_procs = 0;
  /* PetscFree() tolerates NULL and zeros the pointer, so no guards needed */
  ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
  ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
  ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
  ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
  ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
206 
207 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
208 {
209   PetscMPIInt    n,found;
210   PetscMPIInt    nproc;
211   PetscErrorCode ierr;
212 
213   PetscFunctionBegin;
214   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
215   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
216 
217   /* error on negative entries */
218   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
219   /* error on ranks larger than number of procs in communicator */
220   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
221   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
222   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
223   /* check for proc_id */
224   found = 0;
225   for (n = 0; n < d->n_neighbour_procs; n++) {
226     if (d->neighbour_procs[n] == proc_id) {
227       found  = 1;
228     }
229   }
230   if (found == 0) { /* add it to list */
231     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
232     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
233     d->n_neighbour_procs++;
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 /*
239 counter: the index of the communication object
240 N: the number of processors
241 r0: rank of sender
242 r1: rank of receiver
243 
244 procs = { 0, 1, 2, 3 }
245 
246 0 ==> 0		e=0
247 0 ==> 1		e=1
248 0 ==> 2		e=2
249 0 ==> 3		e=3
250 
251 1 ==> 0		e=4
252 1 ==> 1		e=5
253 1 ==> 2		e=6
254 1 ==> 3		e=7
255 
256 2 ==> 0		e=8
257 2 ==> 1		e=9
258 2 ==> 2		e=10
259 2 ==> 3		e=11
260 
261 3 ==> 0		e=12
262 3 ==> 1		e=13
263 3 ==> 2		e=14
264 3 ==> 3		e=15
265 
266 If we require that proc A sends to proc B, then the SEND tag index will be given by
267   N * rank(A) + rank(B) + offset
268 If we require that proc A will receive from proc B, then the RECV tag index will be given by
269   N * rank(B) + rank(A) + offset
270 
271 */
272 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
273 {
274   PetscMPIInt st,rt;
275 
276   st = N*r0 + r1   +   N*N*counter;
277   rt = N*r1 + r0   +   N*N*counter;
278   *_st = st;
279   *_rt = rt;
280 }
281 
/*
Makes the communication map symmetric: if any rank lists r as a neighbour,
then r ends up listing that rank too.

Collective on comm. Each rank inserts its neighbour list as entries of row
"rank" in a size-by-size AIJ matrix ("r talks to c"), then the transpose
entries are inserted, so the assembled matrix is structurally symmetric.
If n_new and proc_neighbours_new are both non-NULL, the symmetrized
neighbour list for this rank is read back from its matrix row and returned
in freshly allocated storage (caller must PetscFree it).
*/
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size,  rank_i;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the PetscMPIInt neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  /* one row/column per rank; a nonzero at (r,c) marks r communicating with c */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  /* preallocation is only an estimate; do not error on extra allocations */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  /* FLUSH assembly: we switch from row-wise to column-wise insertion below */
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/* debugging aid: view the assembled connectivity matrix
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* this rank's row now holds the symmetrized neighbour set */
    ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
352 
353 PetscErrorCode DataExTopologyFinalize(DataEx d)
354 {
355   PetscMPIInt    symm_nn;
356   PetscMPIInt   *symm_procs;
357   PetscMPIInt    r0,n,st,rt;
358   PetscMPIInt    nprocs;
359   PetscErrorCode ierr;
360 
361   PetscFunctionBegin;
362   if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
363 
364   ierr = PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
365   /* given infomation about all my neighbours, make map symmetric */
366   ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
367   /* update my arrays */
368   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
369   d->n_neighbour_procs = symm_nn;
370   d->neighbour_procs   = symm_procs;
371   /* allocates memory */
372   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
373   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
374   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
375   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
376   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
377   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
378   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
379   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
380   /* compute message tags */
381   ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
382   r0 = d->rank;
383   for (n = 0; n < d->n_neighbour_procs; ++n) {
384     PetscMPIInt r1 = d->neighbour_procs[n];
385 
386     _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
387     d->send_tags[n] = (int)st;
388     d->recv_tags[n] = (int)rt;
389   }
390   d->topology_status = DEOBJECT_FINALIZED;
391   ierr = PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
392   PetscFunctionReturn(0);
393 }
394 
395 /* === Phase B === */
396 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
397 {
398   PetscMPIInt i,np;
399 
400   PetscFunctionBegin;
401   np = de->n_neighbour_procs;
402   *local = -1;
403   for (i = 0; i < np; ++i) {
404     if (proc_id == de->neighbour_procs[i]) {
405       *local = i;
406       break;
407     }
408   }
409   PetscFunctionReturn(0);
410 }
411 
412 PetscErrorCode DataExInitializeSendCount(DataEx de)
413 {
414   PetscMPIInt    i;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
419   ierr = PetscLogEventBegin(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
420   de->message_lengths_status = DEOBJECT_INITIALIZED;
421   for (i = 0; i < de->n_neighbour_procs; ++i) {
422     de->messages_to_be_sent[i] = 0;
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 /*
428 1) only allows counters to be set on neighbouring cpus
429 */
430 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
431 {
432   PetscMPIInt    local_val;
433   PetscErrorCode ierr;
434 
435   PetscFunctionBegin;
436   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
437   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
438 
439   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
440   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
441 
442   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
443   PetscFunctionReturn(0);
444 }
445 
/* Phase B completion: lock in the send counts and end the SendCount log
   event begun in DataExInitializeSendCount(). */
PetscErrorCode DataExFinalizeSendCount(DataEx de)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );

  de->message_lengths_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
457 
458 /* === Phase C === */
459 /*
460  * zero out all send counts
461  * free send and recv buffers
462  * zeros out message length
463  * zeros out all counters
464  * zero out packed data counters
465 */
466 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
467 {
468   PetscMPIInt    i, np;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
473   */
474   /*
475   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
476   */
477   np = de->n_neighbour_procs;
478   for (i = 0; i < np; ++i) {
479     /*	de->messages_to_be_sent[i] = -1; */
480     de->messages_to_be_recvieved[i] = -1;
481   }
482   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
483   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
/*
Phase C entry point.
*) Records the atomic (per-unit) message size
*) Errors if any send count was never initialized
*) Allocates and zeros the outgoing pack buffer
*) Computes per-neighbour offsets into that buffer
*) Zeros the pack counters consumed by DataExPackData()
*/
PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  de->packer_status = DEOBJECT_INITIALIZED;
  /* reset recv counts and drop buffers left over from a previous exchange */
  ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total = 0;
  for (i = 0; i < np; ++i) {
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent (+1 unit keeps the buffer non-empty
     even when the total send count is zero) */
  ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to send */
  de->send_message_length = total;
  /* prefix-sum of send counts: where each neighbour's data starts */
  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) {
    de->pack_cnt[i] = 0;
  }
  PetscFunctionReturn(0);
}
534 
/*
Phase C: append n units from "data" into the send-buffer slot reserved for
neighbour proc_id.
*) Ensures data gets packed appropriately and no overlaps occur
*/
PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
{
  PetscMPIInt    local;
  PetscInt       insert_location;
  void           *dest;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );

  if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
  if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
  /* refuse to write past the space reserved by DataExAddToSendCount() */
  if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
              (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );

  /* copy memory: append after the units already packed for this neighbour */
  insert_location = de->message_offsets[local] + de->pack_cnt[local];
  dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
  ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
  /* increment counter */
  de->pack_cnt[local] = de->pack_cnt[local] + n;
  PetscFunctionReturn(0);
}
563 
/*
Phase C completion.
*) Ensures all data has been packed
*) Exchanges message lengths with every neighbour (Isend/Irecv + Waitall,
   so effectively blocking)
*) Allocates and zeros the receive buffer sized from the exchanged counts
*/
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  /* _requests/_stats were sized 2*np in DataExTopologyFinalize(): sends in
     slots [0..np), recvs in [np..2*np) */
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be received (+1 unit keeps the buffer
     non-empty even when nothing is incoming) */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to receive */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
607 
/* Phase D: post the non-blocking sends of the packed data. The matching
   receives are posted (and all requests completed) in DataExEnd(), so the
   caller may overlap computation between DataExBegin() and DataExEnd(). */
PetscErrorCode DataExBegin(DataEx de)
{
  PetscMPIInt i,np;
  void       *dest;
  PetscInt    length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
  if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* length in bytes; payload is opaque, hence MPI_CHAR */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    /* send requests occupy slots [0..np); DataExEnd() fills [np..2*np) */
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
633 
/* Phase D completion: post the matching non-blocking receives, then wait on
   the sends from DataExBegin() together with these receives. On return
   recv_message is valid and may be obtained via DataExGetRecvData(). */
PetscErrorCode DataExEnd(DataEx de)
{
  PetscMPIInt  i,np;
  PetscInt     total;
  PetscInt    *message_recv_offsets;
  void        *dest;
  PetscInt     length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* prefix-sum of recv counts: where each neighbour's data lands */
  ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    /* recv requests fill slots [np..2*np); sends from DataExBegin() sit in [0..np) */
    ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
  }
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
  ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
  de->communication_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
668 
669 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
670 {
671   PetscFunctionBegin;
672   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
673   *length = de->send_message_length;
674   *send   = de->send_message;
675   PetscFunctionReturn(0);
676 }
677 
678 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
679 {
680   PetscFunctionBegin;
681   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
682   *length = de->recv_message_length;
683   *recv   = de->recv_message;
684   PetscFunctionReturn(0);
685 }
686 
687 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
688 {
689   PetscFunctionBegin;
690   if (n)     {*n     = de->n_neighbour_procs;}
691   if (neigh) {*neigh = de->neighbour_procs;}
692   PetscFunctionReturn(0);
693 }
694 
695