xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 9f4d3c52fa2fe0bb72fec4f4e85d8e495867af35)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0, and then the
message id comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data has different lengths.
All data gets jammed into a single array. Need to "jam" data into the correct locations.
The Packer knows how much is going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
User has to unpack the message themselves. I can get you the pointer for each
entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 + Constructor
51 DataExCreate()
52 + Phase A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 + Phase B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 + Phase C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 +Phase D
68 DataExBegin()
69  ... perform any calculations ...
70 DataExEnd()
71 
72 ... user calls any getters here ...
73 
74 
75 */
76 #include <petscvec.h>
77 #include <petscmat.h>
78 
79 #include "data_ex.h"
80 
/* Human-readable names for the phase-state values printed by DataExView().
   Assumes the DEObjectState enum in data_ex.h orders its members as
   INITIALIZED, FINALIZED, STATE_UNKNOWN -- TODO confirm against the header. */
const char *status_names[] = {"initialized", "finalized", "unknown"};
82 
83 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerTopologySetup;
84 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerBegin;
85 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerEnd;
86 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerSendCount;
87 PETSC_EXTERN PetscLogEvent DMSWARM_DataExchangerPack;
88 
89 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
90 {
91   PetscErrorCode ierr;
92   DataEx         d;
93 
94   PetscFunctionBegin;
95   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
96   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
97   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
98   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
99 
100   d->instance = count;
101 
102   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
103   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
104   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
105   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
106 
107   d->n_neighbour_procs = -1;
108   d->neighbour_procs   = NULL;
109 
110   d->messages_to_be_sent      = NULL;
111   d->message_offsets          = NULL;
112   d->messages_to_be_recvieved = NULL;
113 
114   d->unit_message_size   = -1;
115   d->send_message        = NULL;
116   d->send_message_length = -1;
117   d->recv_message        = NULL;
118   d->recv_message_length = -1;
119   d->total_pack_cnt      = -1;
120   d->pack_cnt            = NULL;
121 
122   d->send_tags = NULL;
123   d->recv_tags = NULL;
124 
125   d->_stats    = NULL;
126   d->_requests = NULL;
127   *ex = d;
128   PetscFunctionReturn(0);
129 }
130 
131 PetscErrorCode DataExView(DataEx d)
132 {
133   PetscMPIInt p;
134   PetscErrorCode ierr;
135 
136   PetscFunctionBegin;
137   ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
138   ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
139   ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
140   ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
141   ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);
142 
143   if (d->topology_status == DEOBJECT_FINALIZED) {
144     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
145     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
146     for (p=0; p<d->n_neighbour_procs; p++) {
147       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
148     }
149   }
150   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
151     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
152     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
153     for (p=0; p<d->n_neighbour_procs; p++) {
154       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
155     }
156     for (p=0; p<d->n_neighbour_procs; p++) {
157       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
158     }
159   }
160   if (d->packer_status == DEOBJECT_FINALIZED) {}
161   if (d->communication_status == DEOBJECT_FINALIZED) {}
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode DataExDestroy(DataEx d)
166 {
167   PetscErrorCode ierr;
168 
169   PetscFunctionBegin;
170   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
171   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
172   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
173   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
174   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
175   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
176   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
177   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
178   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
179   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
180   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
181   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
182   ierr = PetscFree(d);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 /* === Phase A === */
187 
188 PetscErrorCode DataExTopologyInitialize(DataEx d)
189 {
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   d->topology_status = DEOBJECT_INITIALIZED;
194   d->n_neighbour_procs = 0;
195   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
196   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
197   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
198   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
199   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
200   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
201   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
206 {
207   PetscMPIInt    n,found;
208   PetscMPIInt    nproc;
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   if (d->topology_status == DEOBJECT_FINALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
213   else if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
214 
215   /* error on negative entries */
216   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
217   /* error on ranks larger than number of procs in communicator */
218   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
219   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
220   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
221   /* check for proc_id */
222   found = 0;
223   for (n = 0; n < d->n_neighbour_procs; n++) {
224     if (d->neighbour_procs[n] == proc_id) {
225       found  = 1;
226     }
227   }
228   if (found == 0) { /* add it to list */
229     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
230     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
231     d->n_neighbour_procs++;
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 /*
237 counter: the index of the communication object
238 N: the number of processors
239 r0: rank of sender
240 r1: rank of receiver
241 
242 procs = { 0, 1, 2, 3 }
243 
244 0 ==> 0		e=0
245 0 ==> 1		e=1
246 0 ==> 2		e=2
247 0 ==> 3		e=3
248 
249 1 ==> 0		e=4
250 1 ==> 1		e=5
251 1 ==> 2		e=6
252 1 ==> 3		e=7
253 
254 2 ==> 0		e=8
255 2 ==> 1		e=9
256 2 ==> 2		e=10
257 2 ==> 3		e=11
258 
259 3 ==> 0		e=12
260 3 ==> 1		e=13
261 3 ==> 2		e=14
262 3 ==> 3		e=15
263 
264 If we require that proc A sends to proc B, then the SEND tag index will be given by
265   N * rank(A) + rank(B) + offset
266 If we require that proc A will receive from proc B, then the RECV tag index will be given by
267   N * rank(B) + rank(A) + offset
268 
269 */
270 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
271 {
272   PetscMPIInt st,rt;
273 
274   st = N*r0 + r1   +   N*N*counter;
275   rt = N*r1 + r0   +   N*N*counter;
276   *_st = st;
277   *_rt = rt;
278 }
279 
/*
 Makes the communication map symmetric: if rank A lists B as a neighbour,
 ensure B also lists A.

 The closure is computed by assembling a size-by-size parallel AIJ matrix in
 which entry (i,j) is nonzero iff rank i communicates with rank j.  Each rank
 inserts its own row (its declared send targets) and then its own column
 (forcing the reverse edges); after final assembly the nonzero columns of row
 `rank` are the symmetrised neighbour list.

 comm                - communicator over which the map is defined (collective)
 n                   - number of entries in proc_neighbours[]
 proc_neighbours     - ranks this process wants to talk to
 n_new               - (output) symmetrised neighbour count
 proc_neighbours_new - (output) symmetrised neighbour list; allocated here,
                       caller takes ownership and must PetscFree() it
 Outputs are only produced when BOTH n_new and proc_neighbours_new are non-NULL.
*/
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size,  rank_i;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the MPIInt neighbour list to PetscInt for MatSetValues() */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  /* the column insertion below may create entries outside the preallocation */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  /* flush (not final) assembly: more insertions follow */
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* the nonzero columns of this rank's row are the symmetrised neighbours */
    ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
350 
/*
 Close the topology-definition phase (collective on the exchanger's comm).

 Symmetrises the neighbour list across the communicator so every edge is
 known on both ends, then sizes all per-neighbour bookkeeping arrays and
 computes the matched send/recv tag pair for every neighbour edge.
*/
PetscErrorCode DataExTopologyFinalize(DataEx d)
{
  PetscMPIInt    symm_nn;
  PetscMPIInt   *symm_procs;
  PetscMPIInt    r0,n,st,rt;
  PetscMPIInt    nprocs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->topology_status != DEOBJECT_INITIALIZED) SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");

  ierr = PetscLogEventBegin(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
  /* given information about all my neighbours, make map symmetric */
  ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
  /* update my arrays; the symmetrised list (allocated by the call above)
     replaces the user-built one */
  ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
  d->n_neighbour_procs = symm_nn;
  d->neighbour_procs   = symm_procs;
  /* allocates memory; the +1 keeps the allocation non-empty even with zero
     neighbours.  NOTE(review): these `if (!ptr)` guards assume the buffers
     were freed when the topology was (re)initialized; _stats/_requests are
     not freed by DataExTopologyInitialize(), so a rebuilt topology with more
     neighbours would reuse undersized arrays -- verify. */
  if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
  if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
  if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
  if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
  /* 2*np: one request/status per neighbour for sends plus one for recvs */
  if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
  if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
  if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
  if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
  /* compute message tags: matched pairs so each neighbour's recv tag equals
     our send tag for that edge (see _get_tags()) */
  ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
  r0 = d->rank;
  for (n = 0; n < d->n_neighbour_procs; ++n) {
    PetscMPIInt r1 = d->neighbour_procs[n];

    _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
    d->send_tags[n] = (int)st;
    d->recv_tags[n] = (int)rt;
  }
  d->topology_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
392 
393 /* === Phase B === */
394 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
395 {
396   PetscMPIInt i,np;
397 
398   PetscFunctionBegin;
399   np = de->n_neighbour_procs;
400   *local = -1;
401   for (i = 0; i < np; ++i) {
402     if (proc_id == de->neighbour_procs[i]) {
403       *local = i;
404       break;
405     }
406   }
407   PetscFunctionReturn(0);
408 }
409 
410 PetscErrorCode DataExInitializeSendCount(DataEx de)
411 {
412   PetscMPIInt    i;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
417   ierr = PetscLogEventBegin(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
418   de->message_lengths_status = DEOBJECT_INITIALIZED;
419   for (i = 0; i < de->n_neighbour_procs; ++i) {
420     de->messages_to_be_sent[i] = 0;
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 /*
426 1) only allows counters to be set on neighbouring cpus
427 */
428 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
429 {
430   PetscMPIInt    local_val;
431   PetscErrorCode ierr;
432 
433   PetscFunctionBegin;
434   if (de->message_lengths_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
435   else if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
436 
437   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
438   if (local_val == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
439 
440   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
441   PetscFunctionReturn(0);
442 }
443 
444 PetscErrorCode DataExFinalizeSendCount(DataEx de)
445 {
446   PetscErrorCode ierr;
447 
448   PetscFunctionBegin;
449   if (de->message_lengths_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
450 
451   de->message_lengths_status = DEOBJECT_FINALIZED;
452   ierr = PetscLogEventEnd(DMSWARM_DataExchangerSendCount,0,0,0,0);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 /* === Phase C === */
457 /*
458  * zero out all send counts
459  * free send and recv buffers
460  * zeros out message length
461  * zeros out all counters
462  * zero out packed data counters
463 */
464 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
465 {
466   PetscMPIInt    i, np;
467   PetscErrorCode ierr;
468 
469   PetscFunctionBegin;
470   /*if (de->n_neighbour_procs < 0) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
471   */
472   /*
473   if (!de->neighbour_procs) SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
474   */
475   np = de->n_neighbour_procs;
476   for (i = 0; i < np; ++i) {
477     /*	de->messages_to_be_sent[i] = -1; */
478     de->messages_to_be_recvieved[i] = -1;
479   }
480   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
481   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
/*
 Open the packing phase.

 *) Zeros out pack data counters
 *) Ensures message length is set (unit_message_size = bytes per "item";
    all DataExPackData() calls count items in these units)
 *) Checks send counts properly initialized
 *) Allocates space for pack data and computes each neighbour's insertion
    offset into the shared send buffer
*/
PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  de->packer_status = DEOBJECT_INITIALIZED;
  /* reset receive counters and drop stale send/recv buffers */
  ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total = 0;
  for (i = 0; i < np; ++i) {
    /* -1 is the "never set" sentinel for a send count */
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighour = de->neighbour_procs[i];
      /* NOTE(review): this message names DataExSetSendCount(); the actual
         API is DataExInitializeSendCount()/DataExAddToSendCount() */
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent; +1 unit keeps the allocation
     non-empty even when nothing is sent */
  ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to send */
  de->send_message_length = total;
  /* prefix-sum the send counts into per-neighbour unit offsets (the arrays
     are allocated with np+1 entries, so the [0] access is safe even when
     np == 0) */
  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) {
    de->pack_cnt[i] = 0;
  }
  PetscFunctionReturn(0);
}
532 
533 /*
534 *) Ensures data gets been packed appropriately and no overlaps occur
535 */
536 PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
537 {
538   PetscMPIInt    local;
539   PetscInt       insert_location;
540   void           *dest;
541   PetscErrorCode ierr;
542 
543   PetscFunctionBegin;
544   if (de->packer_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
545   else if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
546 
547   if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
548   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
549   if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
550   if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
551               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
552 
553   /* copy memory */
554   insert_location = de->message_offsets[local] + de->pack_cnt[local];
555   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
556   ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
557   /* increment counter */
558   de->pack_cnt[local] = de->pack_cnt[local] + n;
559   PetscFunctionReturn(0);
560 }
561 
/*
 Close the packing phase.

 *) Ensures all data has been packed (pack_cnt == messages_to_be_sent for
    every neighbour)
 *) Exchanges the per-neighbour send counts with all neighbours (one
    non-blocking PetscInt send/recv per neighbour, then a Waitall) so each
    rank learns how many units it will receive
 *) Allocates and zeroes the receive buffer sized from those counts

 On success the packer is FINALIZED and the communication phase becomes
 INITIALIZED, enabling DataExBegin()/DataExEnd().
*/
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
  }
  /* init: -1 marks "count not yet received" */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here; requests [0,np) carry the count sends,
     [np,2np) the count receives */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be recvieved; +1 unit keeps the allocation
     non-empty even when nothing is received */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to recieve */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerPack,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
605 
606 /* do the actual message passing now */
607 PetscErrorCode DataExBegin(DataEx de)
608 {
609   PetscMPIInt i,np;
610   void       *dest;
611   PetscInt    length;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
616   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
617   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
618   if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
619   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
620   ierr = PetscLogEventBegin(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
621   np = de->n_neighbour_procs;
622   /* == NON BLOCKING == */
623   for (i = 0; i < np; ++i) {
624     length = de->messages_to_be_sent[i] * de->unit_message_size;
625     dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
626     ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
627   }
628   ierr = PetscLogEventEnd(DMSWARM_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
/*
 Complete the exchange started by DataExBegin().

 Posts one non-blocking receive per neighbour into the receive buffer (at
 offsets prefix-summed from the counts gathered in DataExPackFinalize()),
 then waits on ALL 2*np outstanding requests: entries [0,np) are the data
 sends posted by DataExBegin(), entries [np,2np) are the receives posted
 here.  On return the received data is available via DataExGetRecvData().
*/
PetscErrorCode DataExEnd(DataEx de)
{
  PetscMPIInt  i,np;
  PetscInt     total;
  PetscInt    *message_recv_offsets;
  void        *dest;
  PetscInt     length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* prefix-sum the receive counts into per-neighbour unit offsets (array is
     allocated with np+1 entries, so the [0] access is safe even if np == 0) */
  ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
  }
  /* completes the sends from DataExBegin() ([0,np)) and the receives above */
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
  ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
  de->communication_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(DMSWARM_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
666 
667 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
668 {
669   PetscFunctionBegin;
670   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
671   *length = de->send_message_length;
672   *send   = de->send_message;
673   PetscFunctionReturn(0);
674 }
675 
676 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
677 {
678   PetscFunctionBegin;
679   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
680   *length = de->recv_message_length;
681   *recv   = de->recv_message;
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
686 {
687   PetscFunctionBegin;
688   if (n)     {*n     = de->n_neighbour_procs;}
689   if (neigh) {*neigh = de->neighbour_procs;}
690   PetscFunctionReturn(0);
691 }
692 
693