xref: /petsc/src/dm/impls/swarm/data_ex.c (revision c688d0420b4e513ff34944d1e1ad7d4e50aafa8d)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0, and then the
message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
28 The Packer knows how much is to going to each processor and keeps track of the inserts
so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 //
51 DataExCreate()
52 // A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 // B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 // C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 // D
68 DataExBegin()
69 // ... perform any calculations ... ///
70 DataExEnd()
71 
72 // Call any getters //
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
83 const char *status_names[] = {"initialized", "finalized", "unknown"};
84 
85 PetscLogEvent PTATIN_DataExchangerTopologySetup;
86 PetscLogEvent PTATIN_DataExchangerBegin;
87 PetscLogEvent PTATIN_DataExchangerEnd;
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "DataExCreate"
91 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
92 {
93   PetscErrorCode ierr;
94   DataEx         d;
95 
96   PetscFunctionBegin;
97   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
98   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
99   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
100   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
101 
102   d->instance = count;
103 
104   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
105   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
106   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
107   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
108 
109   d->n_neighbour_procs = -1;
110   d->neighbour_procs   = NULL;
111 
112   d->messages_to_be_sent      = NULL;
113   d->message_offsets          = NULL;
114   d->messages_to_be_recvieved = NULL;
115 
116   d->unit_message_size   = -1;
117   d->send_message        = NULL;
118   d->send_message_length = -1;
119   d->recv_message        = NULL;
120   d->recv_message_length = -1;
121   d->total_pack_cnt      = -1;
122   d->pack_cnt            = NULL;
123 
124   d->send_tags = NULL;
125   d->recv_tags = NULL;
126 
127   d->_stats    = NULL;
128   d->_requests = NULL;
129   *ex = d;
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "DataExView"
135 PetscErrorCode DataExView(DataEx d)
136 {
137   PetscMPIInt p;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
142   ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
143   ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
144   ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
145   ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);
146 
147   if (d->topology_status == DEOBJECT_FINALIZED) {
148     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
149     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
150     for (p=0; p<d->n_neighbour_procs; p++) {
151       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
152     }
153   }
154   if (d->message_lengths_status == DEOBJECT_FINALIZED) {
155     ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
156     ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
157     for (p=0; p<d->n_neighbour_procs; p++) {
158       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
159     }
160     for (p=0; p<d->n_neighbour_procs; p++) {
161       ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
162     }
163   }
164   if (d->packer_status == DEOBJECT_FINALIZED) {}
165   if (d->communication_status == DEOBJECT_FINALIZED) {}
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "DataExDestroy"
171 PetscErrorCode DataExDestroy(DataEx d)
172 {
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
177   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
178   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
179   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
180   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
181   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
182   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
183   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
184   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
185   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
186   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
187   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
188   ierr = PetscFree(d);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 /* === Phase A === */
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "DataExTopologyInitialize"
196 PetscErrorCode DataExTopologyInitialize(DataEx d)
197 {
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   d->topology_status = DEOBJECT_INITIALIZED;
202   d->n_neighbour_procs = 0;
203   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
204   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
205   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
206   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
207   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
208   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
209   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "DataExTopologyAddNeighbour"
215 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
216 {
217   PetscMPIInt    n,found;
218   PetscMPIInt    nproc;
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   if (d->topology_status == DEOBJECT_FINALIZED) {
223     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
224   } else if (d->topology_status != DEOBJECT_INITIALIZED) {
225     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
226   }
227   /* error on negative entries */
228   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
229   /* error on ranks larger than number of procs in communicator */
230   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
231   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
232   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
233   /* check for proc_id */
234   found = 0;
235   for (n = 0; n < d->n_neighbour_procs; n++) {
236     if (d->neighbour_procs[n] == proc_id) {
237       found  = 1;
238     }
239   }
240   if (found == 0) { /* add it to list */
241     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
242     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
243     d->n_neighbour_procs++;
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249 counter: the index of the communication object
250 N: the number of processors
251 r0: rank of sender
252 r1: rank of receiver
253 
254 procs = { 0, 1, 2, 3 }
255 
256 0 ==> 0		e=0
257 0 ==> 1		e=1
258 0 ==> 2		e=2
259 0 ==> 3		e=3
260 
261 1 ==> 0		e=4
262 1 ==> 1		e=5
263 1 ==> 2		e=6
264 1 ==> 3		e=7
265 
266 2 ==> 0		e=8
267 2 ==> 1		e=9
268 2 ==> 2		e=10
269 2 ==> 3		e=11
270 
271 3 ==> 0		e=12
272 3 ==> 1		e=13
273 3 ==> 2		e=14
274 3 ==> 3		e=15
275 
276 If we require that proc A sends to proc B, then the SEND tag index will be given by
277   N * rank(A) + rank(B) + offset
278 If we require that proc A will receive from proc B, then the RECV tag index will be given by
279   N * rank(B) + rank(A) + offset
280 
281 */
282 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
283 {
284   PetscMPIInt st,rt;
285 
286   st = N*r0 + r1   +   N*N*counter;
287   rt = N*r1 + r0   +   N*N*counter;
288   *_st = st;
289   *_rt = rt;
290 }
291 
292 /*
293 Makes the communication map symmetric
294 */
295 #undef __FUNCT__
296 #define __FUNCT__ "_DataExCompleteCommunicationMap"
297 PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
298 {
299   Mat               A;
300   PetscInt          i,j,nc;
301   PetscInt          n_, *proc_neighbours_;
302   PetscInt          rank_i_;
303   PetscMPIInt       size,  rank_i;
304   PetscScalar       *vals;
305   const PetscInt    *cols;
306   const PetscScalar *red_vals;
307   PetscMPIInt       _n_new, *_proc_neighbours_new;
308   PetscErrorCode    ierr;
309 
310   PetscFunctionBegin;
311   n_ = n;
312   ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
313   for (i = 0; i < n_; ++i) {
314     proc_neighbours_[i] = proc_neighbours[i];
315   }
316   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
317   ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
318   rank_i_ = rank_i;
319 
320   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
321   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
322   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
323   ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
324   ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
325   ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
326   /* Build original map */
327   ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
328   for (i = 0; i < n_; ++i) {
329     vals[i] = 1.0;
330   }
331   ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
332   ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
333   ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
334   /* Now force all other connections if they are not already there */
335   /* It's more efficient to do them all at once */
336   for (i = 0; i < n_; ++i) {
337     vals[i] = 2.0;
338   }
339   ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
340   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342 /*
343   ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
344   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
345   ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
346 */
347   if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
348     ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
349     _n_new = (PetscMPIInt) nc;
350     ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
351     for (j = 0; j < nc; ++j) {
352       _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
353     }
354     ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
355     *n_new               = (PetscMPIInt)_n_new;
356     *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
357   }
358   ierr = MatDestroy(&A);CHKERRQ(ierr);
359   ierr = PetscFree(vals);CHKERRQ(ierr);
360   ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
361   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "DataExTopologyFinalize"
367 PetscErrorCode DataExTopologyFinalize(DataEx d)
368 {
369   PetscMPIInt    symm_nn;
370   PetscMPIInt   *symm_procs;
371   PetscMPIInt    r0,n,st,rt;
372   PetscMPIInt    nprocs;
373   PetscErrorCode ierr;
374 
375   PetscFunctionBegin;
376   if (d->topology_status != DEOBJECT_INITIALIZED) {
377     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
378   }
379   ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
380   /* given infomation about all my neighbours, make map symmetric */
381   ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
382   /* update my arrays */
383   ierr = PetscFree(d->neighbour_procs);
384   d->n_neighbour_procs = symm_nn;
385   d->neighbour_procs   = symm_procs;
386   /* allocates memory */
387   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->messages_to_be_sent);CHKERRQ(ierr);}
388   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->message_offsets);CHKERRQ(ierr);}
389   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
390   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
391   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
392   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
393   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
394   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
395   /* compute message tags */
396   ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
397   r0 = d->rank;
398   for (n = 0; n < d->n_neighbour_procs; ++n) {
399     PetscMPIInt r1 = d->neighbour_procs[n];
400 
401     _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
402     d->send_tags[n] = (int)st;
403     d->recv_tags[n] = (int)rt;
404   }
405   d->topology_status = DEOBJECT_FINALIZED;
406   ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 /* === Phase B === */
411 #undef __FUNCT__
412 #define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
413 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
414 {
415   PetscMPIInt i,np;
416 
417   PetscFunctionBegin;
418   np = de->n_neighbour_procs;
419   *local = -1;
420   for (i = 0; i < np; ++i) {
421     if (proc_id == de->neighbour_procs[i]) {
422       *local = i;
423       break;
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "DataExInitializeSendCount"
431 PetscErrorCode DataExInitializeSendCount(DataEx de)
432 {
433   PetscMPIInt i;
434 
435   PetscFunctionBegin;
436   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
437   de->message_lengths_status = DEOBJECT_INITIALIZED;
438   for (i = 0; i < de->n_neighbour_procs; ++i) {
439     de->messages_to_be_sent[i] = 0;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 /*
445 1) only allows counters to be set on neighbouring cpus
446 */
447 #undef __FUNCT__
448 #define __FUNCT__ "DataExAddToSendCount"
449 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
450 {
451   PetscMPIInt    local_val;
452   PetscErrorCode ierr;
453 
454   PetscFunctionBegin;
455   if (de->message_lengths_status == DEOBJECT_FINALIZED) {
456     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
457   } else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
458     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
459   }
460   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
461   if (local_val == -1) {
462     SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
463   }
464   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "DataExFinalizeSendCount"
470 PetscErrorCode DataExFinalizeSendCount(DataEx de)
471 {
472   PetscFunctionBegin;
473   if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
474     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
475   }
476   de->message_lengths_status = DEOBJECT_FINALIZED;
477   PetscFunctionReturn(0);
478 }
479 
480 /* === Phase C === */
481 /*
482  * zero out all send counts
483  * free send and recv buffers
484  * zeros out message length
485  * zeros out all counters
486  * zero out packed data counters
487 */
488 #undef __FUNCT__
489 #define __FUNCT__ "_DataExInitializeTmpStorage"
490 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
491 {
492   PetscMPIInt    i, np;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   if (de->n_neighbour_procs < 0) {
497     SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
498   }
499   if (!de->neighbour_procs) {
500     SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
501   }
502   np = de->n_neighbour_procs;
503   for (i = 0; i < np; ++i) {
504     /*	de->messages_to_be_sent[i] = -1; */
505     de->messages_to_be_recvieved[i] = -1;
506   }
507   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
508   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 /*
513 *) Zeros out pack data counters
*) Ensures message length is set
515 *) Checks send counts properly initialized
516 *) allocates space for pack data
517 */
518 #undef __FUNCT__
519 #define __FUNCT__ "DataExPackInitialize"
520 PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
521 {
522   PetscMPIInt    i,np;
523   PetscInt       total;
524   PetscErrorCode ierr;
525 
526   PetscFunctionBegin;
527   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
528   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
529   de->packer_status = DEOBJECT_INITIALIZED;
530   ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
531   np = de->n_neighbour_procs;
532   de->unit_message_size = unit_message_size;
533   total = 0;
534   for (i = 0; i < np; ++i) {
535     if (de->messages_to_be_sent[i] == -1) {
536       PetscMPIInt proc_neighour = de->neighbour_procs[i];
537       SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
538     }
539     total = total + de->messages_to_be_sent[i];
540   }
541   /* create space for the data to be sent */
542   ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
543   /* initialize memory */
544   ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
545   /* set total items to send */
546   de->send_message_length = total;
547   de->message_offsets[0] = 0;
548   total = de->messages_to_be_sent[0];
549   for (i = 1; i < np; ++i) {
550     de->message_offsets[i] = total;
551     total = total + de->messages_to_be_sent[i];
552   }
553   /* init the packer counters */
554   de->total_pack_cnt = 0;
555   for (i = 0; i < np; ++i) {
556     de->pack_cnt[i] = 0;
557   }
558   PetscFunctionReturn(0);
559 }
560 
561 /*
562 *) Ensures data gets been packed appropriately and no overlaps occur
563 */
564 #undef __FUNCT__
565 #define __FUNCT__ "DataExPackData"
566 PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
567 {
568   PetscMPIInt    local;
569   PetscInt       insert_location;
570   void           *dest;
571   PetscErrorCode ierr;
572 
573   PetscFunctionBegin;
574   if (de->packer_status == DEOBJECT_FINALIZED) {
575     SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
576   }
577   else if (de->packer_status != DEOBJECT_INITIALIZED) {
578     SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
579   }
580   if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
581   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
582   if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
583   if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
584     SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
585               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
586     /* don't need this - the catch for too many messages will pick this up. Gives us more info though */
587     if (de->packer_status == DEOBJECT_FINALIZED) {
588       SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." );
589     }
590   }
591   /* copy memory */
592   insert_location = de->message_offsets[local] + de->pack_cnt[local];
593   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
594   ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
595   /* increment counter */
596   de->pack_cnt[local] = de->pack_cnt[local] + n;
597   PetscFunctionReturn(0);
598 }
599 
600 /*
601 *) Ensures all data has been packed
602 */
603 #undef __FUNCT__
604 #define __FUNCT__ "DataExPackFinalize"
605 PetscErrorCode DataExPackFinalize(DataEx de)
606 {
607   PetscMPIInt i,np;
608   PetscInt    total;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
613   np = de->n_neighbour_procs;
614   for (i = 0; i < np; ++i) {
615     if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
616       SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
617                 (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
618     }
619   }
620   /* init */
621   for (i = 0; i < np; ++i) {
622     de->messages_to_be_recvieved[i] = -1;
623   }
624   /* figure out the recv counts here */
625   for (i = 0; i < np; ++i) {
626     ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
627   }
628   for (i = 0; i < np; ++i) {
629     ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
630   }
631   ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
632   /* create space for the data to be recvieved */
633   total = 0;
634   for (i = 0; i < np; ++i) {
635     total = total + de->messages_to_be_recvieved[i];
636   }
637   ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
638   /* initialize memory */
639   ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
640   /* set total items to recieve */
641   de->recv_message_length = total;
642   de->packer_status = DEOBJECT_FINALIZED;
643   de->communication_status = DEOBJECT_INITIALIZED;
644   PetscFunctionReturn(0);
645 }
646 
647 /* do the actual message passing now */
648 #undef __FUNCT__
649 #define __FUNCT__ "DataExBegin"
650 PetscErrorCode DataExBegin(DataEx de)
651 {
652   PetscMPIInt i,np;
653   void       *dest;
654   PetscInt    length;
655   PetscErrorCode ierr;
656 
657   PetscFunctionBegin;
658   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
659   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
660   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
661   if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
662   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
663   ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
664   np = de->n_neighbour_procs;
665   /* == NON BLOCKING == */
666   for (i = 0; i < np; ++i) {
667     length = de->messages_to_be_sent[i] * de->unit_message_size;
668     dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
669     ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
670   }
671   ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 /* do the actual message passing now */
676 #undef __FUNCT__
677 #define __FUNCT__ "DataExEnd"
678 PetscErrorCode DataExEnd(DataEx de)
679 {
680   PetscMPIInt  i,np;
681   PetscInt     total;
682   PetscInt    *message_recv_offsets;
683   void        *dest;
684   PetscInt     length;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688   if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
689   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
690   ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
691   np = de->n_neighbour_procs;
692   ierr = PetscMalloc1(np, &message_recv_offsets);CHKERRQ(ierr);
693   message_recv_offsets[0] = 0;
694   total = de->messages_to_be_recvieved[0];
695   for (i = 1; i < np; ++i) {
696     message_recv_offsets[i] = total;
697     total = total + de->messages_to_be_recvieved[i];
698   }
699   /* == NON BLOCKING == */
700   for (i = 0; i < np; ++i) {
701     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
702     dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
703     ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
704   }
705   ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
706   ierr = PetscFree(message_recv_offsets);
707   de->communication_status = DEOBJECT_FINALIZED;
708   ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "DataExGetSendData"
714 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
715 {
716   PetscFunctionBegin;
717   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
718   *length = de->send_message_length;
719   *send   = de->send_message;
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "DataExGetRecvData"
725 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
726 {
727   PetscFunctionBegin;
728   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
729   *length = de->recv_message_length;
730   *recv   = de->recv_message;
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "DataExTopologyGetNeighbours"
736 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
737 {
738   PetscFunctionBegin;
739   if (n)     {*n     = de->n_neighbour_procs;}
740   if (neigh) {*neigh = de->neighbour_procs;}
741   PetscFunctionReturn(0);
742 }
743 
744