xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 09fe277da1f73a743f13ea3855cd2bdc4d5de01f)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
17 Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
26 On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
28 The Packer knows how much is going to each processor and keeps track of the inserts
29 so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack message themselves. I can get you the pointer for each i
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 //
51 DataExCreate()
52 // A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 // B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 // C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 // D
68 DataExBegin()
69 // ... perform any calculations ... ///
70 DataExEnd()
71 
72 // Call any getters //
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
/* Human-readable names for the DEOBJECT_* states; indexed by the *_status
   fields of a DataEx (assumes the enum values map onto {0,1,2} — TODO confirm
   against data_ex.h) */
const char *status_names[] = { "initialized", "finalized", "unknown" };

/* Profiling events used to time topology setup and the begin/end of an
   exchange (registration of these events is not visible in this file) */
PetscLogEvent PTATIN_DataExchangerTopologySetup;
PetscLogEvent PTATIN_DataExchangerBegin;
PetscLogEvent PTATIN_DataExchangerEnd;
88 
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "DataExCreate"
92 DataEx DataExCreate(MPI_Comm comm,const PetscInt count)
93 {
94 	DataEx         d;
95 	PetscErrorCode ierr;
96 
97 	d = (DataEx)malloc( sizeof(struct _p_DataEx) );
98 	memset( d, 0, sizeof(struct _p_DataEx) );
99 
100 	ierr = MPI_Comm_dup(comm,&d->comm);
101 	ierr = MPI_Comm_rank(d->comm,&d->rank);
102 
103 	d->instance = count;
104 
105 	d->topology_status        = DEOBJECT_STATE_UNKNOWN;
106 	d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
107 	d->packer_status          = DEOBJECT_STATE_UNKNOWN;
108 	d->communication_status   = DEOBJECT_STATE_UNKNOWN;
109 
110 	d->n_neighbour_procs = -1;
111 	d->neighbour_procs   = NULL;
112 
113 	d->messages_to_be_sent      = NULL;
114 	d->message_offsets          = NULL;
115 	d->messages_to_be_recvieved = NULL;
116 
117 	d->unit_message_size   = -1;
118 	d->send_message        = NULL;
119 	d->send_message_length = -1;
120 	d->recv_message        = NULL;
121 	d->recv_message_length = -1;
122 	d->total_pack_cnt      = -1;
123 	d->pack_cnt            = NULL;
124 
125 	d->send_tags = NULL;
126 	d->recv_tags = NULL;
127 
128 	d->_stats    = NULL;
129 	d->_requests = NULL;
130 
131 	return d;
132 }
133 
#undef __FUNCT__
#define __FUNCT__ "DataExView"
/*
 Print a human-readable summary of the exchanger: the state of each phase
 and, once the corresponding phase is finalized, the neighbour list and the
 per-neighbour message counts and tags.
 Collective headings go to PETSC_COMM_WORLD; per-rank detail is printed
 with PETSC_COMM_SELF so every rank reports its own view.
*/
PetscErrorCode DataExView(DataEx d)
{
	PetscMPIInt p;


	PetscFunctionBegin;
	PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);

	PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);
	PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );
	PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );
	PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );

	if (d->topology_status == DEOBJECT_FINALIZED) {
		PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");
		PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );
		for (p=0; p<d->n_neighbour_procs; p++) {
			PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);
		}
	}

	if (d->message_lengths_status == DEOBJECT_FINALIZED) {
		PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");
		PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );
		/* outgoing traffic: one line per neighbour */
		for (p=0; p<d->n_neighbour_procs; p++) {
			PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );
		}
		/* incoming traffic: one line per neighbour */
		for (p=0; p<d->n_neighbour_procs; p++) {
			PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );
		}
	}

	/* no additional detail is reported for these two phases (yet) */
	if (d->packer_status == DEOBJECT_FINALIZED) {

	}

	if (d->communication_status == DEOBJECT_FINALIZED) {

	}

	PetscFunctionReturn(0);
}
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "DataExDestroy"
181 PetscErrorCode DataExDestroy(DataEx d)
182 {
183 	PetscErrorCode ierr;
184 
185 	PetscFunctionBegin;
186 	ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
187 
188 	if (d->neighbour_procs != NULL) {
189 		free(d->neighbour_procs);
190 	}
191 
192 	if (d->messages_to_be_sent != NULL) {
193 		free(d->messages_to_be_sent);
194 	}
195 
196 	if (d->message_offsets != NULL) {
197 		free(d->message_offsets);
198 	}
199 
200 	if (d->messages_to_be_recvieved != NULL) {
201 		free(d->messages_to_be_recvieved);
202 	}
203 
204 	if (d->send_message != NULL) {
205 		free(d->send_message);
206 	}
207 
208 	if (d->recv_message != NULL) {
209 		free(d->recv_message);
210 	}
211 
212 	if (d->pack_cnt != NULL) {
213 		free(d->pack_cnt);
214 	}
215 
216 	if (d->send_tags != NULL) {
217 		free(d->send_tags);
218 	}
219 	if (d->recv_tags != NULL) {
220 		free(d->recv_tags);
221 	}
222 
223 	if (d->_stats != NULL) {
224 		free(d->_stats);
225 	}
226 	if (d->_requests != NULL) {
227 		free(d->_requests);
228 	}
229 
230 	free(d);
231 
232 	PetscFunctionReturn(0);
233 }
234 
235 /* === Phase A === */
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "DataExTopologyInitialize"
239 PetscErrorCode DataExTopologyInitialize(DataEx d)
240 {
241 	PetscFunctionBegin;
242 	d->topology_status = DEOBJECT_INITIALIZED;
243 
244 	d->n_neighbour_procs = 0;
245 	if (d->neighbour_procs          != NULL)  {  free(d->neighbour_procs);            d->neighbour_procs          = NULL;  }
246 	if (d->messages_to_be_sent      != NULL)  {  free(d->messages_to_be_sent);        d->messages_to_be_sent      = NULL;  }
247 	if (d->message_offsets          != NULL)  {  free(d->message_offsets);            d->message_offsets          = NULL;  }
248 	if (d->messages_to_be_recvieved != NULL)  {  free(d->messages_to_be_recvieved);   d->messages_to_be_recvieved = NULL;  }
249 	if (d->pack_cnt                 != NULL)  {  free(d->pack_cnt);                   d->pack_cnt                 = NULL;  }
250 
251 	if (d->send_tags != NULL)                 {  free(d->send_tags);                  d->send_tags = NULL;  }
252 	if (d->recv_tags != NULL)                 {  free(d->recv_tags);                  d->recv_tags = NULL;  }
253 
254 	PetscFunctionReturn(0);
255 }
256 
257 #undef __FUNCT__
258 #define __FUNCT__ "DataExTopologyAddNeighbour"
259 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
260 {
261 	PetscMPIInt    n,found;
262 	PetscMPIInt    nproc;
263 	PetscErrorCode ierr;
264 
265 
266 	PetscFunctionBegin;
267 	if (d->topology_status == DEOBJECT_FINALIZED) {
268 		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first" );
269 	}
270 	else if (d->topology_status != DEOBJECT_INITIALIZED) {
271 		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" );
272 	}
273 
274 	/* error on negative entries */
275 	if (proc_id < 0) {
276 		SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
277 	}
278 	/* error on ranks larger than number of procs in communicator */
279 	ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
280 	if (proc_id >= nproc) {
281 		SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
282 	}
283 
284 	if (d->n_neighbour_procs == 0) {
285 		d->neighbour_procs = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) );
286 	}
287 
288 	/* check for proc_id */
289 	found = 0;
290 	for (n=0; n<d->n_neighbour_procs; n++) {
291 		if (d->neighbour_procs[n] == proc_id) {
292 			found  = 1;
293 		}
294 	}
295 	if (found == 0) { /* add it to list */
296 		PetscMPIInt *tmp;
297 
298 		tmp = (PetscMPIInt*)realloc( d->neighbour_procs, sizeof(PetscMPIInt)*(d->n_neighbour_procs+1) );
299 		d->neighbour_procs = tmp;
300 
301 		d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
302 		d->n_neighbour_procs++;
303 	}
304 
305 	PetscFunctionReturn(0);
306 }
307 
308 /*
309 counter: the index of the communication object
310 N: the number of processors
311 r0: rank of sender
312 r1: rank of receiver
313 
314 procs = { 0, 1, 2, 3 }
315 
316 0 ==> 0		e=0
317 0 ==> 1		e=1
318 0 ==> 2		e=2
319 0 ==> 3		e=3
320 
321 1 ==> 0		e=4
322 1 ==> 1		e=5
323 1 ==> 2		e=6
324 1 ==> 3		e=7
325 
326 2 ==> 0		e=8
327 2 ==> 1		e=9
328 2 ==> 2		e=10
329 2 ==> 3		e=11
330 
331 3 ==> 0		e=12
332 3 ==> 1		e=13
333 3 ==> 2		e=14
334 3 ==> 3		e=15
335 
336 If we require that proc A sends to proc B, then the SEND tag index will be given by
337   N * rank(A) + rank(B) + offset
338 If we require that proc A will receive from proc B, then the RECV tag index will be given by
339   N * rank(B) + rank(A) + offset
340 
341 */
342 void _get_tags( PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt )
343 {
344 	PetscMPIInt st,rt;
345 
346 
347 	st = N*r0 + r1   +   N*N*counter;
348 	rt = N*r1 + r0   +   N*N*counter;
349 
350 	*_st = st;
351 	*_rt = rt;
352 }
353 
/*
Makes the communication map symmetric: if any rank listed B as a neighbour,
then B ends up listing that rank too. Implemented by assembling a size x size
AIJ matrix whose row r holds a nonzero in column c whenever r talks to c;
INSERT_VALUES from both sides unions the two one-sided maps, and reading back
this rank's row yields the symmetrised neighbour list.
n/proc_neighbours: this rank's one-sided neighbour list (input).
n_new/proc_neighbours_new: symmetrised count/list (output; list is malloc'd,
caller takes ownership). Both outputs may be NULL to skip the read-back.
Collective on comm.
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap"
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
	Mat               A;
	PetscInt          i,j,nc;
	PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
	PetscMPIInt       size,  rank_i;
	PetscScalar       *vals;
	const PetscInt    *cols;
	const PetscScalar *red_vals;
	PetscMPIInt       _n_new, *_proc_neighbours_new;
	//PetscLogDouble    t0,t1;
	PetscErrorCode    ierr;


	PetscFunctionBegin;
	//PetscPrintf(PETSC_COMM_WORLD,"*** Starting _DataExCompleteCommunicationMap *** \n");
	//PetscTime(&t0);

	/* widen the MPI int neighbour list to PetscInt for MatSetValues */
	n_ = n;
	ierr = PetscMalloc( sizeof(PetscInt) * n_, &proc_neighbours_ );CHKERRQ(ierr);
	for (i=0; i<n_; i++) {
		proc_neighbours_[i] = proc_neighbours[i];
	}

	ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
	ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
	rank_i_ = rank_i;

	/* size x size adjacency matrix: one row/column per rank */
	ierr = MatCreate(comm,&A);CHKERRQ(ierr);
	ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
	ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);

	ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);

  /* the symmetrising inserts below may exceed the preallocation; allow that */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);

	/* Build original map: row(me) gets a 1.0 for every declared neighbour */
	ierr = PetscMalloc( sizeof(PetscScalar)*n_, &vals );CHKERRQ(ierr);
	for (i=0; i<n_; i++) {
		vals[i] = 1.0;
	}
	ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);

	ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
	ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);

	/* Now force all other connections if they are not already there */
	/* It's more efficient to do them all at once */
	for (i=0; i<n_; i++) {
		vals[i] = 2.0;
	}
	/* insert column(me) into each neighbour's row, making the map symmetric */
	ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);

	ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
	ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
	ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
	ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
	if ((n_new != NULL) && (proc_neighbours_new != NULL)) {

		/* the nonzero columns of my row are exactly my symmetrised neighbours */
		ierr = MatGetRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);

		_n_new = (PetscMPIInt)nc;
		/* NOTE(review): malloc result not checked here — pre-existing behaviour */
		_proc_neighbours_new = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) * _n_new );

		for (j=0; j<nc; j++) {
			_proc_neighbours_new[j] = (PetscMPIInt)cols[j];
		}
		ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);

		/* ownership of the list transfers to the caller */
		*n_new               = (PetscMPIInt)_n_new;
		*proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
	}

	ierr = MatDestroy(&A);CHKERRQ(ierr);
	ierr = PetscFree(vals);CHKERRQ(ierr);
	ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);

	ierr = MPI_Barrier(comm);CHKERRQ(ierr);
	//PetscTime(&t1);
	//PetscPrintf(PETSC_COMM_WORLD,"*** Ending _DataExCompleteCommunicationMap [setup time: %1.4e (sec)] *** \n",t1-t0);

	PetscFunctionReturn(0);
}
447 
#undef __FUNCT__
#define __FUNCT__ "DataExTopologyFinalize"
/*
 Close the topology-definition phase:
   - symmetrise the neighbour map (if A listed B, B now lists A),
   - allocate every per-neighbour work array,
   - precompute the matched send/recv tag for each neighbour.
 Collective on d->comm. After this call the neighbour list must not be
 changed without calling DataExTopologyInitialize() again.
*/
PetscErrorCode DataExTopologyFinalize(DataEx d)
{
	PetscMPIInt    symm_nn;
	PetscMPIInt   *symm_procs;
	PetscMPIInt    r0,n,st,rt;
	PetscMPIInt    nprocs;
	PetscErrorCode ierr;


	PetscFunctionBegin;
	if (d->topology_status != DEOBJECT_INITIALIZED) {
		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" );
	}
	ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);

	/* given information about all my neighbours, make map symmetric */
	ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
	/* update my arrays: replace the one-sided list with the symmetrised one */
	free(d->neighbour_procs);

	d->n_neighbour_procs = symm_nn;
	d->neighbour_procs   = symm_procs;


	/* allocates memory — one entry per neighbour
	   NOTE(review): malloc results are not checked here (pre-existing) */
	if (d->messages_to_be_sent == NULL) {
		d->messages_to_be_sent = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
	}
	if (d->message_offsets == NULL) {
		d->message_offsets = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
	}
	if (d->messages_to_be_recvieved == NULL) {
		d->messages_to_be_recvieved = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
	}

	if (d->pack_cnt == NULL) {
		d->pack_cnt = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
	}

	/* 2x entries: slots [0,n) serve the sends, [n,2n) the receives */
	if (d->_stats == NULL) {
		d->_stats = (MPI_Status*)malloc( sizeof(MPI_Status) * 2*d->n_neighbour_procs );
	}
	if (d->_requests == NULL) {
		d->_requests = (MPI_Request*)malloc( sizeof(MPI_Request) * 2*d->n_neighbour_procs );
	}

	if (d->send_tags == NULL) {
		d->send_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
	}
	if (d->recv_tags == NULL) {
		d->recv_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
	}

	/* compute message tags: my send tag to r1 matches r1's recv tag from me */
	ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
	r0 = d->rank;
	for (n=0; n<d->n_neighbour_procs; n++) {
		PetscMPIInt r1 = d->neighbour_procs[n];

		_get_tags( d->instance, nprocs, r0,r1, &st, &rt );

		d->send_tags[n] = (int)st;
		d->recv_tags[n] = (int)rt;
	}

	d->topology_status = DEOBJECT_FINALIZED;
	ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);

	PetscFunctionReturn(0);
}
520 
521 /* === Phase B === */
522 #undef __FUNCT__
523 #define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
524 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
525 {
526 	PetscMPIInt i,np;
527 
528 
529 	PetscFunctionBegin;
530 	np = de->n_neighbour_procs;
531 
532 	*local = -1;
533 	for (i=0; i<np; i++) {
534 		if (proc_id == de->neighbour_procs[i]) {
535 			*local = i;
536 			break;
537 		}
538 	}
539 	PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "DataExInitializeSendCount"
544 PetscErrorCode DataExInitializeSendCount(DataEx de)
545 {
546 	PetscMPIInt i;
547 
548 
549 	PetscFunctionBegin;
550 	if (de->topology_status != DEOBJECT_FINALIZED) {
551 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
552 	}
553 
554 	de->message_lengths_status = DEOBJECT_INITIALIZED;
555 
556 	for (i=0; i<de->n_neighbour_procs; i++) {
557 		de->messages_to_be_sent[i] = 0;
558 	}
559 
560 	PetscFunctionReturn(0);
561 }
562 
563 /*
564 1) only allows counters to be set on neighbouring cpus
565 */
566 #undef __FUNCT__
567 #define __FUNCT__ "DataExAddToSendCount"
568 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
569 {
570 	PetscMPIInt    local_val;
571 	PetscErrorCode ierr;
572 
573 
574 	PetscFunctionBegin;
575 	if (de->message_lengths_status == DEOBJECT_FINALIZED) {
576 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
577 	}
578 	else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
579 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
580 	}
581 
582 	ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
583 	if (local_val == -1) {
584 		SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
585 	}
586 
587 	de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
588 	PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "DataExFinalizeSendCount"
593 PetscErrorCode DataExFinalizeSendCount(DataEx de)
594 {
595 	PetscFunctionBegin;
596 	if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
597 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
598 	}
599 	de->message_lengths_status = DEOBJECT_FINALIZED;
600 
601 	PetscFunctionReturn(0);
602 }
603 
604 /* === Phase C === */
605 /*
606  * zero out all send counts
607  * free send and recv buffers
608  * zeros out message length
609  * zeros out all counters
610  * zero out packed data counters
611 */
612 #undef __FUNCT__
613 #define __FUNCT__ "_DataExInitializeTmpStorage"
614 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
615 {
616 	PetscMPIInt i,np;
617 
618 
619 	PetscFunctionBegin;
620 	if (de->n_neighbour_procs < 0) {
621 		SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
622 	}
623 	if (de->neighbour_procs == NULL) {
624 		SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
625 	}
626 
627 	np = de->n_neighbour_procs;
628 	for (i=0; i<np; i++) {
629 	/*	de->messages_to_be_sent[i] = -1; */
630 		de->messages_to_be_recvieved[i] = -1;
631 	}
632 
633 	if (de->send_message != NULL) {
634 		free(de->send_message);
635 		de->send_message = NULL;
636 	}
637 	if (de->recv_message != NULL) {
638 		free(de->recv_message);
639 		de->recv_message = NULL;
640 	}
641 
642 	PetscFunctionReturn(0);
643 }
644 
645 /*
646 *) Zeros out pack data counters
647 *) Ensures mesaage length is set
648 *) Checks send counts properly initialized
649 *) allocates space for pack data
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "DataExPackInitialize"
653 PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
654 {
655 	PetscMPIInt    i,np;
656 	PetscInt       total;
657 	PetscErrorCode ierr;
658 
659 
660 	PetscFunctionBegin;
661 	if (de->topology_status != DEOBJECT_FINALIZED) {
662 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
663 	}
664 	if (de->message_lengths_status != DEOBJECT_FINALIZED) {
665 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
666 	}
667 
668 	de->packer_status = DEOBJECT_INITIALIZED;
669 
670 	ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
671 
672 	np = de->n_neighbour_procs;
673 
674 	de->unit_message_size = unit_message_size;
675 
676 	total = 0;
677 	for (i=0; i<np; i++) {
678 		if (de->messages_to_be_sent[i] == -1) {
679 			PetscMPIInt proc_neighour = de->neighbour_procs[i];
680 			SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
681 		}
682 		total = total + de->messages_to_be_sent[i];
683 	}
684 
685 	/* create space for the data to be sent */
686 	de->send_message = (void*)malloc( unit_message_size * (total + 1) );
687 	/* initialize memory */
688 	memset( de->send_message, 0, unit_message_size * (total + 1) );
689 	/* set total items to send */
690 	de->send_message_length = total;
691 
692 	de->message_offsets[0] = 0;
693 	total = de->messages_to_be_sent[0];
694 	for (i=1; i<np; i++) {
695 		de->message_offsets[i] = total;
696 		total = total + de->messages_to_be_sent[i];
697 	}
698 
699 	/* init the packer counters */
700 	de->total_pack_cnt = 0;
701 	for (i=0; i<np; i++) {
702 		de->pack_cnt[i] = 0;
703 	}
704 
705 	PetscFunctionReturn(0);
706 }
707 
708 /*
709 *) Ensures data gets been packed appropriately and no overlaps occur
710 */
711 #undef __FUNCT__
712 #define __FUNCT__ "DataExPackData"
713 PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
714 {
715 	PetscMPIInt    local;
716 	PetscInt       insert_location;
717 	void           *dest;
718 	PetscErrorCode ierr;
719 
720 
721 	PetscFunctionBegin;
722 	if (de->packer_status == DEOBJECT_FINALIZED) {
723 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
724 	}
725 	else if (de->packer_status != DEOBJECT_INITIALIZED) {
726 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
727 	}
728 
729 
730 	if (de->send_message == NULL){
731 		SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
732 	}
733 
734 
735 	ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
736 	if (local == -1) {
737 		SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
738 	}
739 
740 	if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
741 		SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
742 				(int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
743 
744 		/* don't need this - the catch for too many messages will pick this up. Gives us more info though */
745 		if (de->packer_status == DEOBJECT_FINALIZED) {
746 			SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." );
747 		}
748 	}
749 
750 	/* copy memory */
751 	insert_location = de->message_offsets[local] + de->pack_cnt[local];
752 	dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
753 	memcpy( dest, data, de->unit_message_size * n );
754 
755 	/* increment counter */
756 	de->pack_cnt[local] = de->pack_cnt[local] + n;
757 
758 	PetscFunctionReturn(0);
759 }
760 
/*
*) Ensures all data has been packed
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackFinalize"
/*
 Close the packing phase: verify every neighbour's slice was filled exactly,
 exchange the per-neighbour message counts with all neighbours (non-blocking
 Isend/Irecv + Waitall), then allocate and zero the receive buffer.
 On success the packer is FINALIZED and the communication phase becomes
 INITIALIZED, ready for DataExBegin()/DataExEnd().
*/
PetscErrorCode DataExPackFinalize(DataEx de)
{
	PetscMPIInt i,np;
	PetscInt    total;
	PetscErrorCode ierr;


	PetscFunctionBegin;
	if (de->packer_status != DEOBJECT_INITIALIZED) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
	}

	np = de->n_neighbour_procs;

	/* every unit promised via DataExAddToSendCount() must have been packed */
	for (i=0; i<np; i++) {
		if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
			SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
					(int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
		}
	}

	/* init: recv counts are unknown until the exchange below completes */
	for (i=0; i<np; i++) {
		de->messages_to_be_recvieved[i] = -1;
	}

	/* figure out the recv counts here: request slots [0,np) carry the sends,
	   [np,2np) the receives */
	for (i=0; i<np; i++) {
	//	MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], de->send_tags[i], de->comm );
		ierr = MPI_Isend( &de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
	//	MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm );
	}
	for (i=0; i<np; i++) {
	//	MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &stat );
		ierr = MPI_Irecv( &de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
	//	MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm, &stat );
	}
	ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);

	/* create space for the data to be recvieved; the +1 keeps the allocation
	   non-empty when nothing will arrive
	   NOTE(review): malloc result not checked here — pre-existing behaviour */
	total = 0;
	for (i=0; i<np; i++) {
		total = total + de->messages_to_be_recvieved[i];
	}
	de->recv_message = (void*)malloc( de->unit_message_size * (total + 1) );
	/* initialize memory */
	memset( de->recv_message, 0, de->unit_message_size * (total + 1) );
	/* set total items to recieve */
	de->recv_message_length = total;

	de->packer_status = DEOBJECT_FINALIZED;

	de->communication_status = DEOBJECT_INITIALIZED;

	PetscFunctionReturn(0);
}
822 
/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExBegin"
/*
 Start the exchange: post one non-blocking MPI_Isend per neighbour, sending
 that neighbour's slice of the packed send buffer as raw bytes (MPI_CHAR).
 The matching receives are posted — and all requests completed — in
 DataExEnd(). Requires topology, message lengths and packing finalized.
*/
PetscErrorCode DataExBegin(DataEx de)
{
	PetscMPIInt i,np;
	void       *dest;
	PetscInt    length;
	PetscErrorCode ierr;


	PetscFunctionBegin;
	if (de->topology_status != DEOBJECT_FINALIZED) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
	}
	if (de->message_lengths_status != DEOBJECT_FINALIZED) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
	}
	if (de->packer_status != DEOBJECT_FINALIZED) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
	}

	if (de->communication_status == DEOBJECT_FINALIZED) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
	}

	if (de->recv_message == NULL) {
		SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
	}

	ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
	np = de->n_neighbour_procs;

	/* == NON BLOCKING == */
	/* request slots [0,np) hold these sends; DataExEnd() uses [np,2np) for the receives */
	for (i=0; i<np; i++) {
		/* length is in bytes: units * unit size */
		length = de->messages_to_be_sent[i] * de->unit_message_size;
		dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
		ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
	}

	ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
	PetscFunctionReturn(0);
}
866 
867 /* do the actual message passing now */
868 #undef __FUNCT__
869 #define __FUNCT__ "DataExEnd"
870 PetscErrorCode DataExEnd(DataEx de)
871 {
872 	PetscMPIInt  i,np;
873 	PetscInt     total;
874 	PetscInt    *message_recv_offsets;
875 	void        *dest;
876 	PetscInt     length;
877 	PetscErrorCode ierr;
878 
879 
880 	PetscFunctionBegin;
881 	if (de->communication_status != DEOBJECT_INITIALIZED) {
882 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
883 	}
884 	if (de->recv_message == NULL) {
885 		SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
886 	}
887 
888 	ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
889 	np = de->n_neighbour_procs;
890 
891 	message_recv_offsets = (PetscInt*)malloc( sizeof(PetscInt) * np );
892 	message_recv_offsets[0] = 0;
893 	total = de->messages_to_be_recvieved[0];
894 	for (i=1; i<np; i++) {
895 		message_recv_offsets[i] = total;
896 		total = total + de->messages_to_be_recvieved[i];
897 	}
898 
899 	/* == NON BLOCKING == */
900 	for (i=0; i<np; i++) {
901 		length = de->messages_to_be_recvieved[i] * de->unit_message_size;
902 		dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
903 		ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
904 	}
905 	ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
906 
907 	free(message_recv_offsets);
908 
909 	de->communication_status = DEOBJECT_FINALIZED;
910 	ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
911 	PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "DataExGetSendData"
916 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
917 {
918 	PetscFunctionBegin;
919 	if (de->packer_status != DEOBJECT_FINALIZED) {
920 		SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
921 	}
922 	*length = de->send_message_length;
923 	*send   = de->send_message;
924 	PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "DataExGetRecvData"
929 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
930 {
931 	PetscFunctionBegin;
932 	if (de->communication_status != DEOBJECT_FINALIZED) {
933 		SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
934 	}
935 	*length = de->recv_message_length;
936 	*recv   = de->recv_message;
937 	PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "DataExTopologyGetNeighbours"
942 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
943 {
944 	PetscFunctionBegin;
945 	if (n)     { *n     = de->n_neighbour_procs; }
946 	if (neigh) { *neigh = de->neighbour_procs; }
947 	PetscFunctionReturn(0);
948 }
949 
950