xref: /petsc/src/mat/utils/matstash.c (revision 06262987a8ce5e8f5ab66e2c6a1b86bf95f4bf83)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 #include "src/mat/utils/matstashspace.h"
5 
6 /*
7        The input to the stash is ALWAYS in MatScalar precision, and the
8     internal storage and output is also in MatScalar.
9 */
10 #define DEFAULT_STASH_SIZE   10000
11 
12 /*
13   MatStashCreate_Private - Creates a stash,currently used for all the parallel
14   matrix implementations. The stash is where elements of a matrix destined
15   to be stored on other processors are kept until matrix assembly is done.
16 
17   This is a simple minded stash. Simply adds entries to end of stash.
18 
19   Input Parameters:
20   comm - communicator, required for scatters.
21   bs   - stash block size. used when stashing blocks of values
22 
23   Output Parameters:
24   stash    - the newly created stash
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStashCreate_Private"
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41 
42   nopt = stash->size;
43   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45   if (flg) {
46     if (nopt == 1)                max = opt[0];
47     else if (nopt == stash->size) max = opt[stash->rank];
48     else if (stash->rank < nopt)  max = opt[stash->rank];
49     else                          max = 0; /* Use default */
50     stash->umax = max;
51   } else {
52     stash->umax = 0;
53   }
54   ierr = PetscFree(opt);CHKERRQ(ierr);
55   if (bs <= 0) bs = 1;
56 
57   stash->bs       = bs;
58   stash->nmax     = 0;
59   stash->oldnmax  = 0;
60   stash->n        = 0;
61   stash->reallocs = -1;
62   stash->space_head = 0;
63   stash->space      = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 /*
96    MatStashScatterEnd_Private - This is called as the fial stage of
97    scatter. The final stages of messagepassing is done here, and
98    all the memory used for messagepassing is cleanedu up. This
99    routine also resets the stash, and deallocates the memory used
100    for the stash. It also keeps track of the current memory usage
101    so that the same value can be used the next time through.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "MatStashScatterEnd_Private"
105 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106 {
107   PetscErrorCode ierr;
108   PetscInt       nsends=stash->nsends,bs2,oldnmax;
109   MPI_Status     *send_status;
110 
111   PetscFunctionBegin;
112   /* wait on sends */
113   if (nsends) {
114     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116     ierr = PetscFree(send_status);CHKERRQ(ierr);
117   }
118 
119   /* Now update nmaxold to be app 10% more than max n used, this way the
120      wastage of space is reduced the next time this stash is used.
121      Also update the oldmax, only if it increases */
122   if (stash->n) {
123     bs2      = stash->bs*stash->bs;
124     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126   }
127 
128   stash->nmax       = 0;
129   stash->n          = 0;
130   stash->reallocs   = -1;
131   stash->nprocessed = 0;
132   if (stash->space_head){
133     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
134     stash->space_head = 0;
135   }
136   if (stash->send_waits) {
137     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138     stash->send_waits = 0;
139   }
140   if (stash->recv_waits) {
141     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
142     stash->recv_waits = 0;
143   }
144   if (stash->svalues) {
145     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
146     stash->svalues = 0;
147   }
148   if (stash->rvalues) {
149     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
150     stash->rvalues = 0;
151   }
152   if (stash->rindices) {
153     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
154     stash->rindices = 0;
155   }
156   if (stash->nprocs) {
157     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
158     stash->nprocs = 0;
159   }
160   PetscFunctionReturn(0);
161 }
162 
163 /*
164    MatStashGetInfo_Private - Gets the relavant statistics of the stash
165 
166    Input Parameters:
167    stash    - the stash
168    nstash   - the size of the stash. Indicates the number of values stored.
169    reallocs - the number of additional mallocs incurred.
170 
171 */
172 #undef __FUNCT__
173 #define __FUNCT__ "MatStashGetInfo_Private"
174 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
175 {
176   PetscInt bs2 = stash->bs*stash->bs;
177 
178   PetscFunctionBegin;
179   if (nstash) *nstash   = stash->n*bs2;
180   if (reallocs) {
181     if (stash->reallocs < 0) *reallocs = 0;
182     else                     *reallocs = stash->reallocs;
183   }
184   PetscFunctionReturn(0);
185 }
186 
187 /*
188    MatStashSetInitialSize_Private - Sets the initial size of the stash
189 
190    Input Parameters:
191    stash  - the stash
192    max    - the value that is used as the max size of the stash.
193             this value is used while allocating memory.
194 */
195 #undef __FUNCT__
196 #define __FUNCT__ "MatStashSetInitialSize_Private"
197 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
198 {
199   PetscFunctionBegin;
200   stash->umax = max;
201   PetscFunctionReturn(0);
202 }
203 
204 /* MatStashExpand_Private - Expand the stash. This function is called
205    when the space in the stash is not sufficient to add the new values
206    being inserted into the stash.
207 
208    Input Parameters:
209    stash - the stash
210    incr  - the minimum increase requested
211 
212    Notes:
213    This routine doubles the currently used memory.
214  */
215 #undef __FUNCT__
216 #define __FUNCT__ "MatStashExpand_Private"
217 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
218 {
219   PetscErrorCode ierr;
220   PetscInt       newnmax,bs2= stash->bs*stash->bs;
221 
222   PetscFunctionBegin;
223   /* allocate a larger stash */
224   if (!stash->oldnmax && !stash->nmax) { /* new stash */
225     if (stash->umax)                  newnmax = stash->umax/bs2;
226     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
227   } else if (!stash->nmax) { /* resuing stash */
228     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
229     else                              newnmax = stash->oldnmax/bs2;
230   } else                              newnmax = stash->nmax*2;
231   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
232 
233   /* Get a MatStashSpace and attach it to stash */
234   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
235     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
236     stash->space = stash->space_head;
237   } else {
238     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
239   }
240   stash->reallocs++;
241   stash->nmax = newnmax;
242   PetscFunctionReturn(0);
243 }
244 /*
245   MatStashValuesRow_Private - inserts values into the stash. This function
246   expects the values to be roworiented. Multiple columns belong to the same row
247   can be inserted with a single call to this function.
248 
249   Input Parameters:
250   stash  - the stash
251   row    - the global row correspoiding to the values
252   n      - the number of elements inserted. All elements belong to the above row.
253   idxn   - the global column indices corresponding to each of the values.
254   values - the values inserted
255 */
256 #undef __FUNCT__
257 #define __FUNCT__ "MatStashValuesRow_Private"
258 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
259 {
260   PetscErrorCode     ierr;
261   PetscInt           i,k;
262   PetscMatStashSpace space=stash->space;
263 
264   PetscFunctionBegin;
265   /* Check and see if we have sufficient memory */
266   if (!space || space->local_remaining < n){
267     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
268   }
269   space = stash->space;
270   k     = space->local_used;
271   for (i=0; i<n; i++) {
272     space->idx[k] = row;
273     space->idy[k] = idxn[i];
274     space->val[k] = values[i];
275     k++;
276   }
277   stash->n               += n;
278   space->local_used      += n;
279   space->local_remaining -= n;
280   PetscFunctionReturn(0);
281 }
282 
283 /*
284   MatStashValuesCol_Private - inserts values into the stash. This function
285   expects the values to be columnoriented. Multiple columns belong to the same row
286   can be inserted with a single call to this function.
287 
288   Input Parameters:
289   stash   - the stash
290   row     - the global row correspoiding to the values
291   n       - the number of elements inserted. All elements belong to the above row.
292   idxn    - the global column indices corresponding to each of the values.
293   values  - the values inserted
294   stepval - the consecutive values are sepated by a distance of stepval.
295             this happens because the input is columnoriented.
296 */
297 #undef __FUNCT__
298 #define __FUNCT__ "MatStashValuesCol_Private"
299 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
300 {
301   PetscErrorCode     ierr;
302   PetscInt           i,k;
303   PetscMatStashSpace space=stash->space;
304 
305   PetscFunctionBegin;
306   /* Check and see if we have sufficient memory */
307   if (!space || space->local_remaining < n){
308     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
309   }
310   space = stash->space;
311   k = space->local_used;
312   for (i=0; i<n; i++) {
313     space->idx[k] = row;
314     space->idy[k] = idxn[i];
315     space->val[k] = values[i*stepval];
316     k++;
317   }
318   stash->n               += n;
319   space->local_used      += n;
320   space->local_remaining -= n;
321   PetscFunctionReturn(0);
322 }
323 
324 /*
325   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
326   This function expects the values to be roworiented. Multiple columns belong
327   to the same block-row can be inserted with a single call to this function.
328   This function extracts the sub-block of values based on the dimensions of
329   the original input block, and the row,col values corresponding to the blocks.
330 
331   Input Parameters:
332   stash  - the stash
333   row    - the global block-row correspoiding to the values
334   n      - the number of elements inserted. All elements belong to the above row.
335   idxn   - the global block-column indices corresponding to each of the blocks of
336            values. Each block is of size bs*bs.
337   values - the values inserted
338   rmax   - the number of block-rows in the original block.
339   cmax   - the number of block-columsn on the original block.
340   idx    - the index of the current block-row in the original block.
341 */
342 #undef __FUNCT__
343 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
344 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
345 {
346   PetscErrorCode     ierr;
347   PetscInt           i,j,k,bs2,bs=stash->bs,l;
348   const MatScalar    *vals;
349   MatScalar          *array;
350   PetscMatStashSpace space=stash->space;
351 
352   PetscFunctionBegin;
353   if (!space || space->local_remaining < n){
354     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
355   }
356   space = stash->space;
357   l     = space->local_used;
358   bs2   = bs*bs;
359   for (i=0; i<n; i++) {
360     space->idx[l] = row;
361     space->idy[l] = idxn[i];
362     /* Now copy over the block of values. Store the values column oriented.
363        This enables inserting multiple blocks belonging to a row with a single
364        funtion call */
365     array = space->val + bs2*l;
366     vals  = values + idx*bs2*n + bs*i;
367     for (j=0; j<bs; j++) {
368       for (k=0; k<bs; k++) array[k*bs] = vals[k];
369       array++;
370       vals  += cmax*bs;
371     }
372     l++;
373   }
374   stash->n               += n;
375   space->local_used      += n;
376   space->local_remaining -= n;
377   PetscFunctionReturn(0);
378 }
379 
380 /*
381   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
382   This function expects the values to be roworiented. Multiple columns belong
383   to the same block-row can be inserted with a single call to this function.
384   This function extracts the sub-block of values based on the dimensions of
385   the original input block, and the row,col values corresponding to the blocks.
386 
387   Input Parameters:
388   stash  - the stash
389   row    - the global block-row correspoiding to the values
390   n      - the number of elements inserted. All elements belong to the above row.
391   idxn   - the global block-column indices corresponding to each of the blocks of
392            values. Each block is of size bs*bs.
393   values - the values inserted
394   rmax   - the number of block-rows in the original block.
395   cmax   - the number of block-columsn on the original block.
396   idx    - the index of the current block-row in the original block.
397 */
398 #undef __FUNCT__
399 #define __FUNCT__ "MatStashValuesColBlocked_Private"
400 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
401 {
402   PetscErrorCode  ierr;
403   PetscInt        i,j,k,bs2,bs=stash->bs,l;
404   const MatScalar *vals;
405   MatScalar       *array;
406   PetscMatStashSpace space=stash->space;
407 
408   PetscFunctionBegin;
409   if (!space || space->local_remaining < n){
410     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
411   }
412   space = stash->space;
413   l     = space->local_used;
414   bs2   = bs*bs;
415   for (i=0; i<n; i++) {
416     space->idx[l] = row;
417     space->idy[l] = idxn[i];
418     /* Now copy over the block of values. Store the values column oriented.
419      This enables inserting multiple blocks belonging to a row with a single
420      funtion call */
421     array = space->val + bs2*l;
422     vals  = values + idx*bs2*n + bs*i;
423     for (j=0; j<bs; j++) {
424       for (k=0; k<bs; k++) {array[k] = vals[k];}
425       array += bs;
426       vals  += rmax*bs;
427     }
428     l++;
429   }
430   stash->n               += n;
431   space->local_used      += n;
432   space->local_remaining -= n;
433   PetscFunctionReturn(0);
434 }
435 /*
436   MatStashScatterBegin_Private - Initiates the transfer of values to the
437   correct owners. This function goes through the stash, and check the
438   owners of each stashed value, and sends the values off to the owner
439   processors.
440 
441   Input Parameters:
442   stash  - the stash
443   owners - an array of size 'no-of-procs' which gives the ownership range
444            for each node.
445 
446   Notes: The 'owners' array in the cased of the blocked-stash has the
447   ranges specified blocked global indices, and for the regular stash in
448   the proper global indices.
449 */
450 #undef __FUNCT__
451 #define __FUNCT__ "MatStashScatterBegin_Private"
452 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
453 {
454   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
455   PetscInt       size=stash->size,nsends;
456   PetscErrorCode ierr;
457   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
458   MatScalar      **rvalues,*svalues;
459   MPI_Comm       comm = stash->comm;
460   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
461   PetscMPIInt    *nprocs,*nlengths,nreceives;
462   PetscInt       *sp_idx,*sp_idy;
463   MatScalar      *sp_val;
464   PetscMatStashSpace space,space_next;
465 
466   PetscFunctionBegin;
467   bs2 = stash->bs*stash->bs;
468 
469   /*  first count number of contributors to each processor */
470   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
471   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
472   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
473 
474   nlengths = nprocs+size;
475   i = j    = 0;
476   lastidx  = -1;
477   space    = stash->space_head;
478   while (space != PETSC_NULL){
479     space_next = space->next;
480     sp_idx     = space->idx;
481     for (l=0; l<space->local_used; l++){
482       /* if indices are NOT locally sorted, need to start search at the beginning */
483       if (lastidx > (idx = sp_idx[l])) j = 0;
484       lastidx = idx;
485       for (; j<size; j++) {
486         if (idx >= owners[j] && idx < owners[j+1]) {
487           nlengths[j]++; owner[i] = j; break;
488         }
489       }
490       i++;
491     }
492     space = space_next;
493   }
494   /* Now check what procs get messages - and compute nsends. */
495   for (i=0, nsends=0 ; i<size; i++) {
496     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
497   }
498 
499   { int  *onodes,*olengths;
500   /* Determine the number of messages to expect, their lengths, from from-ids */
501   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
502   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
503   /* since clubbing row,col - lengths are multiplied by 2 */
504   for (i=0; i<nreceives; i++) olengths[i] *=2;
505   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
506   /* values are size 'bs2' lengths (and remove earlier factor 2 */
507   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
508   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
509   ierr = PetscFree(onodes);CHKERRQ(ierr);
510   ierr = PetscFree(olengths);CHKERRQ(ierr);
511   }
512 
513   /* do sends:
514       1) starts[i] gives the starting index in svalues for stuff going to
515          the ith processor
516   */
517   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
518   sindices = (PetscInt*)(svalues + bs2*stash->n);
519   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
520   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
521   starti   = startv + size;
522   /* use 2 sends the first with all_a, the next with all_i and all_j */
523   startv[0]  = 0; starti[0] = 0;
524   for (i=1; i<size; i++) {
525     startv[i] = startv[i-1] + nlengths[i-1];
526     starti[i] = starti[i-1] + nlengths[i-1]*2;
527   }
528 
529   i     = 0;
530   space = stash->space_head;
531   while (space != PETSC_NULL){
532     space_next = space->next;
533     sp_idx = space->idx;
534     sp_idy = space->idy;
535     sp_val = space->val;
536     for (l=0; l<space->local_used; l++){
537       j = owner[i];
538       if (bs2 == 1) {
539         svalues[startv[j]] = sp_val[l];
540       } else {
541         PetscInt  k;
542         MatScalar *buf1,*buf2;
543         buf1 = svalues+bs2*startv[j];
544         buf2 = space->val + bs2*i;
545         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
546       }
547       sindices[starti[j]]             = sp_idx[l];
548       sindices[starti[j]+nlengths[j]] = sp_idy[l];
549       startv[j]++;
550       starti[j]++;
551       i++;
552     }
553     space = space_next;
554   }
555   startv[0] = 0;
556   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
557 
558   for (i=0,count=0; i<size; i++) {
559     if (nprocs[i]) {
560       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
561       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
562     }
563   }
564 #if defined(PETSC_USE_INFO)
565   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
566   for (i=0; i<size; i++) {
567     if (nprocs[i]) {
568       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
569     }
570   }
571 #endif
572   ierr = PetscFree(owner);CHKERRQ(ierr);
573   ierr = PetscFree(startv);CHKERRQ(ierr);
574   /* This memory is reused in scatter end  for a different purpose*/
575   for (i=0; i<2*size; i++) nprocs[i] = -1;
576   stash->nprocs = nprocs;
577 
578   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
579   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
580 
581   for (i=0; i<nreceives; i++) {
582     recv_waits[2*i]   = recv_waits1[i];
583     recv_waits[2*i+1] = recv_waits2[i];
584   }
585   stash->recv_waits = recv_waits;
586   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
587   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
588 
589   stash->svalues    = svalues;    stash->rvalues     = rvalues;
590   stash->rindices   = rindices;   stash->send_waits  = send_waits;
591   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
592   PetscFunctionReturn(0);
593 }
594 
595 /*
596    MatStashScatterGetMesg_Private - This function waits on the receives posted
597    in the function MatStashScatterBegin_Private() and returns one message at
598    a time to the calling function. If no messages are left, it indicates this
599    by setting flg = 0, else it sets flg = 1.
600 
601    Input Parameters:
602    stash - the stash
603 
604    Output Parameters:
605    nvals - the number of entries in the current message.
606    rows  - an array of row indices (or blocked indices) corresponding to the values
607    cols  - an array of columnindices (or blocked indices) corresponding to the values
608    vals  - the values
609    flg   - 0 indicates no more message left, and the current call has no values associated.
610            1 indicates that the current call successfully received a message, and the
611              other output parameters nvals,rows,cols,vals are set appropriately.
612 */
613 #undef __FUNCT__
614 #define __FUNCT__ "MatStashScatterGetMesg_Private"
615 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
616 {
617   PetscErrorCode ierr;
618   PetscMPIInt    i,*flg_v,i1,i2;
619   PetscInt       bs2;
620   MPI_Status     recv_status;
621   PetscTruth     match_found = PETSC_FALSE;
622 
623   PetscFunctionBegin;
624 
625   *flg = 0; /* When a message is discovered this is reset to 1 */
626   /* Return if no more messages to process */
627   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
628 
629   flg_v = stash->nprocs;
630   bs2   = stash->bs*stash->bs;
631   /* If a matching pair of receieves are found, process them, and return the data to
632      the calling function. Until then keep receiving messages */
633   while (!match_found) {
634     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
635     /* Now pack the received message into a structure which is useable by others */
636     if (i % 2) {
637       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
638       flg_v[2*recv_status.MPI_SOURCE] = i/2;
639       *nvals = *nvals/bs2;
640     } else {
641       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
642       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
643       *nvals = *nvals/2; /* This message has both row indices and col indices */
644     }
645 
646     /* Check if we have both the messages from this proc */
647     i1 = flg_v[2*recv_status.MPI_SOURCE];
648     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
649     if (i1 != -1 && i2 != -1) {
650       *rows       = stash->rindices[i2];
651       *cols       = *rows + *nvals;
652       *vals       = stash->rvalues[i1];
653       *flg        = 1;
654       stash->nprocessed ++;
655       match_found = PETSC_TRUE;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660