xref: /petsc/src/mat/utils/matstash.c (revision c05d87d6c2fa1d0952fe676de20339d09daa79bc)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt;
27   PetscTruth     flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36 
37   nopt = stash->size;
38   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
39   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
40   if (flg) {
41     if (nopt == 1)                max = opt[0];
42     else if (nopt == stash->size) max = opt[stash->rank];
43     else if (stash->rank < nopt)  max = opt[stash->rank];
44     else                          max = 0; /* Use default */
45     stash->umax = max;
46   } else {
47     stash->umax = 0;
48   }
49   ierr = PetscFree(opt);CHKERRQ(ierr);
50   if (bs <= 0) bs = 1;
51 
52   stash->bs       = bs;
53   stash->nmax     = 0;
54   stash->oldnmax  = 0;
55   stash->n        = 0;
56   stash->reallocs = -1;
57   stash->space_head = 0;
58   stash->space      = 0;
59 
60   stash->send_waits  = 0;
61   stash->recv_waits  = 0;
62   stash->send_status = 0;
63   stash->nsends      = 0;
64   stash->nrecvs      = 0;
65   stash->svalues     = 0;
66   stash->rvalues     = 0;
67   stash->rindices    = 0;
68   stash->nprocessed  = 0;
69   PetscFunctionReturn(0);
70 }
71 
72 /*
73    MatStashDestroy_Private - Destroy the stash
74 */
75 #undef __FUNCT__
76 #define __FUNCT__ "MatStashDestroy_Private"
77 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (stash->space_head){
83     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
84     stash->space_head = 0;
85     stash->space      = 0;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 /*
91    MatStashScatterEnd_Private - This is called as the fial stage of
92    scatter. The final stages of messagepassing is done here, and
93    all the memory used for messagepassing is cleanedu up. This
94    routine also resets the stash, and deallocates the memory used
95    for the stash. It also keeps track of the current memory usage
96    so that the same value can be used the next time through.
97 */
98 #undef __FUNCT__
99 #define __FUNCT__ "MatStashScatterEnd_Private"
100 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
101 {
102   PetscErrorCode ierr;
103   PetscInt       nsends=stash->nsends,bs2,oldnmax;
104   MPI_Status     *send_status;
105 
106   PetscFunctionBegin;
107   /* wait on sends */
108   if (nsends) {
109     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
110     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
111     ierr = PetscFree(send_status);CHKERRQ(ierr);
112   }
113 
114   /* Now update nmaxold to be app 10% more than max n used, this way the
115      wastage of space is reduced the next time this stash is used.
116      Also update the oldmax, only if it increases */
117   if (stash->n) {
118     bs2      = stash->bs*stash->bs;
119     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
120     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
121   }
122 
123   stash->nmax       = 0;
124   stash->n          = 0;
125   stash->reallocs   = -1;
126   stash->nprocessed = 0;
127   if (stash->space_head){
128     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
129     stash->space_head = 0;
130     stash->space      = 0;
131   }
132   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
133   stash->send_waits = 0;
134   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
135   stash->recv_waits = 0;
136   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
137   stash->svalues = 0;
138   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
139   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
140   stash->rvalues = 0;
141   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
142   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
143   stash->rindices = 0;
144   PetscFunctionReturn(0);
145 }
146 
147 /*
148    MatStashGetInfo_Private - Gets the relavant statistics of the stash
149 
150    Input Parameters:
151    stash    - the stash
152    nstash   - the size of the stash. Indicates the number of values stored.
153    reallocs - the number of additional mallocs incurred.
154 
155 */
156 #undef __FUNCT__
157 #define __FUNCT__ "MatStashGetInfo_Private"
158 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
159 {
160   PetscInt bs2 = stash->bs*stash->bs;
161 
162   PetscFunctionBegin;
163   if (nstash) *nstash   = stash->n*bs2;
164   if (reallocs) {
165     if (stash->reallocs < 0) *reallocs = 0;
166     else                     *reallocs = stash->reallocs;
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 /*
172    MatStashSetInitialSize_Private - Sets the initial size of the stash
173 
174    Input Parameters:
175    stash  - the stash
176    max    - the value that is used as the max size of the stash.
177             this value is used while allocating memory.
178 */
179 #undef __FUNCT__
180 #define __FUNCT__ "MatStashSetInitialSize_Private"
181 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
182 {
183   PetscFunctionBegin;
184   stash->umax = max;
185   PetscFunctionReturn(0);
186 }
187 
188 /* MatStashExpand_Private - Expand the stash. This function is called
189    when the space in the stash is not sufficient to add the new values
190    being inserted into the stash.
191 
192    Input Parameters:
193    stash - the stash
194    incr  - the minimum increase requested
195 
196    Notes:
197    This routine doubles the currently used memory.
198  */
199 #undef __FUNCT__
200 #define __FUNCT__ "MatStashExpand_Private"
201 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
202 {
203   PetscErrorCode ierr;
204   PetscInt       newnmax,bs2= stash->bs*stash->bs;
205 
206   PetscFunctionBegin;
207   /* allocate a larger stash */
208   if (!stash->oldnmax && !stash->nmax) { /* new stash */
209     if (stash->umax)                  newnmax = stash->umax/bs2;
210     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
211   } else if (!stash->nmax) { /* resuing stash */
212     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
213     else                              newnmax = stash->oldnmax/bs2;
214   } else                              newnmax = stash->nmax*2;
215   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
216 
217   /* Get a MatStashSpace and attach it to stash */
218   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
219   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
220     stash->space_head = stash->space;
221   }
222 
223   stash->reallocs++;
224   stash->nmax = newnmax;
225   PetscFunctionReturn(0);
226 }
227 /*
228   MatStashValuesRow_Private - inserts values into the stash. This function
229   expects the values to be roworiented. Multiple columns belong to the same row
230   can be inserted with a single call to this function.
231 
232   Input Parameters:
233   stash  - the stash
234   row    - the global row correspoiding to the values
235   n      - the number of elements inserted. All elements belong to the above row.
236   idxn   - the global column indices corresponding to each of the values.
237   values - the values inserted
238 */
239 #undef __FUNCT__
240 #define __FUNCT__ "MatStashValuesRow_Private"
241 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
242 {
243   PetscErrorCode     ierr;
244   PetscInt           i,k,cnt = 0;
245   PetscMatStashSpace space=stash->space;
246 
247   PetscFunctionBegin;
248   /* Check and see if we have sufficient memory */
249   if (!space || space->local_remaining < n){
250     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
251   }
252   space = stash->space;
253   k     = space->local_used;
254   for (i=0; i<n; i++) {
255     if (ignorezeroentries && (values[i] == 0.0)) continue;
256     space->idx[k] = row;
257     space->idy[k] = idxn[i];
258     space->val[k] = values[i];
259     k++;
260     cnt++;
261   }
262   stash->n               += cnt;
263   space->local_used      += cnt;
264   space->local_remaining -= cnt;
265   PetscFunctionReturn(0);
266 }
267 
268 /*
269   MatStashValuesCol_Private - inserts values into the stash. This function
270   expects the values to be columnoriented. Multiple columns belong to the same row
271   can be inserted with a single call to this function.
272 
273   Input Parameters:
274   stash   - the stash
275   row     - the global row correspoiding to the values
276   n       - the number of elements inserted. All elements belong to the above row.
277   idxn    - the global column indices corresponding to each of the values.
278   values  - the values inserted
279   stepval - the consecutive values are sepated by a distance of stepval.
280             this happens because the input is columnoriented.
281 */
282 #undef __FUNCT__
283 #define __FUNCT__ "MatStashValuesCol_Private"
284 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
285 {
286   PetscErrorCode     ierr;
287   PetscInt           i,k,cnt = 0;
288   PetscMatStashSpace space=stash->space;
289 
290   PetscFunctionBegin;
291   /* Check and see if we have sufficient memory */
292   if (!space || space->local_remaining < n){
293     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
294   }
295   space = stash->space;
296   k = space->local_used;
297   for (i=0; i<n; i++) {
298     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
299     space->idx[k] = row;
300     space->idy[k] = idxn[i];
301     space->val[k] = values[i*stepval];
302     k++;
303     cnt++;
304   }
305   stash->n               += cnt;
306   space->local_used      += cnt;
307   space->local_remaining -= cnt;
308   PetscFunctionReturn(0);
309 }
310 
311 /*
312   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
313   This function expects the values to be roworiented. Multiple columns belong
314   to the same block-row can be inserted with a single call to this function.
315   This function extracts the sub-block of values based on the dimensions of
316   the original input block, and the row,col values corresponding to the blocks.
317 
318   Input Parameters:
319   stash  - the stash
320   row    - the global block-row correspoiding to the values
321   n      - the number of elements inserted. All elements belong to the above row.
322   idxn   - the global block-column indices corresponding to each of the blocks of
323            values. Each block is of size bs*bs.
324   values - the values inserted
325   rmax   - the number of block-rows in the original block.
326   cmax   - the number of block-columsn on the original block.
327   idx    - the index of the current block-row in the original block.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
331 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
332 {
333   PetscErrorCode     ierr;
334   PetscInt           i,j,k,bs2,bs=stash->bs,l;
335   const PetscScalar  *vals;
336   PetscScalar        *array;
337   PetscMatStashSpace space=stash->space;
338 
339   PetscFunctionBegin;
340   if (!space || space->local_remaining < n){
341     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
342   }
343   space = stash->space;
344   l     = space->local_used;
345   bs2   = bs*bs;
346   for (i=0; i<n; i++) {
347     space->idx[l] = row;
348     space->idy[l] = idxn[i];
349     /* Now copy over the block of values. Store the values column oriented.
350        This enables inserting multiple blocks belonging to a row with a single
351        funtion call */
352     array = space->val + bs2*l;
353     vals  = values + idx*bs2*n + bs*i;
354     for (j=0; j<bs; j++) {
355       for (k=0; k<bs; k++) array[k*bs] = vals[k];
356       array++;
357       vals  += cmax*bs;
358     }
359     l++;
360   }
361   stash->n               += n;
362   space->local_used      += n;
363   space->local_remaining -= n;
364   PetscFunctionReturn(0);
365 }
366 
367 /*
368   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
369   This function expects the values to be roworiented. Multiple columns belong
370   to the same block-row can be inserted with a single call to this function.
371   This function extracts the sub-block of values based on the dimensions of
372   the original input block, and the row,col values corresponding to the blocks.
373 
374   Input Parameters:
375   stash  - the stash
376   row    - the global block-row correspoiding to the values
377   n      - the number of elements inserted. All elements belong to the above row.
378   idxn   - the global block-column indices corresponding to each of the blocks of
379            values. Each block is of size bs*bs.
380   values - the values inserted
381   rmax   - the number of block-rows in the original block.
382   cmax   - the number of block-columsn on the original block.
383   idx    - the index of the current block-row in the original block.
384 */
385 #undef __FUNCT__
386 #define __FUNCT__ "MatStashValuesColBlocked_Private"
387 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
388 {
389   PetscErrorCode     ierr;
390   PetscInt           i,j,k,bs2,bs=stash->bs,l;
391   const PetscScalar  *vals;
392   PetscScalar        *array;
393   PetscMatStashSpace space=stash->space;
394 
395   PetscFunctionBegin;
396   if (!space || space->local_remaining < n){
397     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
398   }
399   space = stash->space;
400   l     = space->local_used;
401   bs2   = bs*bs;
402   for (i=0; i<n; i++) {
403     space->idx[l] = row;
404     space->idy[l] = idxn[i];
405     /* Now copy over the block of values. Store the values column oriented.
406      This enables inserting multiple blocks belonging to a row with a single
407      funtion call */
408     array = space->val + bs2*l;
409     vals  = values + idx*bs2*n + bs*i;
410     for (j=0; j<bs; j++) {
411       for (k=0; k<bs; k++) {array[k] = vals[k];}
412       array += bs;
413       vals  += rmax*bs;
414     }
415     l++;
416   }
417   stash->n               += n;
418   space->local_used      += n;
419   space->local_remaining -= n;
420   PetscFunctionReturn(0);
421 }
422 /*
423   MatStashScatterBegin_Private - Initiates the transfer of values to the
424   correct owners. This function goes through the stash, and check the
425   owners of each stashed value, and sends the values off to the owner
426   processors.
427 
428   Input Parameters:
429   stash  - the stash
430   owners - an array of size 'no-of-procs' which gives the ownership range
431            for each node.
432 
433   Notes: The 'owners' array in the cased of the blocked-stash has the
434   ranges specified blocked global indices, and for the regular stash in
435   the proper global indices.
436 */
437 #undef __FUNCT__
438 #define __FUNCT__ "MatStashScatterBegin_Private"
439 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
440 {
441   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
442   PetscInt          size=stash->size,nsends;
443   PetscErrorCode    ierr;
444   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
445   PetscScalar       **rvalues,*svalues;
446   MPI_Comm          comm = stash->comm;
447   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
448   PetscMPIInt       *nprocs,*nlengths,nreceives;
449   PetscInt          *sp_idx,*sp_idy;
450   PetscScalar       *sp_val;
451   PetscMatStashSpace space,space_next;
452 
453   PetscFunctionBegin;
454   bs2 = stash->bs*stash->bs;
455 
456   /*  first count number of contributors to each processor */
457   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
458   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
459   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
460   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
461   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
462 
463   i = j    = 0;
464   lastidx  = -1;
465   space    = stash->space_head;
466   while (space != PETSC_NULL){
467     space_next = space->next;
468     sp_idx     = space->idx;
469     for (l=0; l<space->local_used; l++){
470       /* if indices are NOT locally sorted, need to start search at the beginning */
471       if (lastidx > (idx = sp_idx[l])) j = 0;
472       lastidx = idx;
473       for (; j<size; j++) {
474         if (idx >= owners[j] && idx < owners[j+1]) {
475           nlengths[j]++; owner[i] = j; break;
476         }
477       }
478       i++;
479     }
480     space = space_next;
481   }
482   /* Now check what procs get messages - and compute nsends. */
483   for (i=0, nsends=0 ; i<size; i++) {
484     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
485   }
486 
487   {PetscMPIInt  *onodes,*olengths;
488   /* Determine the number of messages to expect, their lengths, from from-ids */
489   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
490   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
491   /* since clubbing row,col - lengths are multiplied by 2 */
492   for (i=0; i<nreceives; i++) olengths[i] *=2;
493   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
494   /* values are size 'bs2' lengths (and remove earlier factor 2 */
495   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
496   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
497   ierr = PetscFree(onodes);CHKERRQ(ierr);
498   ierr = PetscFree(olengths);CHKERRQ(ierr);
499   }
500 
501   /* do sends:
502       1) starts[i] gives the starting index in svalues for stuff going to
503          the ith processor
504   */
505   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
506   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
507   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
508   /* use 2 sends the first with all_a, the next with all_i and all_j */
509   startv[0]  = 0; starti[0] = 0;
510   for (i=1; i<size; i++) {
511     startv[i] = startv[i-1] + nlengths[i-1];
512     starti[i] = starti[i-1] + nlengths[i-1]*2;
513   }
514 
515   i     = 0;
516   space = stash->space_head;
517   while (space != PETSC_NULL){
518     space_next = space->next;
519     sp_idx = space->idx;
520     sp_idy = space->idy;
521     sp_val = space->val;
522     for (l=0; l<space->local_used; l++){
523       j = owner[i];
524       if (bs2 == 1) {
525         svalues[startv[j]] = sp_val[l];
526       } else {
527         PetscInt     k;
528         PetscScalar *buf1,*buf2;
529         buf1 = svalues+bs2*startv[j];
530         buf2 = space->val + bs2*l;
531         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
532       }
533       sindices[starti[j]]             = sp_idx[l];
534       sindices[starti[j]+nlengths[j]] = sp_idy[l];
535       startv[j]++;
536       starti[j]++;
537       i++;
538     }
539     space = space_next;
540   }
541   startv[0] = 0;
542   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
543 
544   for (i=0,count=0; i<size; i++) {
545     if (nprocs[i]) {
546       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
547       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
548     }
549   }
550 #if defined(PETSC_USE_INFO)
551   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
552   for (i=0; i<size; i++) {
553     if (nprocs[i]) {
554       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
555     }
556   }
557 #endif
558   ierr = PetscFree(nlengths);CHKERRQ(ierr);
559   ierr = PetscFree(owner);CHKERRQ(ierr);
560   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
561   ierr = PetscFree(nprocs);CHKERRQ(ierr);
562 
563   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
564   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
565 
566   for (i=0; i<nreceives; i++) {
567     recv_waits[2*i]   = recv_waits1[i];
568     recv_waits[2*i+1] = recv_waits2[i];
569   }
570   stash->recv_waits = recv_waits;
571   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
572   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
573 
574   stash->svalues     = svalues;
575   stash->sindices    = sindices;
576   stash->rvalues     = rvalues;
577   stash->rindices    = rindices;
578   stash->send_waits  = send_waits;
579   stash->nsends      = nsends;
580   stash->nrecvs      = nreceives;
581   PetscFunctionReturn(0);
582 }
583 
584 /*
585    MatStashScatterGetMesg_Private - This function waits on the receives posted
586    in the function MatStashScatterBegin_Private() and returns one message at
587    a time to the calling function. If no messages are left, it indicates this
588    by setting flg = 0, else it sets flg = 1.
589 
590    Input Parameters:
591    stash - the stash
592 
593    Output Parameters:
594    nvals - the number of entries in the current message.
595    rows  - an array of row indices (or blocked indices) corresponding to the values
596    cols  - an array of columnindices (or blocked indices) corresponding to the values
597    vals  - the values
598    flg   - 0 indicates no more message left, and the current call has no values associated.
599            1 indicates that the current call successfully received a message, and the
600              other output parameters nvals,rows,cols,vals are set appropriately.
601 */
602 #undef __FUNCT__
603 #define __FUNCT__ "MatStashScatterGetMesg_Private"
604 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
605 {
606   PetscErrorCode ierr;
607   PetscMPIInt    i,*flg_v,i1,i2,size;
608   PetscInt       bs2;
609   MPI_Status     recv_status;
610   PetscTruth     match_found = PETSC_FALSE;
611 
612   PetscFunctionBegin;
613 
614   *flg = 0; /* When a message is discovered this is reset to 1 */
615   /* Return if no more messages to process */
616   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
617 
618   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
619   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&flg_v);CHKERRQ(ierr);
620   for (i=0; i<2*size; i++) flg_v[i] = -1;
621 
622   bs2   = stash->bs*stash->bs;
623   /* If a matching pair of receieves are found, process them, and return the data to
624      the calling function. Until then keep receiving messages */
625   while (!match_found) {
626     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
627     /* Now pack the received message into a structure which is useable by others */
628     if (i % 2) {
629       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
630       flg_v[2*recv_status.MPI_SOURCE] = i/2;
631       *nvals = *nvals/bs2;
632     } else {
633       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
634       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
635       *nvals = *nvals/2; /* This message has both row indices and col indices */
636     }
637 
638     /* Check if we have both messages from this proc */
639     i1 = flg_v[2*recv_status.MPI_SOURCE];
640     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
641     if (i1 != -1 && i2 != -1) {
642       *rows       = stash->rindices[i2];
643       *cols       = *rows + *nvals;
644       *vals       = stash->rvalues[i1];
645       *flg        = 1;
646       stash->nprocessed ++;
647       match_found = PETSC_TRUE;
648     }
649   }
650   ierr = PetscFree(flg_v);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653