xref: /petsc/src/mat/utils/matstash.c (revision 95d5f7c29374efcfd2ca44c2fe93981fbc2b4454)
1 /*$Id: matstash.c,v 1.41 2000/04/09 04:36:57 bsmith Exp bsmith $*/
2 
3 #include "src/mat/matimpl.h"
4 
5 /*
6        The input to the stash is ALWAYS in Scalar precision, BUT the
7     internal storage and output is in MatScalar.
8 */
9 #define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNC__
26 #define __FUNC__ /*<a name=""></a>*/"MatStashCreate_Private"
27 int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
28 {
29   int        ierr,max,*opt,nopt;
30   PetscTruth flg;
31 
32   PetscFunctionBegin;
33   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
35   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
36   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
37   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
38 
39   nopt = stash->size;
40   opt  = (int*)PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt);
41   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
42   if (flg) {
43     if (nopt == 1)                max = opt[0];
44     else if (nopt == stash->size) max = opt[stash->rank];
45     else if (stash->rank < nopt)  max = opt[stash->rank];
46     else                          max = 0; /* Use default */
47     stash->umax = max;
48   } else {
49     stash->umax = 0;
50   }
51   ierr = PetscFree(opt);CHKERRQ(ierr);
52   if (bs <= 0) bs = 1;
53 
54   stash->bs       = bs;
55   stash->nmax     = 0;
56   stash->oldnmax  = 0;
57   stash->n        = 0;
58   stash->reallocs = -1;
59   stash->idx      = 0;
60   stash->idy      = 0;
61   stash->array    = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rmax        = 0;
71   stash->nprocs      = 0;
72   stash->nprocessed  = 0;
73   PetscFunctionReturn(0);
74 }
75 
76 /*
77    MatStashDestroy_Private - Destroy the stash
78 */
79 #undef __FUNC__
80 #define __FUNC__ /*<a name=""></a>*/"MatStashDestroy_Private"
81 int MatStashDestroy_Private(MatStash *stash)
82 {
83   int ierr;
84 
85   PetscFunctionBegin;
86   ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr);
87   if (stash->array) {
88     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89     stash->array = 0;
90   }
91   PetscFunctionReturn(0);
92 }
93 
94 /*
95    MatStashScatterEnd_Private - This is called as the fial stage of
96    scatter. The final stages of messagepassing is done here, and
97    all the memory used for messagepassing is cleanedu up. This
98    routine also resets the stash, and deallocates the memory used
99    for the stash. It also keeps track of the current memory usage
100    so that the same value can be used the next time through.
101 */
102 #undef __FUNC__
103 #define __FUNC__ /*<a name=""></a>*/"MatStashScatterEnd_Private"
104 int MatStashScatterEnd_Private(MatStash *stash)
105 {
106   int         nsends=stash->nsends,ierr,bs2,oldnmax;
107   MPI_Status  *send_status;
108 
109   PetscFunctionBegin;
110   /* wait on sends */
111   if (nsends) {
112     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
113     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
114     ierr        = PetscFree(send_status);CHKERRQ(ierr);
115   }
116 
117   /* Now update nmaxold to be app 10% more than max n used, this way the
118      wastage of space is reduced the next time this stash is used.
119      Also update the oldmax, only if it increases */
120   bs2      = stash->bs*stash->bs;
121   oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
122   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
123 
124   stash->nmax       = 0;
125   stash->n          = 0;
126   stash->reallocs   = -1;
127   stash->rmax       = 0;
128   stash->nprocessed = 0;
129 
130   if (stash->array) {
131     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
132     stash->array = 0;
133     stash->idx   = 0;
134     stash->idy   = 0;
135   }
136   if (stash->send_waits) {
137     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138     stash->send_waits = 0;
139   }
140   if (stash->recv_waits) {
141     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
142     stash->recv_waits = 0;
143   }
144   if (stash->svalues) {
145     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
146     stash->svalues = 0;
147   }
148   if (stash->rvalues) {
149     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
150     stash->rvalues = 0;
151   }
152   if (stash->nprocs) {
153     ierr = PetscFree(stash->nprocs);
154     stash->nprocs = 0;
155   }
156 
157   PetscFunctionReturn(0);
158 }
159 
160 /*
161    MatStashGetInfo_Private - Gets the relavant statistics of the stash
162 
163    Input Parameters:
164    stash    - the stash
165    nstash   - the size of the stash. Indicates the number of values stored.
166    reallocs - the number of additional mallocs incurred.
167 
168 */
169 #undef __FUNC__
170 #define __FUNC__ /*<a name=""></a>*/"MatStashGetInfo_Private"
171 int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
172 {
173   int bs2 = stash->bs*stash->bs;
174 
175   PetscFunctionBegin;
176   *nstash   = stash->n*bs2;
177   if (stash->reallocs < 0) *reallocs = 0;
178   else                     *reallocs = stash->reallocs;
179   PetscFunctionReturn(0);
180 }
181 
182 
183 /*
184    MatStashSetInitialSize_Private - Sets the initial size of the stash
185 
186    Input Parameters:
187    stash  - the stash
188    max    - the value that is used as the max size of the stash.
189             this value is used while allocating memory.
190 */
191 #undef __FUNC__
192 #define __FUNC__ /*<a name=""></a>*/"MatStashSetInitialSize_Private"
193 int MatStashSetInitialSize_Private(MatStash *stash,int max)
194 {
195   PetscFunctionBegin;
196   stash->umax = max;
197   PetscFunctionReturn(0);
198 }
199 
200 /* MatStashExpand_Private - Expand the stash. This function is called
201    when the space in the stash is not sufficient to add the new values
202    being inserted into the stash.
203 
204    Input Parameters:
205    stash - the stash
206    incr  - the minimum increase requested
207 
208    Notes:
209    This routine doubles the currently used memory.
210  */
211 #undef __FUNC__
212 #define __FUNC__ /*<a name=""></a>*/"MatStashExpand_Private"
213 static int MatStashExpand_Private(MatStash *stash,int incr)
214 {
215   int       *n_idx,*n_idy,newnmax,bs2,ierr;
216   MatScalar *n_array;
217 
218   PetscFunctionBegin;
219   /* allocate a larger stash */
220   bs2     = stash->bs*stash->bs;
221   if (!stash->oldnmax && !stash->nmax) { /* new stash */
222     if (stash->umax)                  newnmax = stash->umax/bs2;
223     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
224   } else if (!stash->nmax) { /* resuing stash */
225     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
226     else                              newnmax = stash->oldnmax/bs2;
227   } else                              newnmax = stash->nmax*2;
228   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
229 
230   n_array = (MatScalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)));CHKPTRQ(n_array);
231   n_idx   = (int*)(n_array + bs2*newnmax);
232   n_idy   = (int*)(n_idx + newnmax);
233   ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
234   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
235   ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
236   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
237   stash->array   = n_array;
238   stash->idx     = n_idx;
239   stash->idy     = n_idy;
240   stash->nmax    = newnmax;
241   stash->reallocs++;
242   PetscFunctionReturn(0);
243 }
244 /*
245   MatStashValuesRow_Private - inserts values into the stash. This function
246   expects the values to be roworiented. Multiple columns belong to the same row
247   can be inserted with a single call to this function.
248 
249   Input Parameters:
250   stash  - the stash
251   row    - the global row correspoiding to the values
252   n      - the number of elements inserted. All elements belong to the above row.
253   idxn   - the global column indices corresponding to each of the values.
254   values - the values inserted
255 */
256 #undef __FUNC__
257 #define __FUNC__ /*<a name=""></a>*/"MatStashValuesRow_Private"
258 int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values)
259 {
260   int    ierr,i;
261 
262   PetscFunctionBegin;
263   /* Check and see if we have sufficient memory */
264   if ((stash->n + n) > stash->nmax) {
265     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
266   }
267   for (i=0; i<n; i++) {
268     stash->idx[stash->n]   = row;
269     stash->idy[stash->n]   = idxn[i];
270     stash->array[stash->n] = (MatScalar)values[i];
271     stash->n++;
272   }
273   PetscFunctionReturn(0);
274 }
275 /*
276   MatStashValuesCol_Private - inserts values into the stash. This function
277   expects the values to be columnoriented. Multiple columns belong to the same row
278   can be inserted with a single call to this function.
279 
280   Input Parameters:
281   stash   - the stash
282   row     - the global row correspoiding to the values
283   n       - the number of elements inserted. All elements belong to the above row.
284   idxn    - the global column indices corresponding to each of the values.
285   values  - the values inserted
286   stepval - the consecutive values are sepated by a distance of stepval.
287             this happens because the input is columnoriented.
288 */
289 #undef __FUNC__
290 #define __FUNC__ /*<a name=""></a>*/"MatStashValuesCol_Private"
291 int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int stepval)
292 {
293   int    ierr,i;
294 
295   PetscFunctionBegin;
296   /* Check and see if we have sufficient memory */
297   if ((stash->n + n) > stash->nmax) {
298     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
299   }
300   for (i=0; i<n; i++) {
301     stash->idx[stash->n]   = row;
302     stash->idy[stash->n]   = idxn[i];
303     stash->array[stash->n] = (MatScalar)values[i*stepval];
304     stash->n++;
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 /*
310   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
311   This function expects the values to be roworiented. Multiple columns belong
312   to the same block-row can be inserted with a single call to this function.
313   This function extracts the sub-block of values based on the dimensions of
314   the original input block, and the row,col values corresponding to the blocks.
315 
316   Input Parameters:
317   stash  - the stash
318   row    - the global block-row correspoiding to the values
319   n      - the number of elements inserted. All elements belong to the above row.
320   idxn   - the global block-column indices corresponding to each of the blocks of
321            values. Each block is of size bs*bs.
322   values - the values inserted
323   rmax   - the number of block-rows in the original block.
324   cmax   - the number of block-columsn on the original block.
325   idx    - the index of the current block-row in the original block.
326 */
327 #undef __FUNC__
328 #define __FUNC__ /*<a name=""></a>*/"MatStashValuesRowBlocked_Private"
329 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int rmax,int cmax,int idx)
330 {
331   int       ierr,i,j,k,bs2,bs=stash->bs;
332   Scalar    *vals;
333   MatScalar *array;
334 
335   PetscFunctionBegin;
336   bs2 = bs*bs;
337   if ((stash->n+n) > stash->nmax) {
338     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
339   }
340   for (i=0; i<n; i++) {
341     stash->idx[stash->n]   = row;
342     stash->idy[stash->n] = idxn[i];
343     /* Now copy over the block of values. Store the values column oriented.
344        This enables inserting multiple blocks belonging to a row with a single
345        funtion call */
346     array = stash->array + bs2*stash->n;
347     vals  = values + idx*bs2*n + bs*i;
348     for (j=0; j<bs; j++) {
349       for (k=0; k<bs; k++) {array[k*bs] = (MatScalar)vals[k];}
350       array += 1;
351       vals  += cmax*bs;
352     }
353     stash->n++;
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 /*
359   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
360   This function expects the values to be roworiented. Multiple columns belong
361   to the same block-row can be inserted with a single call to this function.
362   This function extracts the sub-block of values based on the dimensions of
363   the original input block, and the row,col values corresponding to the blocks.
364 
365   Input Parameters:
366   stash  - the stash
367   row    - the global block-row correspoiding to the values
368   n      - the number of elements inserted. All elements belong to the above row.
369   idxn   - the global block-column indices corresponding to each of the blocks of
370            values. Each block is of size bs*bs.
371   values - the values inserted
372   rmax   - the number of block-rows in the original block.
373   cmax   - the number of block-columsn on the original block.
374   idx    - the index of the current block-row in the original block.
375 */
376 #undef __FUNC__
377 #define __FUNC__ /*<a name=""></a>*/"MatStashValuesColBlocked_Private"
378 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int rmax,int cmax,int idx)
379 {
380   int       ierr,i,j,k,bs2,bs=stash->bs;
381   Scalar    *vals;
382   MatScalar *array;
383 
384   PetscFunctionBegin;
385   bs2 = bs*bs;
386   if ((stash->n+n) > stash->nmax) {
387     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
388   }
389   for (i=0; i<n; i++) {
390     stash->idx[stash->n]   = row;
391     stash->idy[stash->n] = idxn[i];
392     /* Now copy over the block of values. Store the values column oriented.
393      This enables inserting multiple blocks belonging to a row with a single
394      funtion call */
395     array = stash->array + bs2*stash->n;
396     vals  = values + idx*bs + bs2*rmax*i;
397     for (j=0; j<bs; j++) {
398       for (k=0; k<bs; k++) {array[k] = (MatScalar)vals[k];}
399       array += bs;
400       vals  += rmax*bs;
401     }
402     stash->n++;
403   }
404   PetscFunctionReturn(0);
405 }
406 /*
407   MatStashScatterBegin_Private - Initiates the transfer of values to the
408   correct owners. This function goes through the stash, and check the
409   owners of each stashed value, and sends the values off to the owner
410   processors.
411 
412   Input Parameters:
413   stash  - the stash
414   owners - an array of size 'no-of-procs' which gives the ownership range
415            for each node.
416 
417   Notes: The 'owners' array in the cased of the blocked-stash has the
418   ranges specified blocked global indices, and for the regular stash in
419   the proper global indices.
420 */
421 #undef __FUNC__
422 #define __FUNC__ /*<a name=""></a>*/"MatStashScatterBegin_Private"
423 int MatStashScatterBegin_Private(MatStash *stash,int *owners)
424 {
425   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
426   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
427   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
428   MatScalar   *rvalues,*svalues;
429   MPI_Comm    comm = stash->comm;
430   MPI_Request *send_waits,*recv_waits;
431 
432   PetscFunctionBegin;
433 
434   bs2 = stash->bs*stash->bs;
435   /*  first count number of contributors to each processor */
436   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
437   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
438   procs  = nprocs + size;
439   owner  = (int*)PetscMalloc((stash->n+1)*sizeof(int));CHKPTRQ(owner);
440 
441   for (i=0; i<stash->n; i++) {
442     idx = stash->idx[i];
443     for (j=0; j<size; j++) {
444       if (idx >= owners[j] && idx < owners[j+1]) {
445         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
446       }
447     }
448   }
449   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
450 
451   /* inform other processors of number of messages and max length*/
452   work      = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
453   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
454   nmax      = work[rank];
455   nreceives = work[size+rank];
456   ierr      = PetscFree(work);CHKERRQ(ierr);
457   /* post receives:
458      since we don't know how long each individual message is we
459      allocate the largest needed buffer for each receive. Potentially
460      this is a lot of wasted space.
461   */
462   rvalues    = (MatScalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(rvalues);
463   rindices   = (int*)(rvalues + bs2*nreceives*nmax);
464   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
465   for (i=0,count=0; i<nreceives; i++) {
466     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
467                      recv_waits+count++);CHKERRQ(ierr);
468     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
469                      recv_waits+count++);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for stuff going to
474          the ith processor
475   */
476   svalues    = (MatScalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(svalues);
477   sindices   = (int*)(svalues + bs2*stash->n);
478   send_waits = (MPI_Request*)PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
479   startv     = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(startv);
480   starti     = startv + size;
481   /* use 2 sends the first with all_a, the next with all_i and all_j */
482   startv[0]  = 0; starti[0] = 0;
483   for (i=1; i<size; i++) {
484     startv[i] = startv[i-1] + nprocs[i-1];
485     starti[i] = starti[i-1] + nprocs[i-1]*2;
486   }
487   for (i=0; i<stash->n; i++) {
488     j = owner[i];
489     if (bs2 == 1) {
490       svalues[startv[j]]              = stash->array[i];
491     } else {
492       int       k;
493       MatScalar *buf1,*buf2;
494       buf1 = svalues+bs2*startv[j];
495       buf2 = stash->array+bs2*i;
496       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
497     }
498     sindices[starti[j]]             = stash->idx[i];
499     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
500     startv[j]++;
501     starti[j]++;
502   }
503   startv[0] = 0;
504   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];}
505   for (i=0,count=0; i<size; i++) {
506     if (procs[i]) {
507       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm,
508                        send_waits+count++);CHKERRQ(ierr);
509       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
510                        send_waits+count++);CHKERRQ(ierr);
511     }
512   }
513   ierr = PetscFree(owner);CHKERRQ(ierr);
514   ierr = PetscFree(startv);CHKERRQ(ierr);
515   /* This memory is reused in scatter end  for a different purpose*/
516   for (i=0; i<2*size; i++) nprocs[i] = -1;
517   stash->nprocs      = nprocs;
518 
519   stash->svalues    = svalues;    stash->rvalues    = rvalues;
520   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
521   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
522   stash->rmax       = nmax;
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527    MatStashScatterGetMesg_Private - This function waits on the receives posted
528    in the function MatStashScatterBegin_Private() and returns one message at
529    a time to the calling function. If no messages are left, it indicates this
530    by setting flg = 0, else it sets flg = 1.
531 
532    Input Parameters:
533    stash - the stash
534 
535    Output Parameters:
536    nvals - the number of entries in the current message.
537    rows  - an array of row indices (or blocked indices) corresponding to the values
538    cols  - an array of columnindices (or blocked indices) corresponding to the values
539    vals  - the values
540    flg   - 0 indicates no more message left, and the current call has no values associated.
541            1 indicates that the current call successfully received a message, and the
542              other output parameters nvals,rows,cols,vals are set appropriately.
543 */
544 #undef __FUNC__
545 #define __FUNC__ /*<a name=""></a>*/"MatStashScatterGetMesg_Private"
546 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
547 {
548   int         i,ierr,size=stash->size,*flg_v,*flg_i;
549   int         i1,i2,*rindices,match_found=0,bs2;
550   MPI_Status  recv_status;
551 
552   PetscFunctionBegin;
553 
554   *flg = 0; /* When a message is discovered this is reset to 1 */
555   /* Return if no more messages to process */
556   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
557 
558   flg_v = stash->nprocs;
559   flg_i = flg_v + size;
560   bs2   = stash->bs*stash->bs;
561   /* If a matching pair of receieves are found, process them, and return the data to
562      the calling function. Until then keep receiving messages */
563   while (!match_found) {
564     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
565     /* Now pack the received message into a structure which is useable by others */
566     if (i % 2) {
567       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
568       flg_i[recv_status.MPI_SOURCE] = i/2;
569       *nvals = *nvals/2; /* This message has both row indices and col indices */
570     } else {
571       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
572       flg_v[recv_status.MPI_SOURCE] = i/2;
573       *nvals = *nvals/bs2;
574     }
575 
576     /* Check if we have both the messages from this proc */
577     i1 = flg_v[recv_status.MPI_SOURCE];
578     i2 = flg_i[recv_status.MPI_SOURCE];
579     if (i1 != -1 && i2 != -1) {
580       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
581       *rows       = rindices + 2*i2*stash->rmax;
582       *cols       = *rows + *nvals;
583       *vals       = stash->rvalues + i1*bs2*stash->rmax;
584       *flg        = 1;
585       stash->nprocessed ++;
586       match_found = 1;
587     }
588   }
589   PetscFunctionReturn(0);
590 }
591