xref: /petsc/src/mat/utils/matstash.c (revision dba47a550923b04c7c4ebbb735eb62a1b3e4e9ae)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 
5 /*
6        The input to the stash is ALWAYS in MatScalar precision, and the
7     internal storage and output is also in MatScalar.
8 */
9 #define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStashCreate_Private"
27 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
28 {
29   PetscErrorCode ierr;
30   PetscInt       max,*opt,nopt;
31   PetscTruth     flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35   stash->comm = comm;
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40 
41   nopt = stash->size;
42   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44   if (flg) {
45     if (nopt == 1)                max = opt[0];
46     else if (nopt == stash->size) max = opt[stash->rank];
47     else if (stash->rank < nopt)  max = opt[stash->rank];
48     else                          max = 0; /* Use default */
49     stash->umax = max;
50   } else {
51     stash->umax = 0;
52   }
53   ierr = PetscFree(opt);CHKERRQ(ierr);
54   if (bs <= 0) bs = 1;
55 
56   stash->bs       = bs;
57   stash->nmax     = 0;
58   stash->oldnmax  = 0;
59   stash->n        = 0;
60   stash->reallocs = -1;
61   stash->idx      = 0;
62   stash->idy      = 0;
63   stash->array    = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rmax        = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->array) {
89     ierr = PetscFree(stash->array);CHKERRQ(ierr);
90     stash->array = 0;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 /*
96    MatStashScatterEnd_Private - This is called as the fial stage of
97    scatter. The final stages of messagepassing is done here, and
98    all the memory used for messagepassing is cleanedu up. This
99    routine also resets the stash, and deallocates the memory used
100    for the stash. It also keeps track of the current memory usage
101    so that the same value can be used the next time through.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "MatStashScatterEnd_Private"
105 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106 {
107   PetscErrorCode ierr;
108   int         nsends=stash->nsends,bs2,oldnmax;
109   MPI_Status  *send_status;
110 
111   PetscFunctionBegin;
112   /* wait on sends */
113   if (nsends) {
114     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116     ierr = PetscFree(send_status);CHKERRQ(ierr);
117   }
118 
119   /* Now update nmaxold to be app 10% more than max n used, this way the
120      wastage of space is reduced the next time this stash is used.
121      Also update the oldmax, only if it increases */
122   if (stash->n) {
123     bs2      = stash->bs*stash->bs;
124     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126   }
127 
128   stash->nmax       = 0;
129   stash->n          = 0;
130   stash->reallocs   = -1;
131   stash->rmax       = 0;
132   stash->nprocessed = 0;
133 
134   if (stash->array) {
135     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
136     stash->array = 0;
137     stash->idx   = 0;
138     stash->idy   = 0;
139   }
140   if (stash->send_waits) {
141     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
142     stash->send_waits = 0;
143   }
144   if (stash->recv_waits) {
145     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
146     stash->recv_waits = 0;
147   }
148   if (stash->svalues) {
149     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
150     stash->svalues = 0;
151   }
152   if (stash->rvalues) {
153     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
154     stash->rvalues = 0;
155   }
156   if (stash->nprocs) {
157     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
158     stash->nprocs = 0;
159   }
160 
161   PetscFunctionReturn(0);
162 }
163 
164 /*
165    MatStashGetInfo_Private - Gets the relavant statistics of the stash
166 
167    Input Parameters:
168    stash    - the stash
169    nstash   - the size of the stash. Indicates the number of values stored.
170    reallocs - the number of additional mallocs incurred.
171 
172 */
173 #undef __FUNCT__
174 #define __FUNCT__ "MatStashGetInfo_Private"
175 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
176 {
177   PetscInt bs2 = stash->bs*stash->bs;
178 
179   PetscFunctionBegin;
180   if (nstash) *nstash   = stash->n*bs2;
181   if (reallocs) {
182     if (stash->reallocs < 0) *reallocs = 0;
183     else                     *reallocs = stash->reallocs;
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 
189 /*
190    MatStashSetInitialSize_Private - Sets the initial size of the stash
191 
192    Input Parameters:
193    stash  - the stash
194    max    - the value that is used as the max size of the stash.
195             this value is used while allocating memory.
196 */
197 #undef __FUNCT__
198 #define __FUNCT__ "MatStashSetInitialSize_Private"
199 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
200 {
201   PetscFunctionBegin;
202   stash->umax = max;
203   PetscFunctionReturn(0);
204 }
205 
206 /* MatStashExpand_Private - Expand the stash. This function is called
207    when the space in the stash is not sufficient to add the new values
208    being inserted into the stash.
209 
210    Input Parameters:
211    stash - the stash
212    incr  - the minimum increase requested
213 
214    Notes:
215    This routine doubles the currently used memory.
216  */
217 #undef __FUNCT__
218 #define __FUNCT__ "MatStashExpand_Private"
219 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
220 {
221   PetscErrorCode ierr;
222   PetscInt       *n_idx,*n_idy,newnmax,bs2;
223   MatScalar *n_array;
224 
225   PetscFunctionBegin;
226   /* allocate a larger stash */
227   bs2     = stash->bs*stash->bs;
228   if (!stash->oldnmax && !stash->nmax) { /* new stash */
229     if (stash->umax)                  newnmax = stash->umax/bs2;
230     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
231   } else if (!stash->nmax) { /* resuing stash */
232     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
233     else                              newnmax = stash->oldnmax/bs2;
234   } else                              newnmax = stash->nmax*2;
235   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
236 
237   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
238   n_idx = (PetscInt*)(n_array + bs2*newnmax);
239   n_idy = (PetscInt*)(n_idx + newnmax);
240   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
241   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
242   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
243   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
244   stash->array   = n_array;
245   stash->idx     = n_idx;
246   stash->idy     = n_idy;
247   stash->nmax    = newnmax;
248   stash->reallocs++;
249   PetscFunctionReturn(0);
250 }
251 /*
252   MatStashValuesRow_Private - inserts values into the stash. This function
253   expects the values to be roworiented. Multiple columns belong to the same row
254   can be inserted with a single call to this function.
255 
256   Input Parameters:
257   stash  - the stash
258   row    - the global row correspoiding to the values
259   n      - the number of elements inserted. All elements belong to the above row.
260   idxn   - the global column indices corresponding to each of the values.
261   values - the values inserted
262 */
263 #undef __FUNCT__
264 #define __FUNCT__ "MatStashValuesRow_Private"
265 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
266 {
267   PetscErrorCode ierr;
268   PetscInt i;
269 
270   PetscFunctionBegin;
271   /* Check and see if we have sufficient memory */
272   if ((stash->n + n) > stash->nmax) {
273     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
274   }
275   for (i=0; i<n; i++) {
276     stash->idx[stash->n]   = row;
277     stash->idy[stash->n]   = idxn[i];
278     stash->array[stash->n] = values[i];
279     stash->n++;
280   }
281   PetscFunctionReturn(0);
282 }
283 /*
284   MatStashValuesCol_Private - inserts values into the stash. This function
285   expects the values to be columnoriented. Multiple columns belong to the same row
286   can be inserted with a single call to this function.
287 
288   Input Parameters:
289   stash   - the stash
290   row     - the global row correspoiding to the values
291   n       - the number of elements inserted. All elements belong to the above row.
292   idxn    - the global column indices corresponding to each of the values.
293   values  - the values inserted
294   stepval - the consecutive values are sepated by a distance of stepval.
295             this happens because the input is columnoriented.
296 */
297 #undef __FUNCT__
298 #define __FUNCT__ "MatStashValuesCol_Private"
299 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
300 {
301   PetscErrorCode ierr;
302   PetscInt i;
303 
304   PetscFunctionBegin;
305   /* Check and see if we have sufficient memory */
306   if ((stash->n + n) > stash->nmax) {
307     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
308   }
309   for (i=0; i<n; i++) {
310     stash->idx[stash->n]   = row;
311     stash->idy[stash->n]   = idxn[i];
312     stash->array[stash->n] = values[i*stepval];
313     stash->n++;
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 /*
319   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
320   This function expects the values to be roworiented. Multiple columns belong
321   to the same block-row can be inserted with a single call to this function.
322   This function extracts the sub-block of values based on the dimensions of
323   the original input block, and the row,col values corresponding to the blocks.
324 
325   Input Parameters:
326   stash  - the stash
327   row    - the global block-row correspoiding to the values
328   n      - the number of elements inserted. All elements belong to the above row.
329   idxn   - the global block-column indices corresponding to each of the blocks of
330            values. Each block is of size bs*bs.
331   values - the values inserted
332   rmax   - the number of block-rows in the original block.
333   cmax   - the number of block-columsn on the original block.
334   idx    - the index of the current block-row in the original block.
335 */
336 #undef __FUNCT__
337 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
338 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
339 {
340   PetscErrorCode ierr;
341   PetscInt i,j,k,bs2,bs=stash->bs;
342   const MatScalar *vals;
343   MatScalar       *array;
344 
345   PetscFunctionBegin;
346   bs2 = bs*bs;
347   if ((stash->n+n) > stash->nmax) {
348     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
349   }
350   for (i=0; i<n; i++) {
351     stash->idx[stash->n]   = row;
352     stash->idy[stash->n] = idxn[i];
353     /* Now copy over the block of values. Store the values column oriented.
354        This enables inserting multiple blocks belonging to a row with a single
355        funtion call */
356     array = stash->array + bs2*stash->n;
357     vals  = values + idx*bs2*n + bs*i;
358     for (j=0; j<bs; j++) {
359       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
360       array += 1;
361       vals  += cmax*bs;
362     }
363     stash->n++;
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 /*
369   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
370   This function expects the values to be roworiented. Multiple columns belong
371   to the same block-row can be inserted with a single call to this function.
372   This function extracts the sub-block of values based on the dimensions of
373   the original input block, and the row,col values corresponding to the blocks.
374 
375   Input Parameters:
376   stash  - the stash
377   row    - the global block-row correspoiding to the values
378   n      - the number of elements inserted. All elements belong to the above row.
379   idxn   - the global block-column indices corresponding to each of the blocks of
380            values. Each block is of size bs*bs.
381   values - the values inserted
382   rmax   - the number of block-rows in the original block.
383   cmax   - the number of block-columsn on the original block.
384   idx    - the index of the current block-row in the original block.
385 */
386 #undef __FUNCT__
387 #define __FUNCT__ "MatStashValuesColBlocked_Private"
388 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
389 {
390   PetscErrorCode ierr;
391   PetscInt i,j,k,bs2,bs=stash->bs;
392   const MatScalar *vals;
393   MatScalar       *array;
394 
395   PetscFunctionBegin;
396   bs2 = bs*bs;
397   if ((stash->n+n) > stash->nmax) {
398     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
399   }
400   for (i=0; i<n; i++) {
401     stash->idx[stash->n]   = row;
402     stash->idy[stash->n] = idxn[i];
403     /* Now copy over the block of values. Store the values column oriented.
404      This enables inserting multiple blocks belonging to a row with a single
405      funtion call */
406     array = stash->array + bs2*stash->n;
407     vals  = values + idx*bs + bs2*rmax*i;
408     for (j=0; j<bs; j++) {
409       for (k=0; k<bs; k++) {array[k] = vals[k];}
410       array += bs;
411       vals  += rmax*bs;
412     }
413     stash->n++;
414   }
415   PetscFunctionReturn(0);
416 }
417 /*
418   MatStashScatterBegin_Private - Initiates the transfer of values to the
419   correct owners. This function goes through the stash, and check the
420   owners of each stashed value, and sends the values off to the owner
421   processors.
422 
423   Input Parameters:
424   stash  - the stash
425   owners - an array of size 'no-of-procs' which gives the ownership range
426            for each node.
427 
428   Notes: The 'owners' array in the cased of the blocked-stash has the
429   ranges specified blocked global indices, and for the regular stash in
430   the proper global indices.
431 */
432 #undef __FUNCT__
433 #define __FUNCT__ "MatStashScatterBegin_Private"
434 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
435 {
436   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
437   PetscInt       size=stash->size,*nprocs,nsends,nreceives;
438   PetscErrorCode ierr;
439   PetscInt       nmax,count,*sindices,*rindices,i,j,idx,lastidx;
440   MatScalar      *rvalues,*svalues;
441   MPI_Comm       comm = stash->comm;
442   MPI_Request    *send_waits,*recv_waits;
443 
444   PetscFunctionBegin;
445 
446   bs2   = stash->bs*stash->bs;
447   /*  first count number of contributors to each processor */
448   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
449   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
450   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
451 
452   j       = 0;
453   lastidx = -1;
454   for (i=0; i<stash->n; i++) {
455     /* if indices are NOT locally sorted, need to start search at the beginning */
456     if (lastidx > (idx = stash->idx[i])) j = 0;
457     lastidx = idx;
458     for (; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
461       }
462     }
463   }
464   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465 
466   /* inform other processors of number of messages and max length*/
467   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
468 
469   /* post receives:
470      since we don't know how long each individual message is we
471      allocate the largest needed buffer for each receive. Potentially
472      this is a lot of wasted space.
473   */
474   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&rvalues);CHKERRQ(ierr);
475   rindices = (PetscInt*)(rvalues + bs2*nreceives*nmax);
476   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
477   for (i=0,count=0; i<nreceives; i++) {
478     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
479     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
480   }
481 
482   /* do sends:
483       1) starts[i] gives the starting index in svalues for stuff going to
484          the ith processor
485   */
486   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
487   sindices = (PetscInt*)(svalues + bs2*stash->n);
488   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
489   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
490   starti   = startv + size;
491   /* use 2 sends the first with all_a, the next with all_i and all_j */
492   startv[0]  = 0; starti[0] = 0;
493   for (i=1; i<size; i++) {
494     startv[i] = startv[i-1] + nprocs[2*i-2];
495     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
496   }
497   for (i=0; i<stash->n; i++) {
498     j = owner[i];
499     if (bs2 == 1) {
500       svalues[startv[j]]              = stash->array[i];
501     } else {
502       PetscInt       k;
503       MatScalar *buf1,*buf2;
504       buf1 = svalues+bs2*startv[j];
505       buf2 = stash->array+bs2*i;
506       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
507     }
508     sindices[starti[j]]               = stash->idx[i];
509     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
510     startv[j]++;
511     starti[j]++;
512   }
513   startv[0] = 0;
514   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
515   for (i=0,count=0; i<size; i++) {
516     if (nprocs[2*i+1]) {
517       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
518       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
519     }
520   }
521   ierr = PetscFree(owner);CHKERRQ(ierr);
522   ierr = PetscFree(startv);CHKERRQ(ierr);
523   /* This memory is reused in scatter end  for a different purpose*/
524   for (i=0; i<2*size; i++) nprocs[i] = -1;
525   stash->nprocs      = nprocs;
526 
527   stash->svalues    = svalues;    stash->rvalues    = rvalues;
528   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
529   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
530   stash->rmax       = nmax;
531   PetscFunctionReturn(0);
532 }
533 
534 /*
535    MatStashScatterGetMesg_Private - This function waits on the receives posted
536    in the function MatStashScatterBegin_Private() and returns one message at
537    a time to the calling function. If no messages are left, it indicates this
538    by setting flg = 0, else it sets flg = 1.
539 
540    Input Parameters:
541    stash - the stash
542 
543    Output Parameters:
544    nvals - the number of entries in the current message.
545    rows  - an array of row indices (or blocked indices) corresponding to the values
546    cols  - an array of columnindices (or blocked indices) corresponding to the values
547    vals  - the values
548    flg   - 0 indicates no more message left, and the current call has no values associated.
549            1 indicates that the current call successfully received a message, and the
550              other output parameters nvals,rows,cols,vals are set appropriately.
551 */
552 #undef __FUNCT__
553 #define __FUNCT__ "MatStashScatterGetMesg_Private"
554 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
555 {
556   PetscErrorCode ierr;
557   PetscMPIInt    i;
558   PetscInt       *flg_v,i1,i2,*rindices,bs2;
559   MPI_Status     recv_status;
560   PetscTruth     match_found = PETSC_FALSE;
561 
562   PetscFunctionBegin;
563 
564   *flg = 0; /* When a message is discovered this is reset to 1 */
565   /* Return if no more messages to process */
566   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
567 
568   flg_v = stash->nprocs;
569   bs2   = stash->bs*stash->bs;
570   /* If a matching pair of receieves are found, process them, and return the data to
571      the calling function. Until then keep receiving messages */
572   while (!match_found) {
573     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
574     /* Now pack the received message into a structure which is useable by others */
575     if (i % 2) {
576       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
577       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
578       *nvals = *nvals/2; /* This message has both row indices and col indices */
579     } else {
580       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
581       flg_v[2*recv_status.MPI_SOURCE] = i/2;
582       *nvals = *nvals/bs2;
583     }
584 
585     /* Check if we have both the messages from this proc */
586     i1 = flg_v[2*recv_status.MPI_SOURCE];
587     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
588     if (i1 != -1 && i2 != -1) {
589       rindices    = (PetscInt*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
590       *rows       = rindices + 2*i2*stash->rmax;
591       *cols       = *rows + *nvals;
592       *vals       = stash->rvalues + i1*bs2*stash->rmax;
593       *flg        = 1;
594       stash->nprocessed ++;
595       match_found = PETSC_TRUE;
596     }
597   }
598   PetscFunctionReturn(0);
599 }
600