xref: /petsc/src/mat/utils/matstash.c (revision 9be4fee84348b1eaee2eca38a590c7003da9a71b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 
5 /*
6        The input to the stash is ALWAYS in MatScalar precision, and the
7     internal storage and output is also in MatScalar.
8 */
9 #define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStashCreate_Private"
27 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
28 {
29   PetscErrorCode ierr;
30   PetscInt       max,*opt,nopt;
31   PetscTruth     flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35   stash->comm = comm;
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40 
41   nopt = stash->size;
42   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44   if (flg) {
45     if (nopt == 1)                max = opt[0];
46     else if (nopt == stash->size) max = opt[stash->rank];
47     else if (stash->rank < nopt)  max = opt[stash->rank];
48     else                          max = 0; /* Use default */
49     stash->umax = max;
50   } else {
51     stash->umax = 0;
52   }
53   ierr = PetscFree(opt);CHKERRQ(ierr);
54   if (bs <= 0) bs = 1;
55 
56   stash->bs       = bs;
57   stash->nmax     = 0;
58   stash->oldnmax  = 0;
59   stash->n        = 0;
60   stash->reallocs = -1;
61   stash->idx      = 0;
62   stash->idy      = 0;
63   stash->array    = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->array) {
89     ierr = PetscFree(stash->array);CHKERRQ(ierr);
90     stash->array = 0;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 /*
96    MatStashScatterEnd_Private - This is called as the fial stage of
97    scatter. The final stages of messagepassing is done here, and
98    all the memory used for messagepassing is cleanedu up. This
99    routine also resets the stash, and deallocates the memory used
100    for the stash. It also keeps track of the current memory usage
101    so that the same value can be used the next time through.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "MatStashScatterEnd_Private"
105 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106 {
107   PetscErrorCode ierr;
108   int         nsends=stash->nsends,bs2,oldnmax;
109   MPI_Status  *send_status;
110 
111   PetscFunctionBegin;
112   /* wait on sends */
113   if (nsends) {
114     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116     ierr = PetscFree(send_status);CHKERRQ(ierr);
117   }
118 
119   /* Now update nmaxold to be app 10% more than max n used, this way the
120      wastage of space is reduced the next time this stash is used.
121      Also update the oldmax, only if it increases */
122   if (stash->n) {
123     bs2      = stash->bs*stash->bs;
124     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126   }
127 
128   stash->nmax       = 0;
129   stash->n          = 0;
130   stash->reallocs   = -1;
131   stash->nprocessed = 0;
132 
133   if (stash->array) {
134     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
135     stash->array = 0;
136     stash->idx   = 0;
137     stash->idy   = 0;
138   }
139   if (stash->send_waits) {
140     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141     stash->send_waits = 0;
142   }
143   if (stash->recv_waits) {
144     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
145     stash->recv_waits = 0;
146   }
147   if (stash->svalues) {
148     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
149     stash->svalues = 0;
150   }
151   if (stash->rvalues) {
152     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
153     stash->rvalues = 0;
154   }
155   if (stash->rindices) {
156     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
157     stash->rindices = 0;
158   }
159   if (stash->nprocs) {
160     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
161     stash->nprocs = 0;
162   }
163 
164   PetscFunctionReturn(0);
165 }
166 
167 /*
168    MatStashGetInfo_Private - Gets the relavant statistics of the stash
169 
170    Input Parameters:
171    stash    - the stash
172    nstash   - the size of the stash. Indicates the number of values stored.
173    reallocs - the number of additional mallocs incurred.
174 
175 */
176 #undef __FUNCT__
177 #define __FUNCT__ "MatStashGetInfo_Private"
178 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
179 {
180   PetscInt bs2 = stash->bs*stash->bs;
181 
182   PetscFunctionBegin;
183   if (nstash) *nstash   = stash->n*bs2;
184   if (reallocs) {
185     if (stash->reallocs < 0) *reallocs = 0;
186     else                     *reallocs = stash->reallocs;
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 
192 /*
193    MatStashSetInitialSize_Private - Sets the initial size of the stash
194 
195    Input Parameters:
196    stash  - the stash
197    max    - the value that is used as the max size of the stash.
198             this value is used while allocating memory.
199 */
200 #undef __FUNCT__
201 #define __FUNCT__ "MatStashSetInitialSize_Private"
202 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
203 {
204   PetscFunctionBegin;
205   stash->umax = max;
206   PetscFunctionReturn(0);
207 }
208 
209 /* MatStashExpand_Private - Expand the stash. This function is called
210    when the space in the stash is not sufficient to add the new values
211    being inserted into the stash.
212 
213    Input Parameters:
214    stash - the stash
215    incr  - the minimum increase requested
216 
217    Notes:
218    This routine doubles the currently used memory.
219  */
220 #undef __FUNCT__
221 #define __FUNCT__ "MatStashExpand_Private"
222 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
223 {
224   PetscErrorCode ierr;
225   PetscInt       *n_idx,*n_idy,newnmax,bs2;
226   MatScalar *n_array;
227 
228   PetscFunctionBegin;
229   /* allocate a larger stash */
230   bs2     = stash->bs*stash->bs;
231   if (!stash->oldnmax && !stash->nmax) { /* new stash */
232     if (stash->umax)                  newnmax = stash->umax/bs2;
233     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
234   } else if (!stash->nmax) { /* resuing stash */
235     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
236     else                              newnmax = stash->oldnmax/bs2;
237   } else                              newnmax = stash->nmax*2;
238   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
239 
240   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
241   n_idx = (PetscInt*)(n_array + bs2*newnmax);
242   n_idy = (PetscInt*)(n_idx + newnmax);
243   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
244   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
245   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
246   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
247   stash->array   = n_array;
248   stash->idx     = n_idx;
249   stash->idy     = n_idy;
250   stash->nmax    = newnmax;
251   stash->reallocs++;
252   PetscFunctionReturn(0);
253 }
254 /*
255   MatStashValuesRow_Private - inserts values into the stash. This function
256   expects the values to be roworiented. Multiple columns belong to the same row
257   can be inserted with a single call to this function.
258 
259   Input Parameters:
260   stash  - the stash
261   row    - the global row correspoiding to the values
262   n      - the number of elements inserted. All elements belong to the above row.
263   idxn   - the global column indices corresponding to each of the values.
264   values - the values inserted
265 */
266 #undef __FUNCT__
267 #define __FUNCT__ "MatStashValuesRow_Private"
268 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
269 {
270   PetscErrorCode ierr;
271   PetscInt i;
272 
273   PetscFunctionBegin;
274   /* Check and see if we have sufficient memory */
275   if ((stash->n + n) > stash->nmax) {
276     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
277   }
278   for (i=0; i<n; i++) {
279     stash->idx[stash->n]   = row;
280     stash->idy[stash->n]   = idxn[i];
281     stash->array[stash->n] = values[i];
282     stash->n++;
283   }
284   PetscFunctionReturn(0);
285 }
286 /*
287   MatStashValuesCol_Private - inserts values into the stash. This function
288   expects the values to be columnoriented. Multiple columns belong to the same row
289   can be inserted with a single call to this function.
290 
291   Input Parameters:
292   stash   - the stash
293   row     - the global row correspoiding to the values
294   n       - the number of elements inserted. All elements belong to the above row.
295   idxn    - the global column indices corresponding to each of the values.
296   values  - the values inserted
297   stepval - the consecutive values are sepated by a distance of stepval.
298             this happens because the input is columnoriented.
299 */
300 #undef __FUNCT__
301 #define __FUNCT__ "MatStashValuesCol_Private"
302 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
303 {
304   PetscErrorCode ierr;
305   PetscInt i;
306 
307   PetscFunctionBegin;
308   /* Check and see if we have sufficient memory */
309   if ((stash->n + n) > stash->nmax) {
310     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
311   }
312   for (i=0; i<n; i++) {
313     stash->idx[stash->n]   = row;
314     stash->idy[stash->n]   = idxn[i];
315     stash->array[stash->n] = values[i*stepval];
316     stash->n++;
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 /*
322   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
323   This function expects the values to be roworiented. Multiple columns belong
324   to the same block-row can be inserted with a single call to this function.
325   This function extracts the sub-block of values based on the dimensions of
326   the original input block, and the row,col values corresponding to the blocks.
327 
328   Input Parameters:
329   stash  - the stash
330   row    - the global block-row correspoiding to the values
331   n      - the number of elements inserted. All elements belong to the above row.
332   idxn   - the global block-column indices corresponding to each of the blocks of
333            values. Each block is of size bs*bs.
334   values - the values inserted
335   rmax   - the number of block-rows in the original block.
336   cmax   - the number of block-columsn on the original block.
337   idx    - the index of the current block-row in the original block.
338 */
339 #undef __FUNCT__
340 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
341 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
342 {
343   PetscErrorCode ierr;
344   PetscInt i,j,k,bs2,bs=stash->bs;
345   const MatScalar *vals;
346   MatScalar       *array;
347 
348   PetscFunctionBegin;
349   bs2 = bs*bs;
350   if ((stash->n+n) > stash->nmax) {
351     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
352   }
353   for (i=0; i<n; i++) {
354     stash->idx[stash->n]   = row;
355     stash->idy[stash->n] = idxn[i];
356     /* Now copy over the block of values. Store the values column oriented.
357        This enables inserting multiple blocks belonging to a row with a single
358        funtion call */
359     array = stash->array + bs2*stash->n;
360     vals  = values + idx*bs2*n + bs*i;
361     for (j=0; j<bs; j++) {
362       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
363       array += 1;
364       vals  += cmax*bs;
365     }
366     stash->n++;
367   }
368   PetscFunctionReturn(0);
369 }
370 
371 /*
372   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
373   This function expects the values to be roworiented. Multiple columns belong
374   to the same block-row can be inserted with a single call to this function.
375   This function extracts the sub-block of values based on the dimensions of
376   the original input block, and the row,col values corresponding to the blocks.
377 
378   Input Parameters:
379   stash  - the stash
380   row    - the global block-row correspoiding to the values
381   n      - the number of elements inserted. All elements belong to the above row.
382   idxn   - the global block-column indices corresponding to each of the blocks of
383            values. Each block is of size bs*bs.
384   values - the values inserted
385   rmax   - the number of block-rows in the original block.
386   cmax   - the number of block-columsn on the original block.
387   idx    - the index of the current block-row in the original block.
388 */
389 #undef __FUNCT__
390 #define __FUNCT__ "MatStashValuesColBlocked_Private"
391 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
392 {
393   PetscErrorCode ierr;
394   PetscInt i,j,k,bs2,bs=stash->bs;
395   const MatScalar *vals;
396   MatScalar       *array;
397 
398   PetscFunctionBegin;
399   bs2 = bs*bs;
400   if ((stash->n+n) > stash->nmax) {
401     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
402   }
403   for (i=0; i<n; i++) {
404     stash->idx[stash->n]   = row;
405     stash->idy[stash->n] = idxn[i];
406     /* Now copy over the block of values. Store the values column oriented.
407      This enables inserting multiple blocks belonging to a row with a single
408      funtion call */
409     array = stash->array + bs2*stash->n;
410     vals  = values + idx*bs + bs2*rmax*i;
411     for (j=0; j<bs; j++) {
412       for (k=0; k<bs; k++) {array[k] = vals[k];}
413       array += bs;
414       vals  += rmax*bs;
415     }
416     stash->n++;
417   }
418   PetscFunctionReturn(0);
419 }
420 /*
421   MatStashScatterBegin_Private - Initiates the transfer of values to the
422   correct owners. This function goes through the stash, and check the
423   owners of each stashed value, and sends the values off to the owner
424   processors.
425 
426   Input Parameters:
427   stash  - the stash
428   owners - an array of size 'no-of-procs' which gives the ownership range
429            for each node.
430 
431   Notes: The 'owners' array in the cased of the blocked-stash has the
432   ranges specified blocked global indices, and for the regular stash in
433   the proper global indices.
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatStashScatterBegin_Private"
437 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
438 {
439   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
440   PetscInt       size=stash->size,nsends;
441   PetscErrorCode ierr;
442   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx;
443   MatScalar      **rvalues,*svalues;
444   MPI_Comm       comm = stash->comm;
445   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
446   PetscMPIInt    *nprocs,*nlengths,nreceives;
447 
448   PetscFunctionBegin;
449 
450   bs2   = stash->bs*stash->bs;
451   /*  first count number of contributors to each processor */
452   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
453   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
454   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
455 
456   nlengths = nprocs+size;
457   j        = 0;
458   lastidx  = -1;
459   for (i=0; i<stash->n; i++) {
460     /* if indices are NOT locally sorted, need to start search at the beginning */
461     if (lastidx > (idx = stash->idx[i])) j = 0;
462     lastidx = idx;
463     for (; j<size; j++) {
464       if (idx >= owners[j] && idx < owners[j+1]) {
465         nlengths[j]++; owner[i] = j; break;
466       }
467     }
468   }
469   /* Now check what procs get messages - and compute nsends. */
470   for (i=0, nsends=0 ; i<size; i++) {
471     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
472   }
473 
474   { int  *onodes,*olengths;
475   /* Determine the number of messages to expect, their lengths, from from-ids */
476   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
477   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
478   /* since clubbing row,col - lengths are multiplied by 2 */
479   for (i=0; i<nreceives; i++) olengths[i] *=2;
480   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
481   /* values are size 'bs2' lengths (and remove earlier factor 2 */
482   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
483   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
484   ierr = PetscFree(onodes);CHKERRQ(ierr);
485   ierr = PetscFree(olengths);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for stuff going to
490          the ith processor
491   */
492   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
493   sindices = (PetscInt*)(svalues + bs2*stash->n);
494   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
495   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
496   starti   = startv + size;
497   /* use 2 sends the first with all_a, the next with all_i and all_j */
498   startv[0]  = 0; starti[0] = 0;
499   for (i=1; i<size; i++) {
500     startv[i] = startv[i-1] + nlengths[i-1];
501     starti[i] = starti[i-1] + nlengths[i-1]*2;
502   }
503   for (i=0; i<stash->n; i++) {
504     j = owner[i];
505     if (bs2 == 1) {
506       svalues[startv[j]]              = stash->array[i];
507     } else {
508       PetscInt       k;
509       MatScalar *buf1,*buf2;
510       buf1 = svalues+bs2*startv[j];
511       buf2 = stash->array+bs2*i;
512       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
513     }
514     sindices[starti[j]]               = stash->idx[i];
515     sindices[starti[j]+nlengths[j]]   = stash->idy[i];
516     startv[j]++;
517     starti[j]++;
518   }
519   startv[0] = 0;
520   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
521 
522   for (i=0,count=0; i<size; i++) {
523     if (nprocs[i]) {
524       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
525       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
526     }
527   }
528 #if defined(PETSC_USE_VERBOSE)
529   ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));CHKERRQ(ierr);
530   for (i=0; i<size; i++) {
531     if (nprocs[i]) {
532       ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
533     }
534   }
535 #endif
536   ierr = PetscFree(owner);CHKERRQ(ierr);
537   ierr = PetscFree(startv);CHKERRQ(ierr);
538   /* This memory is reused in scatter end  for a different purpose*/
539   for (i=0; i<2*size; i++) nprocs[i] = -1;
540   stash->nprocs      = nprocs;
541 
542   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
543   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
544 
545   for (i=0; i<nreceives; i++) {
546     recv_waits[2*i]   = recv_waits1[i];
547     recv_waits[2*i+1] = recv_waits2[i];
548   }
549   stash->recv_waits = recv_waits;
550   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
551   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
552 
553   stash->svalues    = svalues;    stash->rvalues     = rvalues;
554   stash->rindices   = rindices;   stash->send_waits  = send_waits;
555   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
556   PetscFunctionReturn(0);
557 }
558 
559 /*
560    MatStashScatterGetMesg_Private - This function waits on the receives posted
561    in the function MatStashScatterBegin_Private() and returns one message at
562    a time to the calling function. If no messages are left, it indicates this
563    by setting flg = 0, else it sets flg = 1.
564 
565    Input Parameters:
566    stash - the stash
567 
568    Output Parameters:
569    nvals - the number of entries in the current message.
570    rows  - an array of row indices (or blocked indices) corresponding to the values
571    cols  - an array of columnindices (or blocked indices) corresponding to the values
572    vals  - the values
573    flg   - 0 indicates no more message left, and the current call has no values associated.
574            1 indicates that the current call successfully received a message, and the
575              other output parameters nvals,rows,cols,vals are set appropriately.
576 */
577 #undef __FUNCT__
578 #define __FUNCT__ "MatStashScatterGetMesg_Private"
579 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
580 {
581   PetscErrorCode ierr;
582   PetscMPIInt    i,*flg_v,i1,i2;
583   PetscInt       bs2;
584   MPI_Status     recv_status;
585   PetscTruth     match_found = PETSC_FALSE;
586 
587   PetscFunctionBegin;
588 
589   *flg = 0; /* When a message is discovered this is reset to 1 */
590   /* Return if no more messages to process */
591   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
592 
593   flg_v = stash->nprocs;
594   bs2   = stash->bs*stash->bs;
595   /* If a matching pair of receieves are found, process them, and return the data to
596      the calling function. Until then keep receiving messages */
597   while (!match_found) {
598     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
599     /* Now pack the received message into a structure which is useable by others */
600     if (i % 2) {
601       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
602       flg_v[2*recv_status.MPI_SOURCE] = i/2;
603       *nvals = *nvals/bs2;
604     } else {
605       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
606       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
607       *nvals = *nvals/2; /* This message has both row indices and col indices */
608     }
609 
610     /* Check if we have both the messages from this proc */
611     i1 = flg_v[2*recv_status.MPI_SOURCE];
612     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
613     if (i1 != -1 && i2 != -1) {
614       *rows       = stash->rindices[i2];
615       *cols       = *rows + *nvals;
616       *vals       = stash->rvalues[i1];
617       *flg        = 1;
618       stash->nprocessed ++;
619       match_found = PETSC_TRUE;
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624