xref: /petsc/src/mat/utils/matstash.c (revision 7c4f633dc6bb6149cca88d301ead35a99e103cbb)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt;
27   PetscTruth     flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36 
37   nopt = stash->size;
38   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
39   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
40   if (flg) {
41     if (nopt == 1)                max = opt[0];
42     else if (nopt == stash->size) max = opt[stash->rank];
43     else if (stash->rank < nopt)  max = opt[stash->rank];
44     else                          max = 0; /* Use default */
45     stash->umax = max;
46   } else {
47     stash->umax = 0;
48   }
49   ierr = PetscFree(opt);CHKERRQ(ierr);
50   if (bs <= 0) bs = 1;
51 
52   stash->bs       = bs;
53   stash->nmax     = 0;
54   stash->oldnmax  = 0;
55   stash->n        = 0;
56   stash->reallocs = -1;
57   stash->space_head = 0;
58   stash->space      = 0;
59 
60   stash->send_waits  = 0;
61   stash->recv_waits  = 0;
62   stash->send_status = 0;
63   stash->nsends      = 0;
64   stash->nrecvs      = 0;
65   stash->svalues     = 0;
66   stash->rvalues     = 0;
67   stash->rindices    = 0;
68   stash->nprocs      = 0;
69   stash->nprocessed  = 0;
70   PetscFunctionReturn(0);
71 }
72 
73 /*
74    MatStashDestroy_Private - Destroy the stash
75 */
76 #undef __FUNCT__
77 #define __FUNCT__ "MatStashDestroy_Private"
78 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
79 {
80   PetscErrorCode ierr;
81 
82   PetscFunctionBegin;
83   if (stash->space_head){
84     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
85     stash->space_head = 0;
86     stash->space      = 0;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 /*
92    MatStashScatterEnd_Private - This is called as the fial stage of
93    scatter. The final stages of messagepassing is done here, and
94    all the memory used for messagepassing is cleanedu up. This
95    routine also resets the stash, and deallocates the memory used
96    for the stash. It also keeps track of the current memory usage
97    so that the same value can be used the next time through.
98 */
99 #undef __FUNCT__
100 #define __FUNCT__ "MatStashScatterEnd_Private"
101 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
102 {
103   PetscErrorCode ierr;
104   PetscInt       nsends=stash->nsends,bs2,oldnmax;
105   MPI_Status     *send_status;
106 
107   PetscFunctionBegin;
108   /* wait on sends */
109   if (nsends) {
110     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
111     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
112     ierr = PetscFree(send_status);CHKERRQ(ierr);
113   }
114 
115   /* Now update nmaxold to be app 10% more than max n used, this way the
116      wastage of space is reduced the next time this stash is used.
117      Also update the oldmax, only if it increases */
118   if (stash->n) {
119     bs2      = stash->bs*stash->bs;
120     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
121     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
122   }
123 
124   stash->nmax       = 0;
125   stash->n          = 0;
126   stash->reallocs   = -1;
127   stash->nprocessed = 0;
128   if (stash->space_head){
129     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
130     stash->space_head = 0;
131     stash->space      = 0;
132   }
133   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134   stash->send_waits = 0;
135   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
136   stash->recv_waits = 0;
137   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
138   stash->svalues = 0;
139   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
140   stash->rvalues = 0;
141   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
142   stash->rindices = 0;
143   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
144   stash->nprocs = 0;
145   PetscFunctionReturn(0);
146 }
147 
148 /*
149    MatStashGetInfo_Private - Gets the relavant statistics of the stash
150 
151    Input Parameters:
152    stash    - the stash
153    nstash   - the size of the stash. Indicates the number of values stored.
154    reallocs - the number of additional mallocs incurred.
155 
156 */
157 #undef __FUNCT__
158 #define __FUNCT__ "MatStashGetInfo_Private"
159 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
160 {
161   PetscInt bs2 = stash->bs*stash->bs;
162 
163   PetscFunctionBegin;
164   if (nstash) *nstash   = stash->n*bs2;
165   if (reallocs) {
166     if (stash->reallocs < 0) *reallocs = 0;
167     else                     *reallocs = stash->reallocs;
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 /*
173    MatStashSetInitialSize_Private - Sets the initial size of the stash
174 
175    Input Parameters:
176    stash  - the stash
177    max    - the value that is used as the max size of the stash.
178             this value is used while allocating memory.
179 */
180 #undef __FUNCT__
181 #define __FUNCT__ "MatStashSetInitialSize_Private"
182 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
183 {
184   PetscFunctionBegin;
185   stash->umax = max;
186   PetscFunctionReturn(0);
187 }
188 
189 /* MatStashExpand_Private - Expand the stash. This function is called
190    when the space in the stash is not sufficient to add the new values
191    being inserted into the stash.
192 
193    Input Parameters:
194    stash - the stash
195    incr  - the minimum increase requested
196 
197    Notes:
198    This routine doubles the currently used memory.
199  */
200 #undef __FUNCT__
201 #define __FUNCT__ "MatStashExpand_Private"
202 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
203 {
204   PetscErrorCode ierr;
205   PetscInt       newnmax,bs2= stash->bs*stash->bs;
206 
207   PetscFunctionBegin;
208   /* allocate a larger stash */
209   if (!stash->oldnmax && !stash->nmax) { /* new stash */
210     if (stash->umax)                  newnmax = stash->umax/bs2;
211     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
212   } else if (!stash->nmax) { /* resuing stash */
213     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
214     else                              newnmax = stash->oldnmax/bs2;
215   } else                              newnmax = stash->nmax*2;
216   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
217 
218   /* Get a MatStashSpace and attach it to stash */
219   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
220   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
221     stash->space_head = stash->space;
222   }
223 
224   stash->reallocs++;
225   stash->nmax = newnmax;
226   PetscFunctionReturn(0);
227 }
228 /*
229   MatStashValuesRow_Private - inserts values into the stash. This function
230   expects the values to be roworiented. Multiple columns belong to the same row
231   can be inserted with a single call to this function.
232 
233   Input Parameters:
234   stash  - the stash
235   row    - the global row correspoiding to the values
236   n      - the number of elements inserted. All elements belong to the above row.
237   idxn   - the global column indices corresponding to each of the values.
238   values - the values inserted
239 */
240 #undef __FUNCT__
241 #define __FUNCT__ "MatStashValuesRow_Private"
242 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[])
243 {
244   PetscErrorCode     ierr;
245   PetscInt           i,k;
246   PetscMatStashSpace space=stash->space;
247 
248   PetscFunctionBegin;
249   /* Check and see if we have sufficient memory */
250   if (!space || space->local_remaining < n){
251     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
252   }
253   space = stash->space;
254   k     = space->local_used;
255   for (i=0; i<n; i++) {
256     space->idx[k] = row;
257     space->idy[k] = idxn[i];
258     space->val[k] = values[i];
259     k++;
260   }
261   stash->n               += n;
262   space->local_used      += n;
263   space->local_remaining -= n;
264   PetscFunctionReturn(0);
265 }
266 
267 /*
268   MatStashValuesCol_Private - inserts values into the stash. This function
269   expects the values to be columnoriented. Multiple columns belong to the same row
270   can be inserted with a single call to this function.
271 
272   Input Parameters:
273   stash   - the stash
274   row     - the global row correspoiding to the values
275   n       - the number of elements inserted. All elements belong to the above row.
276   idxn    - the global column indices corresponding to each of the values.
277   values  - the values inserted
278   stepval - the consecutive values are sepated by a distance of stepval.
279             this happens because the input is columnoriented.
280 */
281 #undef __FUNCT__
282 #define __FUNCT__ "MatStashValuesCol_Private"
283 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval)
284 {
285   PetscErrorCode     ierr;
286   PetscInt           i,k;
287   PetscMatStashSpace space=stash->space;
288 
289   PetscFunctionBegin;
290   /* Check and see if we have sufficient memory */
291   if (!space || space->local_remaining < n){
292     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
293   }
294   space = stash->space;
295   k = space->local_used;
296   for (i=0; i<n; i++) {
297     space->idx[k] = row;
298     space->idy[k] = idxn[i];
299     space->val[k] = values[i*stepval];
300     k++;
301   }
302   stash->n               += n;
303   space->local_used      += n;
304   space->local_remaining -= n;
305   PetscFunctionReturn(0);
306 }
307 
308 /*
309   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
310   This function expects the values to be roworiented. Multiple columns belong
311   to the same block-row can be inserted with a single call to this function.
312   This function extracts the sub-block of values based on the dimensions of
313   the original input block, and the row,col values corresponding to the blocks.
314 
315   Input Parameters:
316   stash  - the stash
317   row    - the global block-row correspoiding to the values
318   n      - the number of elements inserted. All elements belong to the above row.
319   idxn   - the global block-column indices corresponding to each of the blocks of
320            values. Each block is of size bs*bs.
321   values - the values inserted
322   rmax   - the number of block-rows in the original block.
323   cmax   - the number of block-columsn on the original block.
324   idx    - the index of the current block-row in the original block.
325 */
326 #undef __FUNCT__
327 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
328 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
329 {
330   PetscErrorCode     ierr;
331   PetscInt           i,j,k,bs2,bs=stash->bs,l;
332   const PetscScalar  *vals;
333   PetscScalar        *array;
334   PetscMatStashSpace space=stash->space;
335 
336   PetscFunctionBegin;
337   if (!space || space->local_remaining < n){
338     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
339   }
340   space = stash->space;
341   l     = space->local_used;
342   bs2   = bs*bs;
343   for (i=0; i<n; i++) {
344     space->idx[l] = row;
345     space->idy[l] = idxn[i];
346     /* Now copy over the block of values. Store the values column oriented.
347        This enables inserting multiple blocks belonging to a row with a single
348        funtion call */
349     array = space->val + bs2*l;
350     vals  = values + idx*bs2*n + bs*i;
351     for (j=0; j<bs; j++) {
352       for (k=0; k<bs; k++) array[k*bs] = vals[k];
353       array++;
354       vals  += cmax*bs;
355     }
356     l++;
357   }
358   stash->n               += n;
359   space->local_used      += n;
360   space->local_remaining -= n;
361   PetscFunctionReturn(0);
362 }
363 
364 /*
365   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
366   This function expects the values to be roworiented. Multiple columns belong
367   to the same block-row can be inserted with a single call to this function.
368   This function extracts the sub-block of values based on the dimensions of
369   the original input block, and the row,col values corresponding to the blocks.
370 
371   Input Parameters:
372   stash  - the stash
373   row    - the global block-row correspoiding to the values
374   n      - the number of elements inserted. All elements belong to the above row.
375   idxn   - the global block-column indices corresponding to each of the blocks of
376            values. Each block is of size bs*bs.
377   values - the values inserted
378   rmax   - the number of block-rows in the original block.
379   cmax   - the number of block-columsn on the original block.
380   idx    - the index of the current block-row in the original block.
381 */
382 #undef __FUNCT__
383 #define __FUNCT__ "MatStashValuesColBlocked_Private"
384 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
385 {
386   PetscErrorCode     ierr;
387   PetscInt           i,j,k,bs2,bs=stash->bs,l;
388   const PetscScalar  *vals;
389   PetscScalar        *array;
390   PetscMatStashSpace space=stash->space;
391 
392   PetscFunctionBegin;
393   if (!space || space->local_remaining < n){
394     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
395   }
396   space = stash->space;
397   l     = space->local_used;
398   bs2   = bs*bs;
399   for (i=0; i<n; i++) {
400     space->idx[l] = row;
401     space->idy[l] = idxn[i];
402     /* Now copy over the block of values. Store the values column oriented.
403      This enables inserting multiple blocks belonging to a row with a single
404      funtion call */
405     array = space->val + bs2*l;
406     vals  = values + idx*bs2*n + bs*i;
407     for (j=0; j<bs; j++) {
408       for (k=0; k<bs; k++) {array[k] = vals[k];}
409       array += bs;
410       vals  += rmax*bs;
411     }
412     l++;
413   }
414   stash->n               += n;
415   space->local_used      += n;
416   space->local_remaining -= n;
417   PetscFunctionReturn(0);
418 }
419 /*
420   MatStashScatterBegin_Private - Initiates the transfer of values to the
421   correct owners. This function goes through the stash, and check the
422   owners of each stashed value, and sends the values off to the owner
423   processors.
424 
425   Input Parameters:
426   stash  - the stash
427   owners - an array of size 'no-of-procs' which gives the ownership range
428            for each node.
429 
430   Notes: The 'owners' array in the cased of the blocked-stash has the
431   ranges specified blocked global indices, and for the regular stash in
432   the proper global indices.
433 */
434 #undef __FUNCT__
435 #define __FUNCT__ "MatStashScatterBegin_Private"
436 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
437 {
438   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
439   PetscInt          size=stash->size,nsends;
440   PetscErrorCode    ierr;
441   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
442   PetscScalar       **rvalues,*svalues;
443   MPI_Comm          comm = stash->comm;
444   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
445   PetscMPIInt       *nprocs,*nlengths,nreceives;
446   PetscInt          *sp_idx,*sp_idy;
447   PetscScalar       *sp_val;
448   PetscMatStashSpace space,space_next;
449 
450   PetscFunctionBegin;
451   bs2 = stash->bs*stash->bs;
452 
453   /*  first count number of contributors to each processor */
454   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
455   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
456   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
457 
458   nlengths = nprocs+size;
459   i = j    = 0;
460   lastidx  = -1;
461   space    = stash->space_head;
462   while (space != PETSC_NULL){
463     space_next = space->next;
464     sp_idx     = space->idx;
465     for (l=0; l<space->local_used; l++){
466       /* if indices are NOT locally sorted, need to start search at the beginning */
467       if (lastidx > (idx = sp_idx[l])) j = 0;
468       lastidx = idx;
469       for (; j<size; j++) {
470         if (idx >= owners[j] && idx < owners[j+1]) {
471           nlengths[j]++; owner[i] = j; break;
472         }
473       }
474       i++;
475     }
476     space = space_next;
477   }
478   /* Now check what procs get messages - and compute nsends. */
479   for (i=0, nsends=0 ; i<size; i++) {
480     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
481   }
482 
483   {PetscMPIInt  *onodes,*olengths;
484   /* Determine the number of messages to expect, their lengths, from from-ids */
485   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
486   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
487   /* since clubbing row,col - lengths are multiplied by 2 */
488   for (i=0; i<nreceives; i++) olengths[i] *=2;
489   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
490   /* values are size 'bs2' lengths (and remove earlier factor 2 */
491   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
492   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
493   ierr = PetscFree(onodes);CHKERRQ(ierr);
494   ierr = PetscFree(olengths);CHKERRQ(ierr);
495   }
496 
497   /* do sends:
498       1) starts[i] gives the starting index in svalues for stuff going to
499          the ith processor
500   */
501   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
502   sindices = (PetscInt*)(svalues + bs2*stash->n);
503   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
504   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
505   starti   = startv + size;
506   /* use 2 sends the first with all_a, the next with all_i and all_j */
507   startv[0]  = 0; starti[0] = 0;
508   for (i=1; i<size; i++) {
509     startv[i] = startv[i-1] + nlengths[i-1];
510     starti[i] = starti[i-1] + nlengths[i-1]*2;
511   }
512 
513   i     = 0;
514   space = stash->space_head;
515   while (space != PETSC_NULL){
516     space_next = space->next;
517     sp_idx = space->idx;
518     sp_idy = space->idy;
519     sp_val = space->val;
520     for (l=0; l<space->local_used; l++){
521       j = owner[i];
522       if (bs2 == 1) {
523         svalues[startv[j]] = sp_val[l];
524       } else {
525         PetscInt     k;
526         PetscScalar *buf1,*buf2;
527         buf1 = svalues+bs2*startv[j];
528         buf2 = space->val + bs2*l;
529         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
530       }
531       sindices[starti[j]]             = sp_idx[l];
532       sindices[starti[j]+nlengths[j]] = sp_idy[l];
533       startv[j]++;
534       starti[j]++;
535       i++;
536     }
537     space = space_next;
538   }
539   startv[0] = 0;
540   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
541 
542   for (i=0,count=0; i<size; i++) {
543     if (nprocs[i]) {
544       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
545       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
546     }
547   }
548 #if defined(PETSC_USE_INFO)
549   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
550   for (i=0; i<size; i++) {
551     if (nprocs[i]) {
552       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
553     }
554   }
555 #endif
556   ierr = PetscFree(owner);CHKERRQ(ierr);
557   ierr = PetscFree(startv);CHKERRQ(ierr);
558   /* This memory is reused in scatter end  for a different purpose*/
559   for (i=0; i<2*size; i++) nprocs[i] = -1;
560   stash->nprocs = nprocs;
561 
562   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
563   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
564 
565   for (i=0; i<nreceives; i++) {
566     recv_waits[2*i]   = recv_waits1[i];
567     recv_waits[2*i+1] = recv_waits2[i];
568   }
569   stash->recv_waits = recv_waits;
570   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
571   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
572 
573   stash->svalues    = svalues;    stash->rvalues     = rvalues;
574   stash->rindices   = rindices;   stash->send_waits  = send_waits;
575   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
576   PetscFunctionReturn(0);
577 }
578 
579 /*
580    MatStashScatterGetMesg_Private - This function waits on the receives posted
581    in the function MatStashScatterBegin_Private() and returns one message at
582    a time to the calling function. If no messages are left, it indicates this
583    by setting flg = 0, else it sets flg = 1.
584 
585    Input Parameters:
586    stash - the stash
587 
588    Output Parameters:
589    nvals - the number of entries in the current message.
590    rows  - an array of row indices (or blocked indices) corresponding to the values
591    cols  - an array of columnindices (or blocked indices) corresponding to the values
592    vals  - the values
593    flg   - 0 indicates no more message left, and the current call has no values associated.
594            1 indicates that the current call successfully received a message, and the
595              other output parameters nvals,rows,cols,vals are set appropriately.
596 */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatStashScatterGetMesg_Private"
599 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
600 {
601   PetscErrorCode ierr;
602   PetscMPIInt    i,*flg_v,i1,i2;
603   PetscInt       bs2;
604   MPI_Status     recv_status;
605   PetscTruth     match_found = PETSC_FALSE;
606 
607   PetscFunctionBegin;
608 
609   *flg = 0; /* When a message is discovered this is reset to 1 */
610   /* Return if no more messages to process */
611   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
612 
613   flg_v = stash->nprocs;
614   bs2   = stash->bs*stash->bs;
615   /* If a matching pair of receieves are found, process them, and return the data to
616      the calling function. Until then keep receiving messages */
617   while (!match_found) {
618     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
619     /* Now pack the received message into a structure which is useable by others */
620     if (i % 2) {
621       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
622       flg_v[2*recv_status.MPI_SOURCE] = i/2;
623       *nvals = *nvals/bs2;
624     } else {
625       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
626       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
627       *nvals = *nvals/2; /* This message has both row indices and col indices */
628     }
629 
630     /* Check if we have both messages from this proc */
631     i1 = flg_v[2*recv_status.MPI_SOURCE];
632     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
633     if (i1 != -1 && i2 != -1) {
634       *rows       = stash->rindices[i2];
635       *cols       = *rows + *nvals;
636       *vals       = stash->rvalues[i1];
637       *flg        = 1;
638       stash->nprocessed ++;
639       match_found = PETSC_TRUE;
640     }
641   }
642   PetscFunctionReturn(0);
643 }
644