xref: /petsc/src/mat/utils/matstash.c (revision 5c6c1daec53e1d9ab0bec9db5309fd8fc7645b8d)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 /*
7   MatStashCreate_Private - Creates a stash,currently used for all the parallel
8   matrix implementations. The stash is where elements of a matrix destined
9   to be stored on other processors are kept until matrix assembly is done.
10 
11   This is a simple minded stash. Simply adds entries to end of stash.
12 
13   Input Parameters:
14   comm - communicator, required for scatters.
15   bs   - stash block size. used when stashing blocks of values
16 
17   Output Parameters:
18   stash    - the newly created stash
19 */
20 #undef __FUNCT__
21 #define __FUNCT__ "MatStashCreate_Private"
22 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
23 {
24   PetscErrorCode ierr;
25   PetscInt       max,*opt,nopt,i;
26   PetscBool      flg;
27 
28   PetscFunctionBegin;
29   /* Require 2 tags,get the second using PetscCommGetNewTag() */
30   stash->comm = comm;
31   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
33   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
34   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
35   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
36   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
37 
38 
39   nopt = stash->size;
40   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
41   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
42   if (flg) {
43     if (nopt == 1)                max = opt[0];
44     else if (nopt == stash->size) max = opt[stash->rank];
45     else if (stash->rank < nopt)  max = opt[stash->rank];
46     else                          max = 0; /* Use default */
47     stash->umax = max;
48   } else {
49     stash->umax = 0;
50   }
51   ierr = PetscFree(opt);CHKERRQ(ierr);
52   if (bs <= 0) bs = 1;
53 
54   stash->bs       = bs;
55   stash->nmax     = 0;
56   stash->oldnmax  = 0;
57   stash->n        = 0;
58   stash->reallocs = -1;
59   stash->space_head = 0;
60   stash->space      = 0;
61 
62   stash->send_waits  = 0;
63   stash->recv_waits  = 0;
64   stash->send_status = 0;
65   stash->nsends      = 0;
66   stash->nrecvs      = 0;
67   stash->svalues     = 0;
68   stash->rvalues     = 0;
69   stash->rindices    = 0;
70   stash->nprocessed  = 0;
71 
72   stash->reproduce   = PETSC_FALSE;
73   ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 /*
78    MatStashDestroy_Private - Destroy the stash
79 */
80 #undef __FUNCT__
81 #define __FUNCT__ "MatStashDestroy_Private"
82 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
83 {
84   PetscErrorCode ierr;
85 
86   PetscFunctionBegin;
87   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
88   stash->space = 0;
89   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
90   PetscFunctionReturn(0);
91 }
92 
93 /*
94    MatStashScatterEnd_Private - This is called as the final stage of
95    scatter. The final stages of message passing is done here, and
96    all the memory used for message passing is cleaned up. This
97    routine also resets the stash, and deallocates the memory used
98    for the stash. It also keeps track of the current memory usage
99    so that the same value can be used the next time through.
100 */
101 #undef __FUNCT__
102 #define __FUNCT__ "MatStashScatterEnd_Private"
103 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
104 {
105   PetscErrorCode ierr;
106   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
107   MPI_Status     *send_status;
108 
109   PetscFunctionBegin;
110   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
111   /* wait on sends */
112   if (nsends) {
113     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
114     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
115     ierr = PetscFree(send_status);CHKERRQ(ierr);
116   }
117 
118   /* Now update nmaxold to be app 10% more than max n used, this way the
119      wastage of space is reduced the next time this stash is used.
120      Also update the oldmax, only if it increases */
121   if (stash->n) {
122     bs2      = stash->bs*stash->bs;
123     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
124     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
125   }
126 
127   stash->nmax       = 0;
128   stash->n          = 0;
129   stash->reallocs   = -1;
130   stash->nprocessed = 0;
131   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
132   stash->space      = 0;
133   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
135   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
136   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
137   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
138   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
139   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 /*
144    MatStashGetInfo_Private - Gets the relavant statistics of the stash
145 
146    Input Parameters:
147    stash    - the stash
148    nstash   - the size of the stash. Indicates the number of values stored.
149    reallocs - the number of additional mallocs incurred.
150 
151 */
152 #undef __FUNCT__
153 #define __FUNCT__ "MatStashGetInfo_Private"
154 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
155 {
156   PetscInt bs2 = stash->bs*stash->bs;
157 
158   PetscFunctionBegin;
159   if (nstash) *nstash   = stash->n*bs2;
160   if (reallocs) {
161     if (stash->reallocs < 0) *reallocs = 0;
162     else                     *reallocs = stash->reallocs;
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 /*
168    MatStashSetInitialSize_Private - Sets the initial size of the stash
169 
170    Input Parameters:
171    stash  - the stash
172    max    - the value that is used as the max size of the stash.
173             this value is used while allocating memory.
174 */
175 #undef __FUNCT__
176 #define __FUNCT__ "MatStashSetInitialSize_Private"
177 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
178 {
179   PetscFunctionBegin;
180   stash->umax = max;
181   PetscFunctionReturn(0);
182 }
183 
184 /* MatStashExpand_Private - Expand the stash. This function is called
185    when the space in the stash is not sufficient to add the new values
186    being inserted into the stash.
187 
188    Input Parameters:
189    stash - the stash
190    incr  - the minimum increase requested
191 
192    Notes:
193    This routine doubles the currently used memory.
194  */
195 #undef __FUNCT__
196 #define __FUNCT__ "MatStashExpand_Private"
197 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
198 {
199   PetscErrorCode ierr;
200   PetscInt       newnmax,bs2= stash->bs*stash->bs;
201 
202   PetscFunctionBegin;
203   /* allocate a larger stash */
204   if (!stash->oldnmax && !stash->nmax) { /* new stash */
205     if (stash->umax)                  newnmax = stash->umax/bs2;
206     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
207   } else if (!stash->nmax) { /* resuing stash */
208     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
209     else                              newnmax = stash->oldnmax/bs2;
210   } else                              newnmax = stash->nmax*2;
211   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
212 
213   /* Get a MatStashSpace and attach it to stash */
214   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
215   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
216     stash->space_head = stash->space;
217   }
218 
219   stash->reallocs++;
220   stash->nmax = newnmax;
221   PetscFunctionReturn(0);
222 }
223 /*
224   MatStashValuesRow_Private - inserts values into the stash. This function
225   expects the values to be roworiented. Multiple columns belong to the same row
226   can be inserted with a single call to this function.
227 
228   Input Parameters:
229   stash  - the stash
230   row    - the global row correspoiding to the values
231   n      - the number of elements inserted. All elements belong to the above row.
232   idxn   - the global column indices corresponding to each of the values.
233   values - the values inserted
234 */
235 #undef __FUNCT__
236 #define __FUNCT__ "MatStashValuesRow_Private"
237 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool  ignorezeroentries)
238 {
239   PetscErrorCode     ierr;
240   PetscInt           i,k,cnt = 0;
241   PetscMatStashSpace space=stash->space;
242 
243   PetscFunctionBegin;
244   /* Check and see if we have sufficient memory */
245   if (!space || space->local_remaining < n){
246     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
247   }
248   space = stash->space;
249   k     = space->local_used;
250   for (i=0; i<n; i++) {
251     if (ignorezeroentries && (values[i] == 0.0)) continue;
252     space->idx[k] = row;
253     space->idy[k] = idxn[i];
254     space->val[k] = values[i];
255     k++;
256     cnt++;
257   }
258   stash->n               += cnt;
259   space->local_used      += cnt;
260   space->local_remaining -= cnt;
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265   MatStashValuesCol_Private - inserts values into the stash. This function
266   expects the values to be columnoriented. Multiple columns belong to the same row
267   can be inserted with a single call to this function.
268 
269   Input Parameters:
270   stash   - the stash
271   row     - the global row correspoiding to the values
272   n       - the number of elements inserted. All elements belong to the above row.
273   idxn    - the global column indices corresponding to each of the values.
274   values  - the values inserted
275   stepval - the consecutive values are sepated by a distance of stepval.
276             this happens because the input is columnoriented.
277 */
278 #undef __FUNCT__
279 #define __FUNCT__ "MatStashValuesCol_Private"
280 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool  ignorezeroentries)
281 {
282   PetscErrorCode     ierr;
283   PetscInt           i,k,cnt = 0;
284   PetscMatStashSpace space=stash->space;
285 
286   PetscFunctionBegin;
287   /* Check and see if we have sufficient memory */
288   if (!space || space->local_remaining < n){
289     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
290   }
291   space = stash->space;
292   k = space->local_used;
293   for (i=0; i<n; i++) {
294     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
295     space->idx[k] = row;
296     space->idy[k] = idxn[i];
297     space->val[k] = values[i*stepval];
298     k++;
299     cnt++;
300   }
301   stash->n               += cnt;
302   space->local_used      += cnt;
303   space->local_remaining -= cnt;
304   PetscFunctionReturn(0);
305 }
306 
307 /*
308   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
309   This function expects the values to be roworiented. Multiple columns belong
310   to the same block-row can be inserted with a single call to this function.
311   This function extracts the sub-block of values based on the dimensions of
312   the original input block, and the row,col values corresponding to the blocks.
313 
314   Input Parameters:
315   stash  - the stash
316   row    - the global block-row correspoiding to the values
317   n      - the number of elements inserted. All elements belong to the above row.
318   idxn   - the global block-column indices corresponding to each of the blocks of
319            values. Each block is of size bs*bs.
320   values - the values inserted
321   rmax   - the number of block-rows in the original block.
322   cmax   - the number of block-columsn on the original block.
323   idx    - the index of the current block-row in the original block.
324 */
325 #undef __FUNCT__
326 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
327 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
328 {
329   PetscErrorCode     ierr;
330   PetscInt           i,j,k,bs2,bs=stash->bs,l;
331   const PetscScalar  *vals;
332   PetscScalar        *array;
333   PetscMatStashSpace space=stash->space;
334 
335   PetscFunctionBegin;
336   if (!space || space->local_remaining < n){
337     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
338   }
339   space = stash->space;
340   l     = space->local_used;
341   bs2   = bs*bs;
342   for (i=0; i<n; i++) {
343     space->idx[l] = row;
344     space->idy[l] = idxn[i];
345     /* Now copy over the block of values. Store the values column oriented.
346        This enables inserting multiple blocks belonging to a row with a single
347        funtion call */
348     array = space->val + bs2*l;
349     vals  = values + idx*bs2*n + bs*i;
350     for (j=0; j<bs; j++) {
351       for (k=0; k<bs; k++) array[k*bs] = vals[k];
352       array++;
353       vals  += cmax*bs;
354     }
355     l++;
356   }
357   stash->n               += n;
358   space->local_used      += n;
359   space->local_remaining -= n;
360   PetscFunctionReturn(0);
361 }
362 
363 /*
364   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
365   This function expects the values to be roworiented. Multiple columns belong
366   to the same block-row can be inserted with a single call to this function.
367   This function extracts the sub-block of values based on the dimensions of
368   the original input block, and the row,col values corresponding to the blocks.
369 
370   Input Parameters:
371   stash  - the stash
372   row    - the global block-row correspoiding to the values
373   n      - the number of elements inserted. All elements belong to the above row.
374   idxn   - the global block-column indices corresponding to each of the blocks of
375            values. Each block is of size bs*bs.
376   values - the values inserted
377   rmax   - the number of block-rows in the original block.
378   cmax   - the number of block-columsn on the original block.
379   idx    - the index of the current block-row in the original block.
380 */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatStashValuesColBlocked_Private"
383 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
384 {
385   PetscErrorCode     ierr;
386   PetscInt           i,j,k,bs2,bs=stash->bs,l;
387   const PetscScalar  *vals;
388   PetscScalar        *array;
389   PetscMatStashSpace space=stash->space;
390 
391   PetscFunctionBegin;
392   if (!space || space->local_remaining < n){
393     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
394   }
395   space = stash->space;
396   l     = space->local_used;
397   bs2   = bs*bs;
398   for (i=0; i<n; i++) {
399     space->idx[l] = row;
400     space->idy[l] = idxn[i];
401     /* Now copy over the block of values. Store the values column oriented.
402      This enables inserting multiple blocks belonging to a row with a single
403      funtion call */
404     array = space->val + bs2*l;
405     vals  = values + idx*bs2*n + bs*i;
406     for (j=0; j<bs; j++) {
407       for (k=0; k<bs; k++) {array[k] = vals[k];}
408       array += bs;
409       vals  += rmax*bs;
410     }
411     l++;
412   }
413   stash->n               += n;
414   space->local_used      += n;
415   space->local_remaining -= n;
416   PetscFunctionReturn(0);
417 }
418 /*
419   MatStashScatterBegin_Private - Initiates the transfer of values to the
420   correct owners. This function goes through the stash, and check the
421   owners of each stashed value, and sends the values off to the owner
422   processors.
423 
424   Input Parameters:
425   stash  - the stash
426   owners - an array of size 'no-of-procs' which gives the ownership range
427            for each node.
428 
429   Notes: The 'owners' array in the cased of the blocked-stash has the
430   ranges specified blocked global indices, and for the regular stash in
431   the proper global indices.
432 */
433 #undef __FUNCT__
434 #define __FUNCT__ "MatStashScatterBegin_Private"
435 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
436 {
437   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
438   PetscInt          size=stash->size,nsends;
439   PetscErrorCode    ierr;
440   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
441   PetscScalar       **rvalues,*svalues;
442   MPI_Comm          comm = stash->comm;
443   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
444   PetscMPIInt       *nprocs,*nlengths,nreceives;
445   PetscInt          *sp_idx,*sp_idy;
446   PetscScalar       *sp_val;
447   PetscMatStashSpace space,space_next;
448 
449   PetscFunctionBegin;
450   bs2 = stash->bs*stash->bs;
451 
452   /*  first count number of contributors to each processor */
453   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
454   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
455   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
456   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
457   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
458 
459   i = j    = 0;
460   lastidx  = -1;
461   space    = stash->space_head;
462   while (space != PETSC_NULL){
463     space_next = space->next;
464     sp_idx     = space->idx;
465     for (l=0; l<space->local_used; l++){
466       /* if indices are NOT locally sorted, need to start search at the beginning */
467       if (lastidx > (idx = sp_idx[l])) j = 0;
468       lastidx = idx;
469       for (; j<size; j++) {
470         if (idx >= owners[j] && idx < owners[j+1]) {
471           nlengths[j]++; owner[i] = j; break;
472         }
473       }
474       i++;
475     }
476     space = space_next;
477   }
478   /* Now check what procs get messages - and compute nsends. */
479   for (i=0, nsends=0 ; i<size; i++) {
480     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
481   }
482 
483   {PetscMPIInt  *onodes,*olengths;
484   /* Determine the number of messages to expect, their lengths, from from-ids */
485   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
486   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
487   /* since clubbing row,col - lengths are multiplied by 2 */
488   for (i=0; i<nreceives; i++) olengths[i] *=2;
489   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
490   /* values are size 'bs2' lengths (and remove earlier factor 2 */
491   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
492   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
493   ierr = PetscFree(onodes);CHKERRQ(ierr);
494   ierr = PetscFree(olengths);CHKERRQ(ierr);
495   }
496 
497   /* do sends:
498       1) starts[i] gives the starting index in svalues for stuff going to
499          the ith processor
500   */
501   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
502   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
503   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
504   /* use 2 sends the first with all_a, the next with all_i and all_j */
505   startv[0]  = 0; starti[0] = 0;
506   for (i=1; i<size; i++) {
507     startv[i] = startv[i-1] + nlengths[i-1];
508     starti[i] = starti[i-1] + 2*nlengths[i-1];
509   }
510 
511   i     = 0;
512   space = stash->space_head;
513   while (space != PETSC_NULL){
514     space_next = space->next;
515     sp_idx = space->idx;
516     sp_idy = space->idy;
517     sp_val = space->val;
518     for (l=0; l<space->local_used; l++){
519       j = owner[i];
520       if (bs2 == 1) {
521         svalues[startv[j]] = sp_val[l];
522       } else {
523         PetscInt     k;
524         PetscScalar *buf1,*buf2;
525         buf1 = svalues+bs2*startv[j];
526         buf2 = space->val + bs2*l;
527         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
528       }
529       sindices[starti[j]]             = sp_idx[l];
530       sindices[starti[j]+nlengths[j]] = sp_idy[l];
531       startv[j]++;
532       starti[j]++;
533       i++;
534     }
535     space = space_next;
536   }
537   startv[0] = 0;
538   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
539 
540   for (i=0,count=0; i<size; i++) {
541     if (nprocs[i]) {
542       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
543       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
544     }
545   }
546 #if defined(PETSC_USE_INFO)
547   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
548   for (i=0; i<size; i++) {
549     if (nprocs[i]) {
550       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
551     }
552   }
553 #endif
554   ierr = PetscFree(nlengths);CHKERRQ(ierr);
555   ierr = PetscFree(owner);CHKERRQ(ierr);
556   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
557   ierr = PetscFree(nprocs);CHKERRQ(ierr);
558 
559   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
560   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
561 
562   for (i=0; i<nreceives; i++) {
563     recv_waits[2*i]   = recv_waits1[i];
564     recv_waits[2*i+1] = recv_waits2[i];
565   }
566   stash->recv_waits = recv_waits;
567   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
568   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
569 
570   stash->svalues     = svalues;
571   stash->sindices    = sindices;
572   stash->rvalues     = rvalues;
573   stash->rindices    = rindices;
574   stash->send_waits  = send_waits;
575   stash->nsends      = nsends;
576   stash->nrecvs      = nreceives;
577   stash->reproduce_count = 0;
578   PetscFunctionReturn(0);
579 }
580 
581 /*
582    MatStashScatterGetMesg_Private - This function waits on the receives posted
583    in the function MatStashScatterBegin_Private() and returns one message at
584    a time to the calling function. If no messages are left, it indicates this
585    by setting flg = 0, else it sets flg = 1.
586 
587    Input Parameters:
588    stash - the stash
589 
590    Output Parameters:
591    nvals - the number of entries in the current message.
592    rows  - an array of row indices (or blocked indices) corresponding to the values
593    cols  - an array of columnindices (or blocked indices) corresponding to the values
594    vals  - the values
595    flg   - 0 indicates no more message left, and the current call has no values associated.
596            1 indicates that the current call successfully received a message, and the
597              other output parameters nvals,rows,cols,vals are set appropriately.
598 */
599 #undef __FUNCT__
600 #define __FUNCT__ "MatStashScatterGetMesg_Private"
601 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
602 {
603   PetscErrorCode ierr;
604   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
605   PetscInt       bs2;
606   MPI_Status     recv_status;
607   PetscBool      match_found = PETSC_FALSE;
608 
609   PetscFunctionBegin;
610 
611   *flg = 0; /* When a message is discovered this is reset to 1 */
612   /* Return if no more messages to process */
613   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
614 
615   bs2   = stash->bs*stash->bs;
616   /* If a matching pair of receives are found, process them, and return the data to
617      the calling function. Until then keep receiving messages */
618   while (!match_found) {
619     CHKMEMQ;
620     if (stash->reproduce) {
621       i = stash->reproduce_count++;
622       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
623     } else {
624       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
625     }
626     CHKMEMQ;
627     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
628 
629     /* Now pack the received message into a structure which is usable by others */
630     if (i % 2) {
631       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
632       flg_v[2*recv_status.MPI_SOURCE] = i/2;
633       *nvals = *nvals/bs2;
634     } else {
635       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
636       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
637       *nvals = *nvals/2; /* This message has both row indices and col indices */
638     }
639 
640     /* Check if we have both messages from this proc */
641     i1 = flg_v[2*recv_status.MPI_SOURCE];
642     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
643     if (i1 != -1 && i2 != -1) {
644       *rows       = stash->rindices[i2];
645       *cols       = *rows + *nvals;
646       *vals       = stash->rvalues[i1];
647       *flg        = 1;
648       stash->nprocessed ++;
649       match_found = PETSC_TRUE;
650     }
651   }
652   PetscFunctionReturn(0);
653 }
654