xref: /petsc/src/mat/utils/matstash.c (revision 99a14227db32e09bdaad407e56fdbfd3ed7e9701)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt,i;
27   PetscTruth     flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
37   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38 
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs       = bs;
56   stash->nmax     = 0;
57   stash->oldnmax  = 0;
58   stash->n        = 0;
59   stash->reallocs = -1;
60   stash->space_head = 0;
61   stash->space      = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rindices    = 0;
71   stash->nprocessed  = 0;
72 
73   stash->reproduce   = PETSC_FALSE;
74   ierr = PetscOptionsGetTruth(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91     stash->space      = 0;
92   }
93   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 /*
98    MatStashScatterEnd_Private - This is called as the final stage of
99    scatter. The final stages of message passing is done here, and
100    all the memory used for message passing is cleaned up. This
101    routine also resets the stash, and deallocates the memory used
102    for the stash. It also keeps track of the current memory usage
103    so that the same value can be used the next time through.
104 */
105 #undef __FUNCT__
106 #define __FUNCT__ "MatStashScatterEnd_Private"
107 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
108 {
109   PetscErrorCode ierr;
110   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
111   MPI_Status     *send_status;
112 
113   PetscFunctionBegin;
114   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
115   /* wait on sends */
116   if (nsends) {
117     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
118     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
119     ierr = PetscFree(send_status);CHKERRQ(ierr);
120   }
121 
122   /* Now update nmaxold to be app 10% more than max n used, this way the
123      wastage of space is reduced the next time this stash is used.
124      Also update the oldmax, only if it increases */
125   if (stash->n) {
126     bs2      = stash->bs*stash->bs;
127     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
128     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
129   }
130 
131   stash->nmax       = 0;
132   stash->n          = 0;
133   stash->reallocs   = -1;
134   stash->nprocessed = 0;
135   if (stash->space_head){
136     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
137     stash->space_head = 0;
138     stash->space      = 0;
139   }
140   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141   stash->send_waits = 0;
142   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
143   stash->recv_waits = 0;
144   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
145   stash->svalues = 0;
146   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
147   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
148   stash->rvalues = 0;
149   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
150   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
151   stash->rindices = 0;
152   PetscFunctionReturn(0);
153 }
154 
155 /*
156    MatStashGetInfo_Private - Gets the relavant statistics of the stash
157 
158    Input Parameters:
159    stash    - the stash
160    nstash   - the size of the stash. Indicates the number of values stored.
161    reallocs - the number of additional mallocs incurred.
162 
163 */
164 #undef __FUNCT__
165 #define __FUNCT__ "MatStashGetInfo_Private"
166 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
167 {
168   PetscInt bs2 = stash->bs*stash->bs;
169 
170   PetscFunctionBegin;
171   if (nstash) *nstash   = stash->n*bs2;
172   if (reallocs) {
173     if (stash->reallocs < 0) *reallocs = 0;
174     else                     *reallocs = stash->reallocs;
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 /*
180    MatStashSetInitialSize_Private - Sets the initial size of the stash
181 
182    Input Parameters:
183    stash  - the stash
184    max    - the value that is used as the max size of the stash.
185             this value is used while allocating memory.
186 */
187 #undef __FUNCT__
188 #define __FUNCT__ "MatStashSetInitialSize_Private"
189 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
190 {
191   PetscFunctionBegin;
192   stash->umax = max;
193   PetscFunctionReturn(0);
194 }
195 
196 /* MatStashExpand_Private - Expand the stash. This function is called
197    when the space in the stash is not sufficient to add the new values
198    being inserted into the stash.
199 
200    Input Parameters:
201    stash - the stash
202    incr  - the minimum increase requested
203 
204    Notes:
205    This routine doubles the currently used memory.
206  */
207 #undef __FUNCT__
208 #define __FUNCT__ "MatStashExpand_Private"
209 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
210 {
211   PetscErrorCode ierr;
212   PetscInt       newnmax,bs2= stash->bs*stash->bs;
213 
214   PetscFunctionBegin;
215   /* allocate a larger stash */
216   if (!stash->oldnmax && !stash->nmax) { /* new stash */
217     if (stash->umax)                  newnmax = stash->umax/bs2;
218     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
219   } else if (!stash->nmax) { /* resuing stash */
220     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
221     else                              newnmax = stash->oldnmax/bs2;
222   } else                              newnmax = stash->nmax*2;
223   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
224 
225   /* Get a MatStashSpace and attach it to stash */
226   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
227   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
228     stash->space_head = stash->space;
229   }
230 
231   stash->reallocs++;
232   stash->nmax = newnmax;
233   PetscFunctionReturn(0);
234 }
235 /*
236   MatStashValuesRow_Private - inserts values into the stash. This function
237   expects the values to be roworiented. Multiple columns belong to the same row
238   can be inserted with a single call to this function.
239 
240   Input Parameters:
241   stash  - the stash
242   row    - the global row correspoiding to the values
243   n      - the number of elements inserted. All elements belong to the above row.
244   idxn   - the global column indices corresponding to each of the values.
245   values - the values inserted
246 */
247 #undef __FUNCT__
248 #define __FUNCT__ "MatStashValuesRow_Private"
249 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
250 {
251   PetscErrorCode     ierr;
252   PetscInt           i,k,cnt = 0;
253   PetscMatStashSpace space=stash->space;
254 
255   PetscFunctionBegin;
256   /* Check and see if we have sufficient memory */
257   if (!space || space->local_remaining < n){
258     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
259   }
260   space = stash->space;
261   k     = space->local_used;
262   for (i=0; i<n; i++) {
263     if (ignorezeroentries && (values[i] == 0.0)) continue;
264     space->idx[k] = row;
265     space->idy[k] = idxn[i];
266     space->val[k] = values[i];
267     k++;
268     cnt++;
269   }
270   stash->n               += cnt;
271   space->local_used      += cnt;
272   space->local_remaining -= cnt;
273   PetscFunctionReturn(0);
274 }
275 
276 /*
277   MatStashValuesCol_Private - inserts values into the stash. This function
278   expects the values to be columnoriented. Multiple columns belong to the same row
279   can be inserted with a single call to this function.
280 
281   Input Parameters:
282   stash   - the stash
283   row     - the global row correspoiding to the values
284   n       - the number of elements inserted. All elements belong to the above row.
285   idxn    - the global column indices corresponding to each of the values.
286   values  - the values inserted
287   stepval - the consecutive values are sepated by a distance of stepval.
288             this happens because the input is columnoriented.
289 */
290 #undef __FUNCT__
291 #define __FUNCT__ "MatStashValuesCol_Private"
292 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
293 {
294   PetscErrorCode     ierr;
295   PetscInt           i,k,cnt = 0;
296   PetscMatStashSpace space=stash->space;
297 
298   PetscFunctionBegin;
299   /* Check and see if we have sufficient memory */
300   if (!space || space->local_remaining < n){
301     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
302   }
303   space = stash->space;
304   k = space->local_used;
305   for (i=0; i<n; i++) {
306     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
307     space->idx[k] = row;
308     space->idy[k] = idxn[i];
309     space->val[k] = values[i*stepval];
310     k++;
311     cnt++;
312   }
313   stash->n               += cnt;
314   space->local_used      += cnt;
315   space->local_remaining -= cnt;
316   PetscFunctionReturn(0);
317 }
318 
319 /*
320   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
321   This function expects the values to be roworiented. Multiple columns belong
322   to the same block-row can be inserted with a single call to this function.
323   This function extracts the sub-block of values based on the dimensions of
324   the original input block, and the row,col values corresponding to the blocks.
325 
326   Input Parameters:
327   stash  - the stash
328   row    - the global block-row correspoiding to the values
329   n      - the number of elements inserted. All elements belong to the above row.
330   idxn   - the global block-column indices corresponding to each of the blocks of
331            values. Each block is of size bs*bs.
332   values - the values inserted
333   rmax   - the number of block-rows in the original block.
334   cmax   - the number of block-columsn on the original block.
335   idx    - the index of the current block-row in the original block.
336 */
337 #undef __FUNCT__
338 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
339 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
340 {
341   PetscErrorCode     ierr;
342   PetscInt           i,j,k,bs2,bs=stash->bs,l;
343   const PetscScalar  *vals;
344   PetscScalar        *array;
345   PetscMatStashSpace space=stash->space;
346 
347   PetscFunctionBegin;
348   if (!space || space->local_remaining < n){
349     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
350   }
351   space = stash->space;
352   l     = space->local_used;
353   bs2   = bs*bs;
354   for (i=0; i<n; i++) {
355     space->idx[l] = row;
356     space->idy[l] = idxn[i];
357     /* Now copy over the block of values. Store the values column oriented.
358        This enables inserting multiple blocks belonging to a row with a single
359        funtion call */
360     array = space->val + bs2*l;
361     vals  = values + idx*bs2*n + bs*i;
362     for (j=0; j<bs; j++) {
363       for (k=0; k<bs; k++) array[k*bs] = vals[k];
364       array++;
365       vals  += cmax*bs;
366     }
367     l++;
368   }
369   stash->n               += n;
370   space->local_used      += n;
371   space->local_remaining -= n;
372   PetscFunctionReturn(0);
373 }
374 
375 /*
376   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
377   This function expects the values to be roworiented. Multiple columns belong
378   to the same block-row can be inserted with a single call to this function.
379   This function extracts the sub-block of values based on the dimensions of
380   the original input block, and the row,col values corresponding to the blocks.
381 
382   Input Parameters:
383   stash  - the stash
384   row    - the global block-row correspoiding to the values
385   n      - the number of elements inserted. All elements belong to the above row.
386   idxn   - the global block-column indices corresponding to each of the blocks of
387            values. Each block is of size bs*bs.
388   values - the values inserted
389   rmax   - the number of block-rows in the original block.
390   cmax   - the number of block-columsn on the original block.
391   idx    - the index of the current block-row in the original block.
392 */
393 #undef __FUNCT__
394 #define __FUNCT__ "MatStashValuesColBlocked_Private"
395 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
396 {
397   PetscErrorCode     ierr;
398   PetscInt           i,j,k,bs2,bs=stash->bs,l;
399   const PetscScalar  *vals;
400   PetscScalar        *array;
401   PetscMatStashSpace space=stash->space;
402 
403   PetscFunctionBegin;
404   if (!space || space->local_remaining < n){
405     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
406   }
407   space = stash->space;
408   l     = space->local_used;
409   bs2   = bs*bs;
410   for (i=0; i<n; i++) {
411     space->idx[l] = row;
412     space->idy[l] = idxn[i];
413     /* Now copy over the block of values. Store the values column oriented.
414      This enables inserting multiple blocks belonging to a row with a single
415      funtion call */
416     array = space->val + bs2*l;
417     vals  = values + idx*bs2*n + bs*i;
418     for (j=0; j<bs; j++) {
419       for (k=0; k<bs; k++) {array[k] = vals[k];}
420       array += bs;
421       vals  += rmax*bs;
422     }
423     l++;
424   }
425   stash->n               += n;
426   space->local_used      += n;
427   space->local_remaining -= n;
428   PetscFunctionReturn(0);
429 }
430 /*
431   MatStashScatterBegin_Private - Initiates the transfer of values to the
432   correct owners. This function goes through the stash, and check the
433   owners of each stashed value, and sends the values off to the owner
434   processors.
435 
436   Input Parameters:
437   stash  - the stash
438   owners - an array of size 'no-of-procs' which gives the ownership range
439            for each node.
440 
441   Notes: The 'owners' array in the cased of the blocked-stash has the
442   ranges specified blocked global indices, and for the regular stash in
443   the proper global indices.
444 */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatStashScatterBegin_Private"
447 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
448 {
449   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
450   PetscInt          size=stash->size,nsends;
451   PetscErrorCode    ierr;
452   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
453   PetscScalar       **rvalues,*svalues;
454   MPI_Comm          comm = stash->comm;
455   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
456   PetscMPIInt       *nprocs,*nlengths,nreceives;
457   PetscInt          *sp_idx,*sp_idy;
458   PetscScalar       *sp_val;
459   PetscMatStashSpace space,space_next;
460 
461   PetscFunctionBegin;
462   bs2 = stash->bs*stash->bs;
463 
464   /*  first count number of contributors to each processor */
465   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
466   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
467   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
468   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
469   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
470 
471   i = j    = 0;
472   lastidx  = -1;
473   space    = stash->space_head;
474   while (space != PETSC_NULL){
475     space_next = space->next;
476     sp_idx     = space->idx;
477     for (l=0; l<space->local_used; l++){
478       /* if indices are NOT locally sorted, need to start search at the beginning */
479       if (lastidx > (idx = sp_idx[l])) j = 0;
480       lastidx = idx;
481       for (; j<size; j++) {
482         if (idx >= owners[j] && idx < owners[j+1]) {
483           nlengths[j]++; owner[i] = j; break;
484         }
485       }
486       i++;
487     }
488     space = space_next;
489   }
490   /* Now check what procs get messages - and compute nsends. */
491   for (i=0, nsends=0 ; i<size; i++) {
492     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
493   }
494 
495   {PetscMPIInt  *onodes,*olengths;
496   /* Determine the number of messages to expect, their lengths, from from-ids */
497   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
498   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
499   /* since clubbing row,col - lengths are multiplied by 2 */
500   for (i=0; i<nreceives; i++) olengths[i] *=2;
501   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
502   /* values are size 'bs2' lengths (and remove earlier factor 2 */
503   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
504   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
505   ierr = PetscFree(onodes);CHKERRQ(ierr);
506   ierr = PetscFree(olengths);CHKERRQ(ierr);
507   }
508 
509   /* do sends:
510       1) starts[i] gives the starting index in svalues for stuff going to
511          the ith processor
512   */
513   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
514   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
515   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
516   /* use 2 sends the first with all_a, the next with all_i and all_j */
517   startv[0]  = 0; starti[0] = 0;
518   for (i=1; i<size; i++) {
519     startv[i] = startv[i-1] + nlengths[i-1];
520     starti[i] = starti[i-1] + 2*nlengths[i-1];
521   }
522 
523   i     = 0;
524   space = stash->space_head;
525   while (space != PETSC_NULL){
526     space_next = space->next;
527     sp_idx = space->idx;
528     sp_idy = space->idy;
529     sp_val = space->val;
530     for (l=0; l<space->local_used; l++){
531       j = owner[i];
532       if (bs2 == 1) {
533         svalues[startv[j]] = sp_val[l];
534       } else {
535         PetscInt     k;
536         PetscScalar *buf1,*buf2;
537         buf1 = svalues+bs2*startv[j];
538         buf2 = space->val + bs2*l;
539         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
540       }
541       sindices[starti[j]]             = sp_idx[l];
542       sindices[starti[j]+nlengths[j]] = sp_idy[l];
543       startv[j]++;
544       starti[j]++;
545       i++;
546     }
547     space = space_next;
548   }
549   startv[0] = 0;
550   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
551 
552   for (i=0,count=0; i<size; i++) {
553     if (nprocs[i]) {
554       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
555       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
556     }
557   }
558 #if defined(PETSC_USE_INFO)
559   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
560   for (i=0; i<size; i++) {
561     if (nprocs[i]) {
562       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
563     }
564   }
565 #endif
566   ierr = PetscFree(nlengths);CHKERRQ(ierr);
567   ierr = PetscFree(owner);CHKERRQ(ierr);
568   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
569   ierr = PetscFree(nprocs);CHKERRQ(ierr);
570 
571   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
572   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
573 
574   for (i=0; i<nreceives; i++) {
575     recv_waits[2*i]   = recv_waits1[i];
576     recv_waits[2*i+1] = recv_waits2[i];
577   }
578   stash->recv_waits = recv_waits;
579   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
580   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
581 
582   stash->svalues     = svalues;
583   stash->sindices    = sindices;
584   stash->rvalues     = rvalues;
585   stash->rindices    = rindices;
586   stash->send_waits  = send_waits;
587   stash->nsends      = nsends;
588   stash->nrecvs      = nreceives;
589   stash->reproduce_count = 0;
590   PetscFunctionReturn(0);
591 }
592 
593 /*
594    MatStashScatterGetMesg_Private - This function waits on the receives posted
595    in the function MatStashScatterBegin_Private() and returns one message at
596    a time to the calling function. If no messages are left, it indicates this
597    by setting flg = 0, else it sets flg = 1.
598 
599    Input Parameters:
600    stash - the stash
601 
602    Output Parameters:
603    nvals - the number of entries in the current message.
604    rows  - an array of row indices (or blocked indices) corresponding to the values
605    cols  - an array of columnindices (or blocked indices) corresponding to the values
606    vals  - the values
607    flg   - 0 indicates no more message left, and the current call has no values associated.
608            1 indicates that the current call successfully received a message, and the
609              other output parameters nvals,rows,cols,vals are set appropriately.
610 */
611 #undef __FUNCT__
612 #define __FUNCT__ "MatStashScatterGetMesg_Private"
613 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
614 {
615   PetscErrorCode ierr;
616   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
617   PetscInt       bs2;
618   MPI_Status     recv_status;
619   PetscTruth     match_found = PETSC_FALSE;
620 
621   PetscFunctionBegin;
622 
623   *flg = 0; /* When a message is discovered this is reset to 1 */
624   /* Return if no more messages to process */
625   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
626 
627   bs2   = stash->bs*stash->bs;
628   /* If a matching pair of receives are found, process them, and return the data to
629      the calling function. Until then keep receiving messages */
630   while (!match_found) {
631     CHKMEMQ;
632     if (stash->reproduce) {
633       i = stash->reproduce_count++;
634       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
635     } else {
636       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
637     }
638     CHKMEMQ;
639     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
640 
641     /* Now pack the received message into a structure which is usable by others */
642     if (i % 2) {
643       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
644       flg_v[2*recv_status.MPI_SOURCE] = i/2;
645       *nvals = *nvals/bs2;
646     } else {
647       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
648       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
649       *nvals = *nvals/2; /* This message has both row indices and col indices */
650     }
651 
652     /* Check if we have both messages from this proc */
653     i1 = flg_v[2*recv_status.MPI_SOURCE];
654     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
655     if (i1 != -1 && i2 != -1) {
656       *rows       = stash->rindices[i2];
657       *cols       = *rows + *nvals;
658       *vals       = stash->rvalues[i1];
659       *flg        = 1;
660       stash->nprocessed ++;
661       match_found = PETSC_TRUE;
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666