xref: /petsc/src/mat/utils/matstash.c (revision 1e3347e8910e9540ca4e5e63802e011468fd198c)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 #include "src/mat/utils/matstashspace.h"
5 
6 /*
7        The input to the stash is ALWAYS in MatScalar precision, and the
8     internal storage and output is also in MatScalar.
9 */
10 #define DEFAULT_STASH_SIZE   10000
11 
12 /*
13   MatStashCreate_Private - Creates a stash,currently used for all the parallel
14   matrix implementations. The stash is where elements of a matrix destined
15   to be stored on other processors are kept until matrix assembly is done.
16 
17   This is a simple minded stash. Simply adds entries to end of stash.
18 
19   Input Parameters:
20   comm - communicator, required for scatters.
21   bs   - stash block size. used when stashing blocks of values
22 
23   Output Parameters:
24   stash    - the newly created stash
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStashCreate_Private"
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41 
42   nopt = stash->size;
43   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45   if (flg) {
46     if (nopt == 1)                max = opt[0];
47     else if (nopt == stash->size) max = opt[stash->rank];
48     else if (stash->rank < nopt)  max = opt[stash->rank];
49     else                          max = 0; /* Use default */
50     stash->umax = max;
51   } else {
52     stash->umax = 0;
53   }
54   ierr = PetscFree(opt);CHKERRQ(ierr);
55   if (bs <= 0) bs = 1;
56 
57   stash->bs       = bs;
58   stash->nmax     = 0;
59   stash->oldnmax  = 0;
60   stash->n        = 0;
61   stash->reallocs = -1;
62   stash->space_head = 0;
63   stash->space      = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91     stash->space      = 0;
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the fial stage of
98    scatter. The final stages of messagepassing is done here, and
99    all the memory used for messagepassing is cleanedu up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   PetscInt       nsends=stash->nsends,bs2,oldnmax;
110   MPI_Status     *send_status;
111 
112   PetscFunctionBegin;
113   /* wait on sends */
114   if (nsends) {
115     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117     ierr = PetscFree(send_status);CHKERRQ(ierr);
118   }
119 
120   /* Now update nmaxold to be app 10% more than max n used, this way the
121      wastage of space is reduced the next time this stash is used.
122      Also update the oldmax, only if it increases */
123   if (stash->n) {
124     bs2      = stash->bs*stash->bs;
125     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127   }
128 
129   stash->nmax       = 0;
130   stash->n          = 0;
131   stash->reallocs   = -1;
132   stash->nprocessed = 0;
133   if (stash->space_head){
134     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
135     stash->space_head = 0;
136     stash->space      = 0;
137   }
138   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
139   stash->send_waits = 0;
140   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
141   stash->recv_waits = 0;
142   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
143   stash->svalues = 0;
144   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145   stash->rvalues = 0;
146   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
147   stash->rindices = 0;
148   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
149   stash->nprocs = 0;
150   PetscFunctionReturn(0);
151 }
152 
153 /*
154    MatStashGetInfo_Private - Gets the relavant statistics of the stash
155 
156    Input Parameters:
157    stash    - the stash
158    nstash   - the size of the stash. Indicates the number of values stored.
159    reallocs - the number of additional mallocs incurred.
160 
161 */
162 #undef __FUNCT__
163 #define __FUNCT__ "MatStashGetInfo_Private"
164 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
165 {
166   PetscInt bs2 = stash->bs*stash->bs;
167 
168   PetscFunctionBegin;
169   if (nstash) *nstash   = stash->n*bs2;
170   if (reallocs) {
171     if (stash->reallocs < 0) *reallocs = 0;
172     else                     *reallocs = stash->reallocs;
173   }
174   PetscFunctionReturn(0);
175 }
176 
177 /*
178    MatStashSetInitialSize_Private - Sets the initial size of the stash
179 
180    Input Parameters:
181    stash  - the stash
182    max    - the value that is used as the max size of the stash.
183             this value is used while allocating memory.
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatStashSetInitialSize_Private"
187 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
188 {
189   PetscFunctionBegin;
190   stash->umax = max;
191   PetscFunctionReturn(0);
192 }
193 
194 /* MatStashExpand_Private - Expand the stash. This function is called
195    when the space in the stash is not sufficient to add the new values
196    being inserted into the stash.
197 
198    Input Parameters:
199    stash - the stash
200    incr  - the minimum increase requested
201 
202    Notes:
203    This routine doubles the currently used memory.
204  */
205 #undef __FUNCT__
206 #define __FUNCT__ "MatStashExpand_Private"
207 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
208 {
209   PetscErrorCode ierr;
210   PetscInt       newnmax,bs2= stash->bs*stash->bs;
211 
212   PetscFunctionBegin;
213   /* allocate a larger stash */
214   if (!stash->oldnmax && !stash->nmax) { /* new stash */
215     if (stash->umax)                  newnmax = stash->umax/bs2;
216     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
217   } else if (!stash->nmax) { /* resuing stash */
218     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
219     else                              newnmax = stash->oldnmax/bs2;
220   } else                              newnmax = stash->nmax*2;
221   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
222 
223   /* Get a MatStashSpace and attach it to stash */
224   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
225     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
226     stash->space = stash->space_head;
227   } else {
228     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
229   }
230   stash->reallocs++;
231   stash->nmax = newnmax;
232   PetscFunctionReturn(0);
233 }
234 /*
235   MatStashValuesRow_Private - inserts values into the stash. This function
236   expects the values to be roworiented. Multiple columns belong to the same row
237   can be inserted with a single call to this function.
238 
239   Input Parameters:
240   stash  - the stash
241   row    - the global row correspoiding to the values
242   n      - the number of elements inserted. All elements belong to the above row.
243   idxn   - the global column indices corresponding to each of the values.
244   values - the values inserted
245 */
246 #undef __FUNCT__
247 #define __FUNCT__ "MatStashValuesRow_Private"
248 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
249 {
250   PetscErrorCode     ierr;
251   PetscInt           i,k;
252   PetscMatStashSpace space=stash->space;
253 
254   PetscFunctionBegin;
255   /* Check and see if we have sufficient memory */
256   if (!space || space->local_remaining < n){
257     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
258   }
259   space = stash->space;
260   k     = space->local_used;
261   for (i=0; i<n; i++) {
262     space->idx[k] = row;
263     space->idy[k] = idxn[i];
264     space->val[k] = values[i];
265     k++;
266   }
267   stash->n               += n;
268   space->local_used      += n;
269   space->local_remaining -= n;
270   PetscFunctionReturn(0);
271 }
272 
273 /*
274   MatStashValuesCol_Private - inserts values into the stash. This function
275   expects the values to be columnoriented. Multiple columns belong to the same row
276   can be inserted with a single call to this function.
277 
278   Input Parameters:
279   stash   - the stash
280   row     - the global row correspoiding to the values
281   n       - the number of elements inserted. All elements belong to the above row.
282   idxn    - the global column indices corresponding to each of the values.
283   values  - the values inserted
284   stepval - the consecutive values are sepated by a distance of stepval.
285             this happens because the input is columnoriented.
286 */
287 #undef __FUNCT__
288 #define __FUNCT__ "MatStashValuesCol_Private"
289 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
290 {
291   PetscErrorCode     ierr;
292   PetscInt           i,k;
293   PetscMatStashSpace space=stash->space;
294 
295   PetscFunctionBegin;
296   /* Check and see if we have sufficient memory */
297   if (!space || space->local_remaining < n){
298     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
299   }
300   space = stash->space;
301   k = space->local_used;
302   for (i=0; i<n; i++) {
303     space->idx[k] = row;
304     space->idy[k] = idxn[i];
305     space->val[k] = values[i*stepval];
306     k++;
307   }
308   stash->n               += n;
309   space->local_used      += n;
310   space->local_remaining -= n;
311   PetscFunctionReturn(0);
312 }
313 
314 /*
315   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
316   This function expects the values to be roworiented. Multiple columns belong
317   to the same block-row can be inserted with a single call to this function.
318   This function extracts the sub-block of values based on the dimensions of
319   the original input block, and the row,col values corresponding to the blocks.
320 
321   Input Parameters:
322   stash  - the stash
323   row    - the global block-row correspoiding to the values
324   n      - the number of elements inserted. All elements belong to the above row.
325   idxn   - the global block-column indices corresponding to each of the blocks of
326            values. Each block is of size bs*bs.
327   values - the values inserted
328   rmax   - the number of block-rows in the original block.
329   cmax   - the number of block-columsn on the original block.
330   idx    - the index of the current block-row in the original block.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
334 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
335 {
336   PetscErrorCode     ierr;
337   PetscInt           i,j,k,bs2,bs=stash->bs,l;
338   const MatScalar    *vals;
339   MatScalar          *array;
340   PetscMatStashSpace space=stash->space;
341 
342   PetscFunctionBegin;
343   if (!space || space->local_remaining < n){
344     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
345   }
346   space = stash->space;
347   l     = space->local_used;
348   bs2   = bs*bs;
349   for (i=0; i<n; i++) {
350     space->idx[l] = row;
351     space->idy[l] = idxn[i];
352     /* Now copy over the block of values. Store the values column oriented.
353        This enables inserting multiple blocks belonging to a row with a single
354        funtion call */
355     array = space->val + bs2*l;
356     vals  = values + idx*bs2*n + bs*i;
357     for (j=0; j<bs; j++) {
358       for (k=0; k<bs; k++) array[k*bs] = vals[k];
359       array++;
360       vals  += cmax*bs;
361     }
362     l++;
363   }
364   stash->n               += n;
365   space->local_used      += n;
366   space->local_remaining -= n;
367   PetscFunctionReturn(0);
368 }
369 
370 /*
371   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
372   This function expects the values to be roworiented. Multiple columns belong
373   to the same block-row can be inserted with a single call to this function.
374   This function extracts the sub-block of values based on the dimensions of
375   the original input block, and the row,col values corresponding to the blocks.
376 
377   Input Parameters:
378   stash  - the stash
379   row    - the global block-row correspoiding to the values
380   n      - the number of elements inserted. All elements belong to the above row.
381   idxn   - the global block-column indices corresponding to each of the blocks of
382            values. Each block is of size bs*bs.
383   values - the values inserted
384   rmax   - the number of block-rows in the original block.
385   cmax   - the number of block-columsn on the original block.
386   idx    - the index of the current block-row in the original block.
387 */
388 #undef __FUNCT__
389 #define __FUNCT__ "MatStashValuesColBlocked_Private"
390 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
391 {
392   PetscErrorCode  ierr;
393   PetscInt        i,j,k,bs2,bs=stash->bs,l;
394   const MatScalar *vals;
395   MatScalar       *array;
396   PetscMatStashSpace space=stash->space;
397 
398   PetscFunctionBegin;
399   if (!space || space->local_remaining < n){
400     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
401   }
402   space = stash->space;
403   l     = space->local_used;
404   bs2   = bs*bs;
405   for (i=0; i<n; i++) {
406     space->idx[l] = row;
407     space->idy[l] = idxn[i];
408     /* Now copy over the block of values. Store the values column oriented.
409      This enables inserting multiple blocks belonging to a row with a single
410      funtion call */
411     array = space->val + bs2*l;
412     vals  = values + idx*bs2*n + bs*i;
413     for (j=0; j<bs; j++) {
414       for (k=0; k<bs; k++) {array[k] = vals[k];}
415       array += bs;
416       vals  += rmax*bs;
417     }
418     l++;
419   }
420   stash->n               += n;
421   space->local_used      += n;
422   space->local_remaining -= n;
423   PetscFunctionReturn(0);
424 }
425 /*
426   MatStashScatterBegin_Private - Initiates the transfer of values to the
427   correct owners. This function goes through the stash, and check the
428   owners of each stashed value, and sends the values off to the owner
429   processors.
430 
431   Input Parameters:
432   stash  - the stash
433   owners - an array of size 'no-of-procs' which gives the ownership range
434            for each node.
435 
436   Notes: The 'owners' array in the cased of the blocked-stash has the
437   ranges specified blocked global indices, and for the regular stash in
438   the proper global indices.
439 */
440 #undef __FUNCT__
441 #define __FUNCT__ "MatStashScatterBegin_Private"
442 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
443 {
444   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
445   PetscInt       size=stash->size,nsends;
446   PetscErrorCode ierr;
447   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
448   MatScalar      **rvalues,*svalues;
449   MPI_Comm       comm = stash->comm;
450   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
451   PetscMPIInt    *nprocs,*nlengths,nreceives;
452   PetscInt       *sp_idx,*sp_idy;
453   MatScalar      *sp_val;
454   PetscMatStashSpace space,space_next;
455 
456   PetscFunctionBegin;
457   bs2 = stash->bs*stash->bs;
458 
459   /*  first count number of contributors to each processor */
460   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
461   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
462   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
463 
464   nlengths = nprocs+size;
465   i = j    = 0;
466   lastidx  = -1;
467   space    = stash->space_head;
468   while (space != PETSC_NULL){
469     space_next = space->next;
470     sp_idx     = space->idx;
471     for (l=0; l<space->local_used; l++){
472       /* if indices are NOT locally sorted, need to start search at the beginning */
473       if (lastidx > (idx = sp_idx[l])) j = 0;
474       lastidx = idx;
475       for (; j<size; j++) {
476         if (idx >= owners[j] && idx < owners[j+1]) {
477           nlengths[j]++; owner[i] = j; break;
478         }
479       }
480       i++;
481     }
482     space = space_next;
483   }
484   /* Now check what procs get messages - and compute nsends. */
485   for (i=0, nsends=0 ; i<size; i++) {
486     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
487   }
488 
489   { int  *onodes,*olengths;
490   /* Determine the number of messages to expect, their lengths, from from-ids */
491   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
492   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
493   /* since clubbing row,col - lengths are multiplied by 2 */
494   for (i=0; i<nreceives; i++) olengths[i] *=2;
495   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
496   /* values are size 'bs2' lengths (and remove earlier factor 2 */
497   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
498   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
499   ierr = PetscFree(onodes);CHKERRQ(ierr);
500   ierr = PetscFree(olengths);CHKERRQ(ierr);
501   }
502 
503   /* do sends:
504       1) starts[i] gives the starting index in svalues for stuff going to
505          the ith processor
506   */
507   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
508   sindices = (PetscInt*)(svalues + bs2*stash->n);
509   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
510   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
511   starti   = startv + size;
512   /* use 2 sends the first with all_a, the next with all_i and all_j */
513   startv[0]  = 0; starti[0] = 0;
514   for (i=1; i<size; i++) {
515     startv[i] = startv[i-1] + nlengths[i-1];
516     starti[i] = starti[i-1] + nlengths[i-1]*2;
517   }
518 
519   i     = 0;
520   space = stash->space_head;
521   while (space != PETSC_NULL){
522     space_next = space->next;
523     sp_idx = space->idx;
524     sp_idy = space->idy;
525     sp_val = space->val;
526     for (l=0; l<space->local_used; l++){
527       j = owner[i];
528       if (bs2 == 1) {
529         svalues[startv[j]] = sp_val[l];
530       } else {
531         PetscInt  k;
532         MatScalar *buf1,*buf2;
533         buf1 = svalues+bs2*startv[j];
534         buf2 = space->val + bs2*i;
535         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
536       }
537       sindices[starti[j]]             = sp_idx[l];
538       sindices[starti[j]+nlengths[j]] = sp_idy[l];
539       startv[j]++;
540       starti[j]++;
541       i++;
542     }
543     space = space_next;
544   }
545   startv[0] = 0;
546   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
547 
548   for (i=0,count=0; i<size; i++) {
549     if (nprocs[i]) {
550       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
551       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
552     }
553   }
554 #if defined(PETSC_USE_INFO)
555   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
556   for (i=0; i<size; i++) {
557     if (nprocs[i]) {
558       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
559     }
560   }
561 #endif
562   ierr = PetscFree(owner);CHKERRQ(ierr);
563   ierr = PetscFree(startv);CHKERRQ(ierr);
564   /* This memory is reused in scatter end  for a different purpose*/
565   for (i=0; i<2*size; i++) nprocs[i] = -1;
566   stash->nprocs = nprocs;
567 
568   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
569   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
570 
571   for (i=0; i<nreceives; i++) {
572     recv_waits[2*i]   = recv_waits1[i];
573     recv_waits[2*i+1] = recv_waits2[i];
574   }
575   stash->recv_waits = recv_waits;
576   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
577   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
578 
579   stash->svalues    = svalues;    stash->rvalues     = rvalues;
580   stash->rindices   = rindices;   stash->send_waits  = send_waits;
581   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
582   PetscFunctionReturn(0);
583 }
584 
585 /*
586    MatStashScatterGetMesg_Private - This function waits on the receives posted
587    in the function MatStashScatterBegin_Private() and returns one message at
588    a time to the calling function. If no messages are left, it indicates this
589    by setting flg = 0, else it sets flg = 1.
590 
591    Input Parameters:
592    stash - the stash
593 
594    Output Parameters:
595    nvals - the number of entries in the current message.
596    rows  - an array of row indices (or blocked indices) corresponding to the values
597    cols  - an array of columnindices (or blocked indices) corresponding to the values
598    vals  - the values
599    flg   - 0 indicates no more message left, and the current call has no values associated.
600            1 indicates that the current call successfully received a message, and the
601              other output parameters nvals,rows,cols,vals are set appropriately.
602 */
603 #undef __FUNCT__
604 #define __FUNCT__ "MatStashScatterGetMesg_Private"
605 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
606 {
607   PetscErrorCode ierr;
608   PetscMPIInt    i,*flg_v,i1,i2;
609   PetscInt       bs2;
610   MPI_Status     recv_status;
611   PetscTruth     match_found = PETSC_FALSE;
612 
613   PetscFunctionBegin;
614 
615   *flg = 0; /* When a message is discovered this is reset to 1 */
616   /* Return if no more messages to process */
617   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
618 
619   flg_v = stash->nprocs;
620   bs2   = stash->bs*stash->bs;
621   /* If a matching pair of receieves are found, process them, and return the data to
622      the calling function. Until then keep receiving messages */
623   while (!match_found) {
624     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
625     /* Now pack the received message into a structure which is useable by others */
626     if (i % 2) {
627       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
628       flg_v[2*recv_status.MPI_SOURCE] = i/2;
629       *nvals = *nvals/bs2;
630     } else {
631       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
632       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
633       *nvals = *nvals/2; /* This message has both row indices and col indices */
634     }
635 
636     /* Check if we have both the messages from this proc */
637     i1 = flg_v[2*recv_status.MPI_SOURCE];
638     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
639     if (i1 != -1 && i2 != -1) {
640       *rows       = stash->rindices[i2];
641       *cols       = *rows + *nvals;
642       *vals       = stash->rvalues[i1];
643       *flg        = 1;
644       stash->nprocessed ++;
645       match_found = PETSC_TRUE;
646     }
647   }
648   PetscFunctionReturn(0);
649 }
650