xref: /petsc/src/mat/utils/matstash.c (revision 421e10b8f79bbed49dcc4e803c884835d979c6ea)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 #include "src/mat/utils/matstashspace.h"
5 
6 /*
7        The input to the stash is ALWAYS in MatScalar precision, and the
8     internal storage and output is also in MatScalar.
9 */
10 #define DEFAULT_STASH_SIZE   10000
11 
12 /*
13   MatStashCreate_Private - Creates a stash,currently used for all the parallel
14   matrix implementations. The stash is where elements of a matrix destined
15   to be stored on other processors are kept until matrix assembly is done.
16 
17   This is a simple minded stash. Simply adds entries to end of stash.
18 
19   Input Parameters:
20   comm - communicator, required for scatters.
21   bs   - stash block size. used when stashing blocks of values
22 
23   Output Parameters:
24   stash    - the newly created stash
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStashCreate_Private"
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41 
42   nopt = stash->size;
43   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45   if (flg) {
46     if (nopt == 1)                max = opt[0];
47     else if (nopt == stash->size) max = opt[stash->rank];
48     else if (stash->rank < nopt)  max = opt[stash->rank];
49     else                          max = 0; /* Use default */
50     stash->umax = max;
51   } else {
52     stash->umax = 0;
53   }
54   ierr = PetscFree(opt);CHKERRQ(ierr);
55   if (bs <= 0) bs = 1;
56 
57   stash->bs       = bs;
58   stash->nmax     = 0;
59   stash->oldnmax  = 0;
60   stash->n        = 0;
61   stash->reallocs = -1;
62   stash->space_head = 0;
63   stash->space      = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91     stash->space      = 0;
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the fial stage of
98    scatter. The final stages of messagepassing is done here, and
99    all the memory used for messagepassing is cleanedu up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   PetscInt       nsends=stash->nsends,bs2,oldnmax;
110   MPI_Status     *send_status;
111 
112   PetscFunctionBegin;
113   /* wait on sends */
114   if (nsends) {
115     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117     ierr = PetscFree(send_status);CHKERRQ(ierr);
118   }
119 
120   /* Now update nmaxold to be app 10% more than max n used, this way the
121      wastage of space is reduced the next time this stash is used.
122      Also update the oldmax, only if it increases */
123   if (stash->n) {
124     bs2      = stash->bs*stash->bs;
125     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127   }
128 
129   stash->nmax       = 0;
130   stash->n          = 0;
131   stash->reallocs   = -1;
132   stash->nprocessed = 0;
133   if (stash->space_head){
134     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
135     stash->space_head = 0;
136     stash->space      = 0;
137   }
138   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
139   stash->send_waits = 0;
140   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
141   stash->recv_waits = 0;
142   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
143   stash->svalues = 0;
144   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145   stash->rvalues = 0;
146   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
147   stash->rindices = 0;
148   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
149   stash->nprocs = 0;
150   PetscFunctionReturn(0);
151 }
152 
153 /*
154    MatStashGetInfo_Private - Gets the relavant statistics of the stash
155 
156    Input Parameters:
157    stash    - the stash
158    nstash   - the size of the stash. Indicates the number of values stored.
159    reallocs - the number of additional mallocs incurred.
160 
161 */
162 #undef __FUNCT__
163 #define __FUNCT__ "MatStashGetInfo_Private"
164 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
165 {
166   PetscInt bs2 = stash->bs*stash->bs;
167 
168   PetscFunctionBegin;
169   if (nstash) *nstash   = stash->n*bs2;
170   if (reallocs) {
171     if (stash->reallocs < 0) *reallocs = 0;
172     else                     *reallocs = stash->reallocs;
173   }
174   PetscFunctionReturn(0);
175 }
176 
177 /*
178    MatStashSetInitialSize_Private - Sets the initial size of the stash
179 
180    Input Parameters:
181    stash  - the stash
182    max    - the value that is used as the max size of the stash.
183             this value is used while allocating memory.
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatStashSetInitialSize_Private"
187 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
188 {
189   PetscFunctionBegin;
190   stash->umax = max;
191   PetscFunctionReturn(0);
192 }
193 
194 /* MatStashExpand_Private - Expand the stash. This function is called
195    when the space in the stash is not sufficient to add the new values
196    being inserted into the stash.
197 
198    Input Parameters:
199    stash - the stash
200    incr  - the minimum increase requested
201 
202    Notes:
203    This routine doubles the currently used memory.
204  */
205 #undef __FUNCT__
206 #define __FUNCT__ "MatStashExpand_Private"
207 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
208 {
209   PetscErrorCode ierr;
210   PetscInt       newnmax,bs2= stash->bs*stash->bs;
211 
212   PetscFunctionBegin;
213   /* allocate a larger stash */
214   if (!stash->oldnmax && !stash->nmax) { /* new stash */
215     if (stash->umax)                  newnmax = stash->umax/bs2;
216     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
217   } else if (!stash->nmax) { /* resuing stash */
218     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
219     else                              newnmax = stash->oldnmax/bs2;
220   } else                              newnmax = stash->nmax*2;
221   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
222 
223   /* Get a MatStashSpace and attach it to stash */
224   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
225   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
226     stash->space_head = stash->space;
227   }
228 
229   stash->reallocs++;
230   stash->nmax = newnmax;
231   PetscFunctionReturn(0);
232 }
233 /*
234   MatStashValuesRow_Private - inserts values into the stash. This function
235   expects the values to be roworiented. Multiple columns belong to the same row
236   can be inserted with a single call to this function.
237 
238   Input Parameters:
239   stash  - the stash
240   row    - the global row correspoiding to the values
241   n      - the number of elements inserted. All elements belong to the above row.
242   idxn   - the global column indices corresponding to each of the values.
243   values - the values inserted
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "MatStashValuesRow_Private"
247 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
248 {
249   PetscErrorCode     ierr;
250   PetscInt           i,k;
251   PetscMatStashSpace space=stash->space;
252 
253   PetscFunctionBegin;
254   /* Check and see if we have sufficient memory */
255   if (!space || space->local_remaining < n){
256     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
257   }
258   space = stash->space;
259   k     = space->local_used;
260   for (i=0; i<n; i++) {
261     space->idx[k] = row;
262     space->idy[k] = idxn[i];
263     space->val[k] = values[i];
264     k++;
265   }
266   stash->n               += n;
267   space->local_used      += n;
268   space->local_remaining -= n;
269   PetscFunctionReturn(0);
270 }
271 
272 /*
273   MatStashValuesCol_Private - inserts values into the stash. This function
274   expects the values to be columnoriented. Multiple columns belong to the same row
275   can be inserted with a single call to this function.
276 
277   Input Parameters:
278   stash   - the stash
279   row     - the global row correspoiding to the values
280   n       - the number of elements inserted. All elements belong to the above row.
281   idxn    - the global column indices corresponding to each of the values.
282   values  - the values inserted
283   stepval - the consecutive values are sepated by a distance of stepval.
284             this happens because the input is columnoriented.
285 */
286 #undef __FUNCT__
287 #define __FUNCT__ "MatStashValuesCol_Private"
288 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
289 {
290   PetscErrorCode     ierr;
291   PetscInt           i,k;
292   PetscMatStashSpace space=stash->space;
293 
294   PetscFunctionBegin;
295   /* Check and see if we have sufficient memory */
296   if (!space || space->local_remaining < n){
297     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
298   }
299   space = stash->space;
300   k = space->local_used;
301   for (i=0; i<n; i++) {
302     space->idx[k] = row;
303     space->idy[k] = idxn[i];
304     space->val[k] = values[i*stepval];
305     k++;
306   }
307   stash->n               += n;
308   space->local_used      += n;
309   space->local_remaining -= n;
310   PetscFunctionReturn(0);
311 }
312 
313 /*
314   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
315   This function expects the values to be roworiented. Multiple columns belong
316   to the same block-row can be inserted with a single call to this function.
317   This function extracts the sub-block of values based on the dimensions of
318   the original input block, and the row,col values corresponding to the blocks.
319 
320   Input Parameters:
321   stash  - the stash
322   row    - the global block-row correspoiding to the values
323   n      - the number of elements inserted. All elements belong to the above row.
324   idxn   - the global block-column indices corresponding to each of the blocks of
325            values. Each block is of size bs*bs.
326   values - the values inserted
327   rmax   - the number of block-rows in the original block.
328   cmax   - the number of block-columsn on the original block.
329   idx    - the index of the current block-row in the original block.
330 */
331 #undef __FUNCT__
332 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
333 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
334 {
335   PetscErrorCode     ierr;
336   PetscInt           i,j,k,bs2,bs=stash->bs,l;
337   const MatScalar    *vals;
338   MatScalar          *array;
339   PetscMatStashSpace space=stash->space;
340 
341   PetscFunctionBegin;
342   if (!space || space->local_remaining < n){
343     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
344   }
345   space = stash->space;
346   l     = space->local_used;
347   bs2   = bs*bs;
348   for (i=0; i<n; i++) {
349     space->idx[l] = row;
350     space->idy[l] = idxn[i];
351     /* Now copy over the block of values. Store the values column oriented.
352        This enables inserting multiple blocks belonging to a row with a single
353        funtion call */
354     array = space->val + bs2*l;
355     vals  = values + idx*bs2*n + bs*i;
356     for (j=0; j<bs; j++) {
357       for (k=0; k<bs; k++) array[k*bs] = vals[k];
358       array++;
359       vals  += cmax*bs;
360     }
361     l++;
362   }
363   stash->n               += n;
364   space->local_used      += n;
365   space->local_remaining -= n;
366   PetscFunctionReturn(0);
367 }
368 
369 /*
370   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
371   This function expects the values to be roworiented. Multiple columns belong
372   to the same block-row can be inserted with a single call to this function.
373   This function extracts the sub-block of values based on the dimensions of
374   the original input block, and the row,col values corresponding to the blocks.
375 
376   Input Parameters:
377   stash  - the stash
378   row    - the global block-row correspoiding to the values
379   n      - the number of elements inserted. All elements belong to the above row.
380   idxn   - the global block-column indices corresponding to each of the blocks of
381            values. Each block is of size bs*bs.
382   values - the values inserted
383   rmax   - the number of block-rows in the original block.
384   cmax   - the number of block-columsn on the original block.
385   idx    - the index of the current block-row in the original block.
386 */
387 #undef __FUNCT__
388 #define __FUNCT__ "MatStashValuesColBlocked_Private"
389 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
390 {
391   PetscErrorCode  ierr;
392   PetscInt        i,j,k,bs2,bs=stash->bs,l;
393   const MatScalar *vals;
394   MatScalar       *array;
395   PetscMatStashSpace space=stash->space;
396 
397   PetscFunctionBegin;
398   if (!space || space->local_remaining < n){
399     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
400   }
401   space = stash->space;
402   l     = space->local_used;
403   bs2   = bs*bs;
404   for (i=0; i<n; i++) {
405     space->idx[l] = row;
406     space->idy[l] = idxn[i];
407     /* Now copy over the block of values. Store the values column oriented.
408      This enables inserting multiple blocks belonging to a row with a single
409      funtion call */
410     array = space->val + bs2*l;
411     vals  = values + idx*bs2*n + bs*i;
412     for (j=0; j<bs; j++) {
413       for (k=0; k<bs; k++) {array[k] = vals[k];}
414       array += bs;
415       vals  += rmax*bs;
416     }
417     l++;
418   }
419   stash->n               += n;
420   space->local_used      += n;
421   space->local_remaining -= n;
422   PetscFunctionReturn(0);
423 }
424 /*
425   MatStashScatterBegin_Private - Initiates the transfer of values to the
426   correct owners. This function goes through the stash, and check the
427   owners of each stashed value, and sends the values off to the owner
428   processors.
429 
430   Input Parameters:
431   stash  - the stash
432   owners - an array of size 'no-of-procs' which gives the ownership range
433            for each node.
434 
435   Notes: The 'owners' array in the cased of the blocked-stash has the
436   ranges specified blocked global indices, and for the regular stash in
437   the proper global indices.
438 */
439 #undef __FUNCT__
440 #define __FUNCT__ "MatStashScatterBegin_Private"
441 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
442 {
443   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
444   PetscInt       size=stash->size,nsends;
445   PetscErrorCode ierr;
446   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
447   MatScalar      **rvalues,*svalues;
448   MPI_Comm       comm = stash->comm;
449   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
450   PetscMPIInt    *nprocs,*nlengths,nreceives;
451   PetscInt       *sp_idx,*sp_idy;
452   MatScalar      *sp_val;
453   PetscMatStashSpace space,space_next;
454 
455   PetscFunctionBegin;
456   bs2 = stash->bs*stash->bs;
457 
458   /*  first count number of contributors to each processor */
459   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
460   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
461   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
462 
463   nlengths = nprocs+size;
464   i = j    = 0;
465   lastidx  = -1;
466   space    = stash->space_head;
467   while (space != PETSC_NULL){
468     space_next = space->next;
469     sp_idx     = space->idx;
470     for (l=0; l<space->local_used; l++){
471       /* if indices are NOT locally sorted, need to start search at the beginning */
472       if (lastidx > (idx = sp_idx[l])) j = 0;
473       lastidx = idx;
474       for (; j<size; j++) {
475         if (idx >= owners[j] && idx < owners[j+1]) {
476           nlengths[j]++; owner[i] = j; break;
477         }
478       }
479       i++;
480     }
481     space = space_next;
482   }
483   /* Now check what procs get messages - and compute nsends. */
484   for (i=0, nsends=0 ; i<size; i++) {
485     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
486   }
487 
488   { int  *onodes,*olengths;
489   /* Determine the number of messages to expect, their lengths, from from-ids */
490   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
491   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
492   /* since clubbing row,col - lengths are multiplied by 2 */
493   for (i=0; i<nreceives; i++) olengths[i] *=2;
494   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
495   /* values are size 'bs2' lengths (and remove earlier factor 2 */
496   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
497   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
498   ierr = PetscFree(onodes);CHKERRQ(ierr);
499   ierr = PetscFree(olengths);CHKERRQ(ierr);
500   }
501 
502   /* do sends:
503       1) starts[i] gives the starting index in svalues for stuff going to
504          the ith processor
505   */
506   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
507   sindices = (PetscInt*)(svalues + bs2*stash->n);
508   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
509   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
510   starti   = startv + size;
511   /* use 2 sends the first with all_a, the next with all_i and all_j */
512   startv[0]  = 0; starti[0] = 0;
513   for (i=1; i<size; i++) {
514     startv[i] = startv[i-1] + nlengths[i-1];
515     starti[i] = starti[i-1] + nlengths[i-1]*2;
516   }
517 
518   i     = 0;
519   space = stash->space_head;
520   while (space != PETSC_NULL){
521     space_next = space->next;
522     sp_idx = space->idx;
523     sp_idy = space->idy;
524     sp_val = space->val;
525     for (l=0; l<space->local_used; l++){
526       j = owner[i];
527       if (bs2 == 1) {
528         svalues[startv[j]] = sp_val[l];
529       } else {
530         PetscInt  k;
531         MatScalar *buf1,*buf2;
532         buf1 = svalues+bs2*startv[j];
533         buf2 = space->val + bs2*l;
534         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
535       }
536       sindices[starti[j]]             = sp_idx[l];
537       sindices[starti[j]+nlengths[j]] = sp_idy[l];
538       startv[j]++;
539       starti[j]++;
540       i++;
541     }
542     space = space_next;
543   }
544   startv[0] = 0;
545   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
546 
547   for (i=0,count=0; i<size; i++) {
548     if (nprocs[i]) {
549       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
550       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
551     }
552   }
553 #if defined(PETSC_USE_INFO)
554   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
555   for (i=0; i<size; i++) {
556     if (nprocs[i]) {
557       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
558     }
559   }
560 #endif
561   ierr = PetscFree(owner);CHKERRQ(ierr);
562   ierr = PetscFree(startv);CHKERRQ(ierr);
563   /* This memory is reused in scatter end  for a different purpose*/
564   for (i=0; i<2*size; i++) nprocs[i] = -1;
565   stash->nprocs = nprocs;
566 
567   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
568   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
569 
570   for (i=0; i<nreceives; i++) {
571     recv_waits[2*i]   = recv_waits1[i];
572     recv_waits[2*i+1] = recv_waits2[i];
573   }
574   stash->recv_waits = recv_waits;
575   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
576   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
577 
578   stash->svalues    = svalues;    stash->rvalues     = rvalues;
579   stash->rindices   = rindices;   stash->send_waits  = send_waits;
580   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
581   PetscFunctionReturn(0);
582 }
583 
584 /*
585    MatStashScatterGetMesg_Private - This function waits on the receives posted
586    in the function MatStashScatterBegin_Private() and returns one message at
587    a time to the calling function. If no messages are left, it indicates this
588    by setting flg = 0, else it sets flg = 1.
589 
590    Input Parameters:
591    stash - the stash
592 
593    Output Parameters:
594    nvals - the number of entries in the current message.
595    rows  - an array of row indices (or blocked indices) corresponding to the values
596    cols  - an array of columnindices (or blocked indices) corresponding to the values
597    vals  - the values
598    flg   - 0 indicates no more message left, and the current call has no values associated.
599            1 indicates that the current call successfully received a message, and the
600              other output parameters nvals,rows,cols,vals are set appropriately.
601 */
602 #undef __FUNCT__
603 #define __FUNCT__ "MatStashScatterGetMesg_Private"
604 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
605 {
606   PetscErrorCode ierr;
607   PetscMPIInt    i,*flg_v,i1,i2;
608   PetscInt       bs2;
609   MPI_Status     recv_status;
610   PetscTruth     match_found = PETSC_FALSE;
611 
612   PetscFunctionBegin;
613 
614   *flg = 0; /* When a message is discovered this is reset to 1 */
615   /* Return if no more messages to process */
616   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
617 
618   flg_v = stash->nprocs;
619   bs2   = stash->bs*stash->bs;
620   /* If a matching pair of receieves are found, process them, and return the data to
621      the calling function. Until then keep receiving messages */
622   while (!match_found) {
623     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
624     /* Now pack the received message into a structure which is useable by others */
625     if (i % 2) {
626       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
627       flg_v[2*recv_status.MPI_SOURCE] = i/2;
628       *nvals = *nvals/bs2;
629     } else {
630       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
631       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
632       *nvals = *nvals/2; /* This message has both row indices and col indices */
633     }
634 
635     /* Check if we have both the messages from this proc */
636     i1 = flg_v[2*recv_status.MPI_SOURCE];
637     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
638     if (i1 != -1 && i2 != -1) {
639       *rows       = stash->rindices[i2];
640       *cols       = *rows + *nvals;
641       *vals       = stash->rvalues[i1];
642       *flg        = 1;
643       stash->nprocessed ++;
644       match_found = PETSC_TRUE;
645     }
646   }
647   PetscFunctionReturn(0);
648 }
649