xref: /petsc/src/mat/utils/matstash.c (revision 6ac5842e34eedc6428162d8d42bedaaf46eae34c)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 /*
7   MatStashCreate_Private - Creates a stash,currently used for all the parallel
8   matrix implementations. The stash is where elements of a matrix destined
9   to be stored on other processors are kept until matrix assembly is done.
10 
11   This is a simple minded stash. Simply adds entries to end of stash.
12 
13   Input Parameters:
14   comm - communicator, required for scatters.
15   bs   - stash block size. used when stashing blocks of values
16 
17   Output Parameters:
18   stash    - the newly created stash
19 */
20 #undef __FUNCT__
21 #define __FUNCT__ "MatStashCreate_Private"
22 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
23 {
24   PetscErrorCode ierr;
25   PetscInt       max,*opt,nopt,i;
26   PetscBool      flg;
27 
28   PetscFunctionBegin;
29   /* Require 2 tags,get the second using PetscCommGetNewTag() */
30   stash->comm = comm;
31 
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36   ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
37   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38 
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs         = bs;
56   stash->nmax       = 0;
57   stash->oldnmax    = 0;
58   stash->n          = 0;
59   stash->reallocs   = -1;
60   stash->space_head = 0;
61   stash->space      = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rindices    = 0;
71   stash->nprocessed  = 0;
72   stash->reproduce   = PETSC_FALSE;
73 
74   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
89 
90   stash->space = 0;
91 
92   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the final stage of
98    scatter. The final stages of message passing is done here, and
99    all the memory used for message passing is cleaned up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
110   MPI_Status     *send_status;
111 
112   PetscFunctionBegin;
113   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
114   /* wait on sends */
115   if (nsends) {
116     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
117     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
118     ierr = PetscFree(send_status);CHKERRQ(ierr);
119   }
120 
121   /* Now update nmaxold to be app 10% more than max n used, this way the
122      wastage of space is reduced the next time this stash is used.
123      Also update the oldmax, only if it increases */
124   if (stash->n) {
125     bs2     = stash->bs*stash->bs;
126     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
127     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
128   }
129 
130   stash->nmax       = 0;
131   stash->n          = 0;
132   stash->reallocs   = -1;
133   stash->nprocessed = 0;
134 
135   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
136 
137   stash->space = 0;
138 
139   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
141   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
142   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
143   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
144   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
145   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 /*
150    MatStashGetInfo_Private - Gets the relavant statistics of the stash
151 
152    Input Parameters:
153    stash    - the stash
154    nstash   - the size of the stash. Indicates the number of values stored.
155    reallocs - the number of additional mallocs incurred.
156 
157 */
158 #undef __FUNCT__
159 #define __FUNCT__ "MatStashGetInfo_Private"
160 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
161 {
162   PetscInt bs2 = stash->bs*stash->bs;
163 
164   PetscFunctionBegin;
165   if (nstash) *nstash = stash->n*bs2;
166   if (reallocs) {
167     if (stash->reallocs < 0) *reallocs = 0;
168     else                     *reallocs = stash->reallocs;
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 /*
174    MatStashSetInitialSize_Private - Sets the initial size of the stash
175 
176    Input Parameters:
177    stash  - the stash
178    max    - the value that is used as the max size of the stash.
179             this value is used while allocating memory.
180 */
181 #undef __FUNCT__
182 #define __FUNCT__ "MatStashSetInitialSize_Private"
183 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
184 {
185   PetscFunctionBegin;
186   stash->umax = max;
187   PetscFunctionReturn(0);
188 }
189 
190 /* MatStashExpand_Private - Expand the stash. This function is called
191    when the space in the stash is not sufficient to add the new values
192    being inserted into the stash.
193 
194    Input Parameters:
195    stash - the stash
196    incr  - the minimum increase requested
197 
198    Notes:
199    This routine doubles the currently used memory.
200  */
201 #undef __FUNCT__
202 #define __FUNCT__ "MatStashExpand_Private"
203 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
204 {
205   PetscErrorCode ierr;
206   PetscInt       newnmax,bs2= stash->bs*stash->bs;
207 
208   PetscFunctionBegin;
209   /* allocate a larger stash */
210   if (!stash->oldnmax && !stash->nmax) { /* new stash */
211     if (stash->umax)                  newnmax = stash->umax/bs2;
212     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
213   } else if (!stash->nmax) { /* resuing stash */
214     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
215     else                              newnmax = stash->oldnmax/bs2;
216   } else                              newnmax = stash->nmax*2;
217   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
218 
219   /* Get a MatStashSpace and attach it to stash */
220   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
221   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
222     stash->space_head = stash->space;
223   }
224 
225   stash->reallocs++;
226   stash->nmax = newnmax;
227   PetscFunctionReturn(0);
228 }
229 /*
230   MatStashValuesRow_Private - inserts values into the stash. This function
231   expects the values to be roworiented. Multiple columns belong to the same row
232   can be inserted with a single call to this function.
233 
234   Input Parameters:
235   stash  - the stash
236   row    - the global row correspoiding to the values
237   n      - the number of elements inserted. All elements belong to the above row.
238   idxn   - the global column indices corresponding to each of the values.
239   values - the values inserted
240 */
241 #undef __FUNCT__
242 #define __FUNCT__ "MatStashValuesRow_Private"
243 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
244 {
245   PetscErrorCode     ierr;
246   PetscInt           i,k,cnt = 0;
247   PetscMatStashSpace space=stash->space;
248 
249   PetscFunctionBegin;
250   /* Check and see if we have sufficient memory */
251   if (!space || space->local_remaining < n) {
252     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
253   }
254   space = stash->space;
255   k     = space->local_used;
256   for (i=0; i<n; i++) {
257     if (ignorezeroentries && (values[i] == 0.0)) continue;
258     space->idx[k] = row;
259     space->idy[k] = idxn[i];
260     space->val[k] = values[i];
261     k++;
262     cnt++;
263   }
264   stash->n               += cnt;
265   space->local_used      += cnt;
266   space->local_remaining -= cnt;
267   PetscFunctionReturn(0);
268 }
269 
270 /*
271   MatStashValuesCol_Private - inserts values into the stash. This function
272   expects the values to be columnoriented. Multiple columns belong to the same row
273   can be inserted with a single call to this function.
274 
275   Input Parameters:
276   stash   - the stash
277   row     - the global row correspoiding to the values
278   n       - the number of elements inserted. All elements belong to the above row.
279   idxn    - the global column indices corresponding to each of the values.
280   values  - the values inserted
281   stepval - the consecutive values are sepated by a distance of stepval.
282             this happens because the input is columnoriented.
283 */
284 #undef __FUNCT__
285 #define __FUNCT__ "MatStashValuesCol_Private"
286 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
287 {
288   PetscErrorCode     ierr;
289   PetscInt           i,k,cnt = 0;
290   PetscMatStashSpace space=stash->space;
291 
292   PetscFunctionBegin;
293   /* Check and see if we have sufficient memory */
294   if (!space || space->local_remaining < n) {
295     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
296   }
297   space = stash->space;
298   k     = space->local_used;
299   for (i=0; i<n; i++) {
300     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
301     space->idx[k] = row;
302     space->idy[k] = idxn[i];
303     space->val[k] = values[i*stepval];
304     k++;
305     cnt++;
306   }
307   stash->n               += cnt;
308   space->local_used      += cnt;
309   space->local_remaining -= cnt;
310   PetscFunctionReturn(0);
311 }
312 
313 /*
314   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
315   This function expects the values to be roworiented. Multiple columns belong
316   to the same block-row can be inserted with a single call to this function.
317   This function extracts the sub-block of values based on the dimensions of
318   the original input block, and the row,col values corresponding to the blocks.
319 
320   Input Parameters:
321   stash  - the stash
322   row    - the global block-row correspoiding to the values
323   n      - the number of elements inserted. All elements belong to the above row.
324   idxn   - the global block-column indices corresponding to each of the blocks of
325            values. Each block is of size bs*bs.
326   values - the values inserted
327   rmax   - the number of block-rows in the original block.
328   cmax   - the number of block-columsn on the original block.
329   idx    - the index of the current block-row in the original block.
330 */
331 #undef __FUNCT__
332 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
333 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
334 {
335   PetscErrorCode     ierr;
336   PetscInt           i,j,k,bs2,bs=stash->bs,l;
337   const PetscScalar  *vals;
338   PetscScalar        *array;
339   PetscMatStashSpace space=stash->space;
340 
341   PetscFunctionBegin;
342   if (!space || space->local_remaining < n) {
343     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
344   }
345   space = stash->space;
346   l     = space->local_used;
347   bs2   = bs*bs;
348   for (i=0; i<n; i++) {
349     space->idx[l] = row;
350     space->idy[l] = idxn[i];
351     /* Now copy over the block of values. Store the values column oriented.
352        This enables inserting multiple blocks belonging to a row with a single
353        funtion call */
354     array = space->val + bs2*l;
355     vals  = values + idx*bs2*n + bs*i;
356     for (j=0; j<bs; j++) {
357       for (k=0; k<bs; k++) array[k*bs] = vals[k];
358       array++;
359       vals += cmax*bs;
360     }
361     l++;
362   }
363   stash->n               += n;
364   space->local_used      += n;
365   space->local_remaining -= n;
366   PetscFunctionReturn(0);
367 }
368 
369 /*
370   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
371   This function expects the values to be roworiented. Multiple columns belong
372   to the same block-row can be inserted with a single call to this function.
373   This function extracts the sub-block of values based on the dimensions of
374   the original input block, and the row,col values corresponding to the blocks.
375 
376   Input Parameters:
377   stash  - the stash
378   row    - the global block-row correspoiding to the values
379   n      - the number of elements inserted. All elements belong to the above row.
380   idxn   - the global block-column indices corresponding to each of the blocks of
381            values. Each block is of size bs*bs.
382   values - the values inserted
383   rmax   - the number of block-rows in the original block.
384   cmax   - the number of block-columsn on the original block.
385   idx    - the index of the current block-row in the original block.
386 */
387 #undef __FUNCT__
388 #define __FUNCT__ "MatStashValuesColBlocked_Private"
389 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
390 {
391   PetscErrorCode     ierr;
392   PetscInt           i,j,k,bs2,bs=stash->bs,l;
393   const PetscScalar  *vals;
394   PetscScalar        *array;
395   PetscMatStashSpace space=stash->space;
396 
397   PetscFunctionBegin;
398   if (!space || space->local_remaining < n) {
399     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
400   }
401   space = stash->space;
402   l     = space->local_used;
403   bs2   = bs*bs;
404   for (i=0; i<n; i++) {
405     space->idx[l] = row;
406     space->idy[l] = idxn[i];
407     /* Now copy over the block of values. Store the values column oriented.
408      This enables inserting multiple blocks belonging to a row with a single
409      funtion call */
410     array = space->val + bs2*l;
411     vals  = values + idx*bs2*n + bs*i;
412     for (j=0; j<bs; j++) {
413       for (k=0; k<bs; k++) array[k] = vals[k];
414       array += bs;
415       vals  += rmax*bs;
416     }
417     l++;
418   }
419   stash->n               += n;
420   space->local_used      += n;
421   space->local_remaining -= n;
422   PetscFunctionReturn(0);
423 }
424 /*
425   MatStashScatterBegin_Private - Initiates the transfer of values to the
426   correct owners. This function goes through the stash, and check the
427   owners of each stashed value, and sends the values off to the owner
428   processors.
429 
430   Input Parameters:
431   stash  - the stash
432   owners - an array of size 'no-of-procs' which gives the ownership range
433            for each node.
434 
435   Notes: The 'owners' array in the cased of the blocked-stash has the
436   ranges specified blocked global indices, and for the regular stash in
437   the proper global indices.
438 */
439 #undef __FUNCT__
440 #define __FUNCT__ "MatStashScatterBegin_Private"
441 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
442 {
443   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
444   PetscInt           size=stash->size,nsends;
445   PetscErrorCode     ierr;
446   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
447   PetscScalar        **rvalues,*svalues;
448   MPI_Comm           comm = stash->comm;
449   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
450   PetscMPIInt        *nprocs,*nlengths,nreceives;
451   PetscInt           *sp_idx,*sp_idy;
452   PetscScalar        *sp_val;
453   PetscMatStashSpace space,space_next;
454 
455   PetscFunctionBegin;
456   bs2 = stash->bs*stash->bs;
457 
458   /*  first count number of contributors to each processor */
459   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
460   ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
461   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
462   ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
463   ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
464 
465   i       = j    = 0;
466   lastidx = -1;
467   space   = stash->space_head;
468   while (space != NULL) {
469     space_next = space->next;
470     sp_idx     = space->idx;
471     for (l=0; l<space->local_used; l++) {
472       /* if indices are NOT locally sorted, need to start search at the beginning */
473       if (lastidx > (idx = sp_idx[l])) j = 0;
474       lastidx = idx;
475       for (; j<size; j++) {
476         if (idx >= owners[j] && idx < owners[j+1]) {
477           nlengths[j]++; owner[i] = j; break;
478         }
479       }
480       i++;
481     }
482     space = space_next;
483   }
484   /* Now check what procs get messages - and compute nsends. */
485   for (i=0, nsends=0; i<size; i++) {
486     if (nlengths[i]) {
487       nprocs[i] = 1; nsends++;
488     }
489   }
490 
491   {PetscMPIInt *onodes,*olengths;
492    /* Determine the number of messages to expect, their lengths, from from-ids */
493    ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
494    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
495    /* since clubbing row,col - lengths are multiplied by 2 */
496    for (i=0; i<nreceives; i++) olengths[i] *=2;
497    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
498    /* values are size 'bs2' lengths (and remove earlier factor 2 */
499    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
500    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
501    ierr = PetscFree(onodes);CHKERRQ(ierr);
502    ierr = PetscFree(olengths);CHKERRQ(ierr);}
503 
504   /* do sends:
505       1) starts[i] gives the starting index in svalues for stuff going to
506          the ith processor
507   */
508   ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
509   ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
510   ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
511   /* use 2 sends the first with all_a, the next with all_i and all_j */
512   startv[0] = 0; starti[0] = 0;
513   for (i=1; i<size; i++) {
514     startv[i] = startv[i-1] + nlengths[i-1];
515     starti[i] = starti[i-1] + 2*nlengths[i-1];
516   }
517 
518   i     = 0;
519   space = stash->space_head;
520   while (space != NULL) {
521     space_next = space->next;
522     sp_idx     = space->idx;
523     sp_idy     = space->idy;
524     sp_val     = space->val;
525     for (l=0; l<space->local_used; l++) {
526       j = owner[i];
527       if (bs2 == 1) {
528         svalues[startv[j]] = sp_val[l];
529       } else {
530         PetscInt    k;
531         PetscScalar *buf1,*buf2;
532         buf1 = svalues+bs2*startv[j];
533         buf2 = space->val + bs2*l;
534         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
535       }
536       sindices[starti[j]]             = sp_idx[l];
537       sindices[starti[j]+nlengths[j]] = sp_idy[l];
538       startv[j]++;
539       starti[j]++;
540       i++;
541     }
542     space = space_next;
543   }
544   startv[0] = 0;
545   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
546 
547   for (i=0,count=0; i<size; i++) {
548     if (nprocs[i]) {
549       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
550       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
551     }
552   }
553 #if defined(PETSC_USE_INFO)
554   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
555   for (i=0; i<size; i++) {
556     if (nprocs[i]) {
557       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
558     }
559   }
560 #endif
561   ierr = PetscFree(nlengths);CHKERRQ(ierr);
562   ierr = PetscFree(owner);CHKERRQ(ierr);
563   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
564   ierr = PetscFree(nprocs);CHKERRQ(ierr);
565 
566   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567   ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
568 
569   for (i=0; i<nreceives; i++) {
570     recv_waits[2*i]   = recv_waits1[i];
571     recv_waits[2*i+1] = recv_waits2[i];
572   }
573   stash->recv_waits = recv_waits;
574 
575   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
576   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
577 
578   stash->svalues         = svalues;
579   stash->sindices        = sindices;
580   stash->rvalues         = rvalues;
581   stash->rindices        = rindices;
582   stash->send_waits      = send_waits;
583   stash->nsends          = nsends;
584   stash->nrecvs          = nreceives;
585   stash->reproduce_count = 0;
586   PetscFunctionReturn(0);
587 }
588 
589 /*
590    MatStashScatterGetMesg_Private - This function waits on the receives posted
591    in the function MatStashScatterBegin_Private() and returns one message at
592    a time to the calling function. If no messages are left, it indicates this
593    by setting flg = 0, else it sets flg = 1.
594 
595    Input Parameters:
596    stash - the stash
597 
598    Output Parameters:
599    nvals - the number of entries in the current message.
600    rows  - an array of row indices (or blocked indices) corresponding to the values
601    cols  - an array of columnindices (or blocked indices) corresponding to the values
602    vals  - the values
603    flg   - 0 indicates no more message left, and the current call has no values associated.
604            1 indicates that the current call successfully received a message, and the
605              other output parameters nvals,rows,cols,vals are set appropriately.
606 */
607 #undef __FUNCT__
608 #define __FUNCT__ "MatStashScatterGetMesg_Private"
609 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
610 {
611   PetscErrorCode ierr;
612   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
613   PetscInt       bs2;
614   MPI_Status     recv_status;
615   PetscBool      match_found = PETSC_FALSE;
616 
617   PetscFunctionBegin;
618   *flg = 0; /* When a message is discovered this is reset to 1 */
619   /* Return if no more messages to process */
620   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
621 
622   bs2 = stash->bs*stash->bs;
623   /* If a matching pair of receives are found, process them, and return the data to
624      the calling function. Until then keep receiving messages */
625   while (!match_found) {
626     CHKMEMQ;
627     if (stash->reproduce) {
628       i    = stash->reproduce_count++;
629       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
630     } else {
631       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
632     }
633     CHKMEMQ;
634     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
635 
636     /* Now pack the received message into a structure which is usable by others */
637     if (i % 2) {
638       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
639 
640       flg_v[2*recv_status.MPI_SOURCE] = i/2;
641 
642       *nvals = *nvals/bs2;
643     } else {
644       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
645 
646       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
647 
648       *nvals = *nvals/2; /* This message has both row indices and col indices */
649     }
650 
651     /* Check if we have both messages from this proc */
652     i1 = flg_v[2*recv_status.MPI_SOURCE];
653     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
654     if (i1 != -1 && i2 != -1) {
655       *rows = stash->rindices[i2];
656       *cols = *rows + *nvals;
657       *vals = stash->rvalues[i1];
658       *flg  = 1;
659       stash->nprocessed++;
660       match_found = PETSC_TRUE;
661     }
662   }
663   PetscFunctionReturn(0);
664 }
665