xref: /petsc/src/mat/utils/matstash.c (revision 22982a5f1e5ef536d7b0e4ffb77c4c6450d5b280)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt,i;
27   PetscTruth     flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
37   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38 
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs       = bs;
56   stash->nmax     = 0;
57   stash->oldnmax  = 0;
58   stash->n        = 0;
59   stash->reallocs = -1;
60   stash->space_head = 0;
61   stash->space      = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rindices    = 0;
71   stash->nprocessed  = 0;
72   PetscFunctionReturn(0);
73 }
74 
75 /*
76    MatStashDestroy_Private - Destroy the stash
77 */
78 #undef __FUNCT__
79 #define __FUNCT__ "MatStashDestroy_Private"
80 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   if (stash->space_head){
86     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
87     stash->space_head = 0;
88     stash->space      = 0;
89   }
90   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 /*
95    MatStashScatterEnd_Private - This is called as the fial stage of
96    scatter. The final stages of messagepassing is done here, and
97    all the memory used for messagepassing is cleanedu up. This
98    routine also resets the stash, and deallocates the memory used
99    for the stash. It also keeps track of the current memory usage
100    so that the same value can be used the next time through.
101 */
102 #undef __FUNCT__
103 #define __FUNCT__ "MatStashScatterEnd_Private"
104 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
105 {
106   PetscErrorCode ierr;
107   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
108   MPI_Status     *send_status;
109 
110   PetscFunctionBegin;
111   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
112   /* wait on sends */
113   if (nsends) {
114     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116     ierr = PetscFree(send_status);CHKERRQ(ierr);
117   }
118 
119   /* Now update nmaxold to be app 10% more than max n used, this way the
120      wastage of space is reduced the next time this stash is used.
121      Also update the oldmax, only if it increases */
122   if (stash->n) {
123     bs2      = stash->bs*stash->bs;
124     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126   }
127 
128   stash->nmax       = 0;
129   stash->n          = 0;
130   stash->reallocs   = -1;
131   stash->nprocessed = 0;
132   if (stash->space_head){
133     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
134     stash->space_head = 0;
135     stash->space      = 0;
136   }
137   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138   stash->send_waits = 0;
139   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
140   stash->recv_waits = 0;
141   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
142   stash->svalues = 0;
143   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
144   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145   stash->rvalues = 0;
146   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
147   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
148   stash->rindices = 0;
149   PetscFunctionReturn(0);
150 }
151 
152 /*
153    MatStashGetInfo_Private - Gets the relavant statistics of the stash
154 
155    Input Parameters:
156    stash    - the stash
157    nstash   - the size of the stash. Indicates the number of values stored.
158    reallocs - the number of additional mallocs incurred.
159 
160 */
161 #undef __FUNCT__
162 #define __FUNCT__ "MatStashGetInfo_Private"
163 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
164 {
165   PetscInt bs2 = stash->bs*stash->bs;
166 
167   PetscFunctionBegin;
168   if (nstash) *nstash   = stash->n*bs2;
169   if (reallocs) {
170     if (stash->reallocs < 0) *reallocs = 0;
171     else                     *reallocs = stash->reallocs;
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177    MatStashSetInitialSize_Private - Sets the initial size of the stash
178 
179    Input Parameters:
180    stash  - the stash
181    max    - the value that is used as the max size of the stash.
182             this value is used while allocating memory.
183 */
184 #undef __FUNCT__
185 #define __FUNCT__ "MatStashSetInitialSize_Private"
186 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
187 {
188   PetscFunctionBegin;
189   stash->umax = max;
190   PetscFunctionReturn(0);
191 }
192 
193 /* MatStashExpand_Private - Expand the stash. This function is called
194    when the space in the stash is not sufficient to add the new values
195    being inserted into the stash.
196 
197    Input Parameters:
198    stash - the stash
199    incr  - the minimum increase requested
200 
201    Notes:
202    This routine doubles the currently used memory.
203  */
204 #undef __FUNCT__
205 #define __FUNCT__ "MatStashExpand_Private"
206 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
207 {
208   PetscErrorCode ierr;
209   PetscInt       newnmax,bs2= stash->bs*stash->bs;
210 
211   PetscFunctionBegin;
212   /* allocate a larger stash */
213   if (!stash->oldnmax && !stash->nmax) { /* new stash */
214     if (stash->umax)                  newnmax = stash->umax/bs2;
215     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
216   } else if (!stash->nmax) { /* resuing stash */
217     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
218     else                              newnmax = stash->oldnmax/bs2;
219   } else                              newnmax = stash->nmax*2;
220   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
221 
222   /* Get a MatStashSpace and attach it to stash */
223   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
224   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
225     stash->space_head = stash->space;
226   }
227 
228   stash->reallocs++;
229   stash->nmax = newnmax;
230   PetscFunctionReturn(0);
231 }
232 /*
233   MatStashValuesRow_Private - inserts values into the stash. This function
234   expects the values to be roworiented. Multiple columns belong to the same row
235   can be inserted with a single call to this function.
236 
237   Input Parameters:
238   stash  - the stash
239   row    - the global row correspoiding to the values
240   n      - the number of elements inserted. All elements belong to the above row.
241   idxn   - the global column indices corresponding to each of the values.
242   values - the values inserted
243 */
244 #undef __FUNCT__
245 #define __FUNCT__ "MatStashValuesRow_Private"
246 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
247 {
248   PetscErrorCode     ierr;
249   PetscInt           i,k,cnt = 0;
250   PetscMatStashSpace space=stash->space;
251 
252   PetscFunctionBegin;
253   /* Check and see if we have sufficient memory */
254   if (!space || space->local_remaining < n){
255     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
256   }
257   space = stash->space;
258   k     = space->local_used;
259   for (i=0; i<n; i++) {
260     if (ignorezeroentries && (values[i] == 0.0)) continue;
261     space->idx[k] = row;
262     space->idy[k] = idxn[i];
263     space->val[k] = values[i];
264     k++;
265     cnt++;
266   }
267   stash->n               += cnt;
268   space->local_used      += cnt;
269   space->local_remaining -= cnt;
270   PetscFunctionReturn(0);
271 }
272 
273 /*
274   MatStashValuesCol_Private - inserts values into the stash. This function
275   expects the values to be columnoriented. Multiple columns belong to the same row
276   can be inserted with a single call to this function.
277 
278   Input Parameters:
279   stash   - the stash
280   row     - the global row correspoiding to the values
281   n       - the number of elements inserted. All elements belong to the above row.
282   idxn    - the global column indices corresponding to each of the values.
283   values  - the values inserted
284   stepval - the consecutive values are sepated by a distance of stepval.
285             this happens because the input is columnoriented.
286 */
287 #undef __FUNCT__
288 #define __FUNCT__ "MatStashValuesCol_Private"
289 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
290 {
291   PetscErrorCode     ierr;
292   PetscInt           i,k,cnt = 0;
293   PetscMatStashSpace space=stash->space;
294 
295   PetscFunctionBegin;
296   /* Check and see if we have sufficient memory */
297   if (!space || space->local_remaining < n){
298     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
299   }
300   space = stash->space;
301   k = space->local_used;
302   for (i=0; i<n; i++) {
303     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
304     space->idx[k] = row;
305     space->idy[k] = idxn[i];
306     space->val[k] = values[i*stepval];
307     k++;
308     cnt++;
309   }
310   stash->n               += cnt;
311   space->local_used      += cnt;
312   space->local_remaining -= cnt;
313   PetscFunctionReturn(0);
314 }
315 
316 /*
317   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
318   This function expects the values to be roworiented. Multiple columns belong
319   to the same block-row can be inserted with a single call to this function.
320   This function extracts the sub-block of values based on the dimensions of
321   the original input block, and the row,col values corresponding to the blocks.
322 
323   Input Parameters:
324   stash  - the stash
325   row    - the global block-row correspoiding to the values
326   n      - the number of elements inserted. All elements belong to the above row.
327   idxn   - the global block-column indices corresponding to each of the blocks of
328            values. Each block is of size bs*bs.
329   values - the values inserted
330   rmax   - the number of block-rows in the original block.
331   cmax   - the number of block-columsn on the original block.
332   idx    - the index of the current block-row in the original block.
333 */
334 #undef __FUNCT__
335 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
336 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
337 {
338   PetscErrorCode     ierr;
339   PetscInt           i,j,k,bs2,bs=stash->bs,l;
340   const PetscScalar  *vals;
341   PetscScalar        *array;
342   PetscMatStashSpace space=stash->space;
343 
344   PetscFunctionBegin;
345   if (!space || space->local_remaining < n){
346     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
347   }
348   space = stash->space;
349   l     = space->local_used;
350   bs2   = bs*bs;
351   for (i=0; i<n; i++) {
352     space->idx[l] = row;
353     space->idy[l] = idxn[i];
354     /* Now copy over the block of values. Store the values column oriented.
355        This enables inserting multiple blocks belonging to a row with a single
356        funtion call */
357     array = space->val + bs2*l;
358     vals  = values + idx*bs2*n + bs*i;
359     for (j=0; j<bs; j++) {
360       for (k=0; k<bs; k++) array[k*bs] = vals[k];
361       array++;
362       vals  += cmax*bs;
363     }
364     l++;
365   }
366   stash->n               += n;
367   space->local_used      += n;
368   space->local_remaining -= n;
369   PetscFunctionReturn(0);
370 }
371 
372 /*
373   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
374   This function expects the values to be roworiented. Multiple columns belong
375   to the same block-row can be inserted with a single call to this function.
376   This function extracts the sub-block of values based on the dimensions of
377   the original input block, and the row,col values corresponding to the blocks.
378 
379   Input Parameters:
380   stash  - the stash
381   row    - the global block-row correspoiding to the values
382   n      - the number of elements inserted. All elements belong to the above row.
383   idxn   - the global block-column indices corresponding to each of the blocks of
384            values. Each block is of size bs*bs.
385   values - the values inserted
386   rmax   - the number of block-rows in the original block.
387   cmax   - the number of block-columsn on the original block.
388   idx    - the index of the current block-row in the original block.
389 */
390 #undef __FUNCT__
391 #define __FUNCT__ "MatStashValuesColBlocked_Private"
392 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
393 {
394   PetscErrorCode     ierr;
395   PetscInt           i,j,k,bs2,bs=stash->bs,l;
396   const PetscScalar  *vals;
397   PetscScalar        *array;
398   PetscMatStashSpace space=stash->space;
399 
400   PetscFunctionBegin;
401   if (!space || space->local_remaining < n){
402     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
403   }
404   space = stash->space;
405   l     = space->local_used;
406   bs2   = bs*bs;
407   for (i=0; i<n; i++) {
408     space->idx[l] = row;
409     space->idy[l] = idxn[i];
410     /* Now copy over the block of values. Store the values column oriented.
411      This enables inserting multiple blocks belonging to a row with a single
412      funtion call */
413     array = space->val + bs2*l;
414     vals  = values + idx*bs2*n + bs*i;
415     for (j=0; j<bs; j++) {
416       for (k=0; k<bs; k++) {array[k] = vals[k];}
417       array += bs;
418       vals  += rmax*bs;
419     }
420     l++;
421   }
422   stash->n               += n;
423   space->local_used      += n;
424   space->local_remaining -= n;
425   PetscFunctionReturn(0);
426 }
427 /*
428   MatStashScatterBegin_Private - Initiates the transfer of values to the
429   correct owners. This function goes through the stash, and check the
430   owners of each stashed value, and sends the values off to the owner
431   processors.
432 
433   Input Parameters:
434   stash  - the stash
435   owners - an array of size 'no-of-procs' which gives the ownership range
436            for each node.
437 
438   Notes: The 'owners' array in the cased of the blocked-stash has the
439   ranges specified blocked global indices, and for the regular stash in
440   the proper global indices.
441 */
442 #undef __FUNCT__
443 #define __FUNCT__ "MatStashScatterBegin_Private"
444 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
445 {
446   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
447   PetscInt          size=stash->size,nsends;
448   PetscErrorCode    ierr;
449   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
450   PetscScalar       **rvalues,*svalues;
451   MPI_Comm          comm = stash->comm;
452   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
453   PetscMPIInt       *nprocs,*nlengths,nreceives;
454   PetscInt          *sp_idx,*sp_idy;
455   PetscScalar       *sp_val;
456   PetscMatStashSpace space,space_next;
457 
458   PetscFunctionBegin;
459   bs2 = stash->bs*stash->bs;
460 
461   /*  first count number of contributors to each processor */
462   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
463   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
464   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
465   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
466   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
467 
468   i = j    = 0;
469   lastidx  = -1;
470   space    = stash->space_head;
471   while (space != PETSC_NULL){
472     space_next = space->next;
473     sp_idx     = space->idx;
474     for (l=0; l<space->local_used; l++){
475       /* if indices are NOT locally sorted, need to start search at the beginning */
476       if (lastidx > (idx = sp_idx[l])) j = 0;
477       lastidx = idx;
478       for (; j<size; j++) {
479         if (idx >= owners[j] && idx < owners[j+1]) {
480           nlengths[j]++; owner[i] = j; break;
481         }
482       }
483       i++;
484     }
485     space = space_next;
486   }
487   /* Now check what procs get messages - and compute nsends. */
488   for (i=0, nsends=0 ; i<size; i++) {
489     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
490   }
491 
492   {PetscMPIInt  *onodes,*olengths;
493   /* Determine the number of messages to expect, their lengths, from from-ids */
494   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
495   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
496   /* since clubbing row,col - lengths are multiplied by 2 */
497   for (i=0; i<nreceives; i++) olengths[i] *=2;
498   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
499   /* values are size 'bs2' lengths (and remove earlier factor 2 */
500   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
501   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
502   ierr = PetscFree(onodes);CHKERRQ(ierr);
503   ierr = PetscFree(olengths);CHKERRQ(ierr);
504   }
505 
506   /* do sends:
507       1) starts[i] gives the starting index in svalues for stuff going to
508          the ith processor
509   */
510   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
511   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
512   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
513   /* use 2 sends the first with all_a, the next with all_i and all_j */
514   startv[0]  = 0; starti[0] = 0;
515   for (i=1; i<size; i++) {
516     startv[i] = startv[i-1] + nlengths[i-1];
517     starti[i] = starti[i-1] + 2*nlengths[i-1];
518   }
519 
520   i     = 0;
521   space = stash->space_head;
522   while (space != PETSC_NULL){
523     space_next = space->next;
524     sp_idx = space->idx;
525     sp_idy = space->idy;
526     sp_val = space->val;
527     for (l=0; l<space->local_used; l++){
528       j = owner[i];
529       if (bs2 == 1) {
530         svalues[startv[j]] = sp_val[l];
531       } else {
532         PetscInt     k;
533         PetscScalar *buf1,*buf2;
534         buf1 = svalues+bs2*startv[j];
535         buf2 = space->val + bs2*l;
536         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
537       }
538       sindices[starti[j]]             = sp_idx[l];
539       sindices[starti[j]+nlengths[j]] = sp_idy[l];
540       startv[j]++;
541       starti[j]++;
542       i++;
543     }
544     space = space_next;
545   }
546   startv[0] = 0;
547   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
548 
549   for (i=0,count=0; i<size; i++) {
550     if (nprocs[i]) {
551       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
552       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
553     }
554   }
555 #if defined(PETSC_USE_INFO)
556   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
557   for (i=0; i<size; i++) {
558     if (nprocs[i]) {
559       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
560     }
561   }
562 #endif
563   ierr = PetscFree(nlengths);CHKERRQ(ierr);
564   ierr = PetscFree(owner);CHKERRQ(ierr);
565   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
566   ierr = PetscFree(nprocs);CHKERRQ(ierr);
567 
568   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
569   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
570 
571   for (i=0; i<nreceives; i++) {
572     recv_waits[2*i]   = recv_waits1[i];
573     recv_waits[2*i+1] = recv_waits2[i];
574   }
575   stash->recv_waits = recv_waits;
576   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
577   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
578 
579   stash->svalues     = svalues;
580   stash->sindices    = sindices;
581   stash->rvalues     = rvalues;
582   stash->rindices    = rindices;
583   stash->send_waits  = send_waits;
584   stash->nsends      = nsends;
585   stash->nrecvs      = nreceives;
586   PetscFunctionReturn(0);
587 }
588 
589 /*
590    MatStashScatterGetMesg_Private - This function waits on the receives posted
591    in the function MatStashScatterBegin_Private() and returns one message at
592    a time to the calling function. If no messages are left, it indicates this
593    by setting flg = 0, else it sets flg = 1.
594 
595    Input Parameters:
596    stash - the stash
597 
598    Output Parameters:
599    nvals - the number of entries in the current message.
600    rows  - an array of row indices (or blocked indices) corresponding to the values
601    cols  - an array of columnindices (or blocked indices) corresponding to the values
602    vals  - the values
603    flg   - 0 indicates no more message left, and the current call has no values associated.
604            1 indicates that the current call successfully received a message, and the
605              other output parameters nvals,rows,cols,vals are set appropriately.
606 */
607 #undef __FUNCT__
608 #define __FUNCT__ "MatStashScatterGetMesg_Private"
609 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
610 {
611   PetscErrorCode ierr;
612   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
613   PetscInt       bs2;
614   MPI_Status     recv_status;
615   PetscTruth     match_found = PETSC_FALSE;
616 
617   PetscFunctionBegin;
618 
619   *flg = 0; /* When a message is discovered this is reset to 1 */
620   /* Return if no more messages to process */
621   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
622 
623   bs2   = stash->bs*stash->bs;
624   /* If a matching pair of receieves are found, process them, and return the data to
625      the calling function. Until then keep receiving messages */
626   while (!match_found) {
627     CHKMEMQ;
628     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
629     CHKMEMQ;
630     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_ERR_PLIB,"Negative MPI source!");
631 
632     /* Now pack the received message into a structure which is useable by others */
633     if (i % 2) {
634       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
635       flg_v[2*recv_status.MPI_SOURCE] = i/2;
636       *nvals = *nvals/bs2;
637     } else {
638       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
639       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
640       *nvals = *nvals/2; /* This message has both row indices and col indices */
641     }
642 
643     /* Check if we have both messages from this proc */
644     i1 = flg_v[2*recv_status.MPI_SOURCE];
645     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
646     if (i1 != -1 && i2 != -1) {
647       *rows       = stash->rindices[i2];
648       *cols       = *rows + *nvals;
649       *vals       = stash->rvalues[i1];
650       *flg        = 1;
651       stash->nprocessed ++;
652       match_found = PETSC_TRUE;
653     }
654   }
655   PetscFunctionReturn(0);
656 }
657