xref: /petsc/src/mat/utils/matstash.c (revision 9596e0b48258fba4fca4f68feb5185896facfe69)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt;
27   PetscTruth     flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36 
37   nopt = stash->size;
38   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
39   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
40   if (flg) {
41     if (nopt == 1)                max = opt[0];
42     else if (nopt == stash->size) max = opt[stash->rank];
43     else if (stash->rank < nopt)  max = opt[stash->rank];
44     else                          max = 0; /* Use default */
45     stash->umax = max;
46   } else {
47     stash->umax = 0;
48   }
49   ierr = PetscFree(opt);CHKERRQ(ierr);
50   if (bs <= 0) bs = 1;
51 
52   stash->bs       = bs;
53   stash->nmax     = 0;
54   stash->oldnmax  = 0;
55   stash->n        = 0;
56   stash->reallocs = -1;
57   stash->space_head = 0;
58   stash->space      = 0;
59 
60   stash->send_waits  = 0;
61   stash->recv_waits  = 0;
62   stash->send_status = 0;
63   stash->nsends      = 0;
64   stash->nrecvs      = 0;
65   stash->svalues     = 0;
66   stash->rvalues     = 0;
67   stash->rindices    = 0;
68   stash->nprocs      = 0;
69   stash->nprocessed  = 0;
70   PetscFunctionReturn(0);
71 }
72 
73 /*
74    MatStashDestroy_Private - Destroy the stash
75 */
76 #undef __FUNCT__
77 #define __FUNCT__ "MatStashDestroy_Private"
78 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
79 {
80   PetscErrorCode ierr;
81 
82   PetscFunctionBegin;
83   if (stash->space_head){
84     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
85     stash->space_head = 0;
86     stash->space      = 0;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 /*
92    MatStashScatterEnd_Private - This is called as the fial stage of
93    scatter. The final stages of messagepassing is done here, and
94    all the memory used for messagepassing is cleanedu up. This
95    routine also resets the stash, and deallocates the memory used
96    for the stash. It also keeps track of the current memory usage
97    so that the same value can be used the next time through.
98 */
99 #undef __FUNCT__
100 #define __FUNCT__ "MatStashScatterEnd_Private"
101 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
102 {
103   PetscErrorCode ierr;
104   PetscInt       nsends=stash->nsends,bs2,oldnmax;
105   MPI_Status     *send_status;
106 
107   PetscFunctionBegin;
108   /* wait on sends */
109   if (nsends) {
110     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
111     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
112     ierr = PetscFree(send_status);CHKERRQ(ierr);
113   }
114 
115   /* Now update nmaxold to be app 10% more than max n used, this way the
116      wastage of space is reduced the next time this stash is used.
117      Also update the oldmax, only if it increases */
118   if (stash->n) {
119     bs2      = stash->bs*stash->bs;
120     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
121     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
122   }
123 
124   stash->nmax       = 0;
125   stash->n          = 0;
126   stash->reallocs   = -1;
127   stash->nprocessed = 0;
128   if (stash->space_head){
129     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
130     stash->space_head = 0;
131     stash->space      = 0;
132   }
133   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134   stash->send_waits = 0;
135   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
136   stash->recv_waits = 0;
137   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
138   stash->svalues = 0;
139   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
140   stash->rvalues = 0;
141   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
142   stash->rindices = 0;
143   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
144   stash->nprocs = 0;
145   PetscFunctionReturn(0);
146 }
147 
148 /*
149    MatStashGetInfo_Private - Gets the relavant statistics of the stash
150 
151    Input Parameters:
152    stash    - the stash
153    nstash   - the size of the stash. Indicates the number of values stored.
154    reallocs - the number of additional mallocs incurred.
155 
156 */
157 #undef __FUNCT__
158 #define __FUNCT__ "MatStashGetInfo_Private"
159 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
160 {
161   PetscInt bs2 = stash->bs*stash->bs;
162 
163   PetscFunctionBegin;
164   if (nstash) *nstash   = stash->n*bs2;
165   if (reallocs) {
166     if (stash->reallocs < 0) *reallocs = 0;
167     else                     *reallocs = stash->reallocs;
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 /*
173    MatStashSetInitialSize_Private - Sets the initial size of the stash
174 
175    Input Parameters:
176    stash  - the stash
177    max    - the value that is used as the max size of the stash.
178             this value is used while allocating memory.
179 */
180 #undef __FUNCT__
181 #define __FUNCT__ "MatStashSetInitialSize_Private"
182 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
183 {
184   PetscFunctionBegin;
185   stash->umax = max;
186   PetscFunctionReturn(0);
187 }
188 
189 /* MatStashExpand_Private - Expand the stash. This function is called
190    when the space in the stash is not sufficient to add the new values
191    being inserted into the stash.
192 
193    Input Parameters:
194    stash - the stash
195    incr  - the minimum increase requested
196 
197    Notes:
198    This routine doubles the currently used memory.
199  */
200 #undef __FUNCT__
201 #define __FUNCT__ "MatStashExpand_Private"
202 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
203 {
204   PetscErrorCode ierr;
205   PetscInt       newnmax,bs2= stash->bs*stash->bs;
206 
207   PetscFunctionBegin;
208   /* allocate a larger stash */
209   if (!stash->oldnmax && !stash->nmax) { /* new stash */
210     if (stash->umax)                  newnmax = stash->umax/bs2;
211     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
212   } else if (!stash->nmax) { /* resuing stash */
213     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
214     else                              newnmax = stash->oldnmax/bs2;
215   } else                              newnmax = stash->nmax*2;
216   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
217 
218   /* Get a MatStashSpace and attach it to stash */
219   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
220   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
221     stash->space_head = stash->space;
222   }
223 
224   stash->reallocs++;
225   stash->nmax = newnmax;
226   PetscFunctionReturn(0);
227 }
228 /*
229   MatStashValuesRow_Private - inserts values into the stash. This function
230   expects the values to be roworiented. Multiple columns belong to the same row
231   can be inserted with a single call to this function.
232 
233   Input Parameters:
234   stash  - the stash
235   row    - the global row correspoiding to the values
236   n      - the number of elements inserted. All elements belong to the above row.
237   idxn   - the global column indices corresponding to each of the values.
238   values - the values inserted
239 */
240 #undef __FUNCT__
241 #define __FUNCT__ "MatStashValuesRow_Private"
242 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
243 {
244   PetscErrorCode     ierr;
245   PetscInt           i,k,cnt = 0;
246   PetscMatStashSpace space=stash->space;
247 
248   PetscFunctionBegin;
249   /* Check and see if we have sufficient memory */
250   if (!space || space->local_remaining < n){
251     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
252   }
253   space = stash->space;
254   k     = space->local_used;
255   for (i=0; i<n; i++) {
256     if (ignorezeroentries && (values[i] == 0.0)) continue;
257     space->idx[k] = row;
258     space->idy[k] = idxn[i];
259     space->val[k] = values[i];
260     k++;
261     cnt++;
262   }
263   stash->n               += cnt;
264   space->local_used      += cnt;
265   space->local_remaining -= cnt;
266   PetscFunctionReturn(0);
267 }
268 
269 /*
270   MatStashValuesCol_Private - inserts values into the stash. This function
271   expects the values to be columnoriented. Multiple columns belong to the same row
272   can be inserted with a single call to this function.
273 
274   Input Parameters:
275   stash   - the stash
276   row     - the global row correspoiding to the values
277   n       - the number of elements inserted. All elements belong to the above row.
278   idxn    - the global column indices corresponding to each of the values.
279   values  - the values inserted
280   stepval - the consecutive values are sepated by a distance of stepval.
281             this happens because the input is columnoriented.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatStashValuesCol_Private"
285 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
286 {
287   PetscErrorCode     ierr;
288   PetscInt           i,k,cnt = 0;
289   PetscMatStashSpace space=stash->space;
290 
291   PetscFunctionBegin;
292   /* Check and see if we have sufficient memory */
293   if (!space || space->local_remaining < n){
294     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
295   }
296   space = stash->space;
297   k = space->local_used;
298   for (i=0; i<n; i++) {
299     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
300     space->idx[k] = row;
301     space->idy[k] = idxn[i];
302     space->val[k] = values[i*stepval];
303     k++;
304     cnt++;
305   }
306   stash->n               += cnt;
307   space->local_used      += cnt;
308   space->local_remaining -= cnt;
309   PetscFunctionReturn(0);
310 }
311 
312 /*
313   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
314   This function expects the values to be roworiented. Multiple columns belong
315   to the same block-row can be inserted with a single call to this function.
316   This function extracts the sub-block of values based on the dimensions of
317   the original input block, and the row,col values corresponding to the blocks.
318 
319   Input Parameters:
320   stash  - the stash
321   row    - the global block-row correspoiding to the values
322   n      - the number of elements inserted. All elements belong to the above row.
323   idxn   - the global block-column indices corresponding to each of the blocks of
324            values. Each block is of size bs*bs.
325   values - the values inserted
326   rmax   - the number of block-rows in the original block.
327   cmax   - the number of block-columsn on the original block.
328   idx    - the index of the current block-row in the original block.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
332 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
333 {
334   PetscErrorCode     ierr;
335   PetscInt           i,j,k,bs2,bs=stash->bs,l;
336   const PetscScalar  *vals;
337   PetscScalar        *array;
338   PetscMatStashSpace space=stash->space;
339 
340   PetscFunctionBegin;
341   if (!space || space->local_remaining < n){
342     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
343   }
344   space = stash->space;
345   l     = space->local_used;
346   bs2   = bs*bs;
347   for (i=0; i<n; i++) {
348     space->idx[l] = row;
349     space->idy[l] = idxn[i];
350     /* Now copy over the block of values. Store the values column oriented.
351        This enables inserting multiple blocks belonging to a row with a single
352        funtion call */
353     array = space->val + bs2*l;
354     vals  = values + idx*bs2*n + bs*i;
355     for (j=0; j<bs; j++) {
356       for (k=0; k<bs; k++) array[k*bs] = vals[k];
357       array++;
358       vals  += cmax*bs;
359     }
360     l++;
361   }
362   stash->n               += n;
363   space->local_used      += n;
364   space->local_remaining -= n;
365   PetscFunctionReturn(0);
366 }
367 
368 /*
369   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
370   This function expects the values to be roworiented. Multiple columns belong
371   to the same block-row can be inserted with a single call to this function.
372   This function extracts the sub-block of values based on the dimensions of
373   the original input block, and the row,col values corresponding to the blocks.
374 
375   Input Parameters:
376   stash  - the stash
377   row    - the global block-row correspoiding to the values
378   n      - the number of elements inserted. All elements belong to the above row.
379   idxn   - the global block-column indices corresponding to each of the blocks of
380            values. Each block is of size bs*bs.
381   values - the values inserted
382   rmax   - the number of block-rows in the original block.
383   cmax   - the number of block-columsn on the original block.
384   idx    - the index of the current block-row in the original block.
385 */
386 #undef __FUNCT__
387 #define __FUNCT__ "MatStashValuesColBlocked_Private"
388 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
389 {
390   PetscErrorCode     ierr;
391   PetscInt           i,j,k,bs2,bs=stash->bs,l;
392   const PetscScalar  *vals;
393   PetscScalar        *array;
394   PetscMatStashSpace space=stash->space;
395 
396   PetscFunctionBegin;
397   if (!space || space->local_remaining < n){
398     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
399   }
400   space = stash->space;
401   l     = space->local_used;
402   bs2   = bs*bs;
403   for (i=0; i<n; i++) {
404     space->idx[l] = row;
405     space->idy[l] = idxn[i];
406     /* Now copy over the block of values. Store the values column oriented.
407      This enables inserting multiple blocks belonging to a row with a single
408      funtion call */
409     array = space->val + bs2*l;
410     vals  = values + idx*bs2*n + bs*i;
411     for (j=0; j<bs; j++) {
412       for (k=0; k<bs; k++) {array[k] = vals[k];}
413       array += bs;
414       vals  += rmax*bs;
415     }
416     l++;
417   }
418   stash->n               += n;
419   space->local_used      += n;
420   space->local_remaining -= n;
421   PetscFunctionReturn(0);
422 }
423 /*
424   MatStashScatterBegin_Private - Initiates the transfer of values to the
425   correct owners. This function goes through the stash, and check the
426   owners of each stashed value, and sends the values off to the owner
427   processors.
428 
429   Input Parameters:
430   stash  - the stash
431   owners - an array of size 'no-of-procs' which gives the ownership range
432            for each node.
433 
434   Notes: The 'owners' array in the cased of the blocked-stash has the
435   ranges specified blocked global indices, and for the regular stash in
436   the proper global indices.
437 */
438 #undef __FUNCT__
439 #define __FUNCT__ "MatStashScatterBegin_Private"
440 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
441 {
442   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
443   PetscInt          size=stash->size,nsends;
444   PetscErrorCode    ierr;
445   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
446   PetscScalar       **rvalues,*svalues;
447   MPI_Comm          comm = stash->comm;
448   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
449   PetscMPIInt       *nprocs,*nlengths,nreceives;
450   PetscInt          *sp_idx,*sp_idy;
451   PetscScalar       *sp_val;
452   PetscMatStashSpace space,space_next;
453 
454   PetscFunctionBegin;
455   bs2 = stash->bs*stash->bs;
456 
457   /*  first count number of contributors to each processor */
458   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
459   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
460   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
461 
462   nlengths = nprocs+size;
463   i = j    = 0;
464   lastidx  = -1;
465   space    = stash->space_head;
466   while (space != PETSC_NULL){
467     space_next = space->next;
468     sp_idx     = space->idx;
469     for (l=0; l<space->local_used; l++){
470       /* if indices are NOT locally sorted, need to start search at the beginning */
471       if (lastidx > (idx = sp_idx[l])) j = 0;
472       lastidx = idx;
473       for (; j<size; j++) {
474         if (idx >= owners[j] && idx < owners[j+1]) {
475           nlengths[j]++; owner[i] = j; break;
476         }
477       }
478       i++;
479     }
480     space = space_next;
481   }
482   /* Now check what procs get messages - and compute nsends. */
483   for (i=0, nsends=0 ; i<size; i++) {
484     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
485   }
486 
487   {PetscMPIInt  *onodes,*olengths;
488   /* Determine the number of messages to expect, their lengths, from from-ids */
489   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
490   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
491   /* since clubbing row,col - lengths are multiplied by 2 */
492   for (i=0; i<nreceives; i++) olengths[i] *=2;
493   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
494   /* values are size 'bs2' lengths (and remove earlier factor 2 */
495   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
496   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
497   ierr = PetscFree(onodes);CHKERRQ(ierr);
498   ierr = PetscFree(olengths);CHKERRQ(ierr);
499   }
500 
501   /* do sends:
502       1) starts[i] gives the starting index in svalues for stuff going to
503          the ith processor
504   */
505   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
506   sindices = (PetscInt*)(svalues + bs2*stash->n);
507   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
508   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
509   starti   = startv + size;
510   /* use 2 sends the first with all_a, the next with all_i and all_j */
511   startv[0]  = 0; starti[0] = 0;
512   for (i=1; i<size; i++) {
513     startv[i] = startv[i-1] + nlengths[i-1];
514     starti[i] = starti[i-1] + nlengths[i-1]*2;
515   }
516 
517   i     = 0;
518   space = stash->space_head;
519   while (space != PETSC_NULL){
520     space_next = space->next;
521     sp_idx = space->idx;
522     sp_idy = space->idy;
523     sp_val = space->val;
524     for (l=0; l<space->local_used; l++){
525       j = owner[i];
526       if (bs2 == 1) {
527         svalues[startv[j]] = sp_val[l];
528       } else {
529         PetscInt     k;
530         PetscScalar *buf1,*buf2;
531         buf1 = svalues+bs2*startv[j];
532         buf2 = space->val + bs2*l;
533         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
534       }
535       sindices[starti[j]]             = sp_idx[l];
536       sindices[starti[j]+nlengths[j]] = sp_idy[l];
537       startv[j]++;
538       starti[j]++;
539       i++;
540     }
541     space = space_next;
542   }
543   startv[0] = 0;
544   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
545 
546   for (i=0,count=0; i<size; i++) {
547     if (nprocs[i]) {
548       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
549       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
550     }
551   }
552 #if defined(PETSC_USE_INFO)
553   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
554   for (i=0; i<size; i++) {
555     if (nprocs[i]) {
556       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
557     }
558   }
559 #endif
560   ierr = PetscFree(owner);CHKERRQ(ierr);
561   ierr = PetscFree(startv);CHKERRQ(ierr);
562   /* This memory is reused in scatter end  for a different purpose*/
563   for (i=0; i<2*size; i++) nprocs[i] = -1;
564   stash->nprocs = nprocs;
565 
566   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
568 
569   for (i=0; i<nreceives; i++) {
570     recv_waits[2*i]   = recv_waits1[i];
571     recv_waits[2*i+1] = recv_waits2[i];
572   }
573   stash->recv_waits = recv_waits;
574   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
575   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
576 
577   stash->svalues    = svalues;    stash->rvalues     = rvalues;
578   stash->rindices   = rindices;   stash->send_waits  = send_waits;
579   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
580   PetscFunctionReturn(0);
581 }
582 
583 /*
584    MatStashScatterGetMesg_Private - This function waits on the receives posted
585    in the function MatStashScatterBegin_Private() and returns one message at
586    a time to the calling function. If no messages are left, it indicates this
587    by setting flg = 0, else it sets flg = 1.
588 
589    Input Parameters:
590    stash - the stash
591 
592    Output Parameters:
593    nvals - the number of entries in the current message.
594    rows  - an array of row indices (or blocked indices) corresponding to the values
595    cols  - an array of columnindices (or blocked indices) corresponding to the values
596    vals  - the values
597    flg   - 0 indicates no more message left, and the current call has no values associated.
598            1 indicates that the current call successfully received a message, and the
599              other output parameters nvals,rows,cols,vals are set appropriately.
600 */
601 #undef __FUNCT__
602 #define __FUNCT__ "MatStashScatterGetMesg_Private"
603 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
604 {
605   PetscErrorCode ierr;
606   PetscMPIInt    i,*flg_v,i1,i2;
607   PetscInt       bs2;
608   MPI_Status     recv_status;
609   PetscTruth     match_found = PETSC_FALSE;
610 
611   PetscFunctionBegin;
612 
613   *flg = 0; /* When a message is discovered this is reset to 1 */
614   /* Return if no more messages to process */
615   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
616 
617   flg_v = stash->nprocs;
618   bs2   = stash->bs*stash->bs;
619   /* If a matching pair of receieves are found, process them, and return the data to
620      the calling function. Until then keep receiving messages */
621   while (!match_found) {
622     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
623     /* Now pack the received message into a structure which is useable by others */
624     if (i % 2) {
625       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
626       flg_v[2*recv_status.MPI_SOURCE] = i/2;
627       *nvals = *nvals/bs2;
628     } else {
629       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
630       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
631       *nvals = *nvals/2; /* This message has both row indices and col indices */
632     }
633 
634     /* Check if we have both messages from this proc */
635     i1 = flg_v[2*recv_status.MPI_SOURCE];
636     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
637     if (i1 != -1 && i2 != -1) {
638       *rows       = stash->rindices[i2];
639       *cols       = *rows + *nvals;
640       *vals       = stash->rvalues[i1];
641       *flg        = 1;
642       stash->nprocessed ++;
643       match_found = PETSC_TRUE;
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648