xref: /petsc/src/mat/utils/matstash.c (revision 13e0d08325be81619ffebf1117f9a3b4ad59ea83)
1 #define PETSCMAT_DLL
2 
3 #include "private/matimpl.h"
4 
5 #define DEFAULT_STASH_SIZE   10000
6 
7 /*
8   MatStashCreate_Private - Creates a stash,currently used for all the parallel
9   matrix implementations. The stash is where elements of a matrix destined
10   to be stored on other processors are kept until matrix assembly is done.
11 
12   This is a simple minded stash. Simply adds entries to end of stash.
13 
14   Input Parameters:
15   comm - communicator, required for scatters.
16   bs   - stash block size. used when stashing blocks of values
17 
18   Output Parameters:
19   stash    - the newly created stash
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatStashCreate_Private"
23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
24 {
25   PetscErrorCode ierr;
26   PetscInt       max,*opt,nopt,i;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31   stash->comm = comm;
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
37   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38 
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs       = bs;
56   stash->nmax     = 0;
57   stash->oldnmax  = 0;
58   stash->n        = 0;
59   stash->reallocs = -1;
60   stash->space_head = 0;
61   stash->space      = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rindices    = 0;
71   stash->nprocessed  = 0;
72 
73   stash->reproduce   = PETSC_FALSE;
74   ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91     stash->space      = 0;
92   }
93   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 /*
98    MatStashScatterEnd_Private - This is called as the final stage of
99    scatter. The final stages of message passing is done here, and
100    all the memory used for message passing is cleaned up. This
101    routine also resets the stash, and deallocates the memory used
102    for the stash. It also keeps track of the current memory usage
103    so that the same value can be used the next time through.
104 */
105 #undef __FUNCT__
106 #define __FUNCT__ "MatStashScatterEnd_Private"
107 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
108 {
109   PetscErrorCode ierr;
110   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
111   MPI_Status     *send_status;
112 
113   PetscFunctionBegin;
114   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
115   /* wait on sends */
116   if (nsends) {
117     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
118     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
119     ierr = PetscFree(send_status);CHKERRQ(ierr);
120   }
121 
122   /* Now update nmaxold to be app 10% more than max n used, this way the
123      wastage of space is reduced the next time this stash is used.
124      Also update the oldmax, only if it increases */
125   if (stash->n) {
126     bs2      = stash->bs*stash->bs;
127     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
128     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
129   }
130 
131   stash->nmax       = 0;
132   stash->n          = 0;
133   stash->reallocs   = -1;
134   stash->nprocessed = 0;
135   if (stash->space_head){
136     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
137     stash->space_head = 0;
138     stash->space      = 0;
139   }
140   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
142   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
143   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
144   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
146   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 /*
151    MatStashGetInfo_Private - Gets the relavant statistics of the stash
152 
153    Input Parameters:
154    stash    - the stash
155    nstash   - the size of the stash. Indicates the number of values stored.
156    reallocs - the number of additional mallocs incurred.
157 
158 */
159 #undef __FUNCT__
160 #define __FUNCT__ "MatStashGetInfo_Private"
161 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
162 {
163   PetscInt bs2 = stash->bs*stash->bs;
164 
165   PetscFunctionBegin;
166   if (nstash) *nstash   = stash->n*bs2;
167   if (reallocs) {
168     if (stash->reallocs < 0) *reallocs = 0;
169     else                     *reallocs = stash->reallocs;
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 /*
175    MatStashSetInitialSize_Private - Sets the initial size of the stash
176 
177    Input Parameters:
178    stash  - the stash
179    max    - the value that is used as the max size of the stash.
180             this value is used while allocating memory.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "MatStashSetInitialSize_Private"
184 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
185 {
186   PetscFunctionBegin;
187   stash->umax = max;
188   PetscFunctionReturn(0);
189 }
190 
191 /* MatStashExpand_Private - Expand the stash. This function is called
192    when the space in the stash is not sufficient to add the new values
193    being inserted into the stash.
194 
195    Input Parameters:
196    stash - the stash
197    incr  - the minimum increase requested
198 
199    Notes:
200    This routine doubles the currently used memory.
201  */
202 #undef __FUNCT__
203 #define __FUNCT__ "MatStashExpand_Private"
204 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
205 {
206   PetscErrorCode ierr;
207   PetscInt       newnmax,bs2= stash->bs*stash->bs;
208 
209   PetscFunctionBegin;
210   /* allocate a larger stash */
211   if (!stash->oldnmax && !stash->nmax) { /* new stash */
212     if (stash->umax)                  newnmax = stash->umax/bs2;
213     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
214   } else if (!stash->nmax) { /* resuing stash */
215     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
216     else                              newnmax = stash->oldnmax/bs2;
217   } else                              newnmax = stash->nmax*2;
218   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
219 
220   /* Get a MatStashSpace and attach it to stash */
221   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
222   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
223     stash->space_head = stash->space;
224   }
225 
226   stash->reallocs++;
227   stash->nmax = newnmax;
228   PetscFunctionReturn(0);
229 }
230 /*
231   MatStashValuesRow_Private - inserts values into the stash. This function
232   expects the values to be roworiented. Multiple columns belong to the same row
233   can be inserted with a single call to this function.
234 
235   Input Parameters:
236   stash  - the stash
237   row    - the global row correspoiding to the values
238   n      - the number of elements inserted. All elements belong to the above row.
239   idxn   - the global column indices corresponding to each of the values.
240   values - the values inserted
241 */
242 #undef __FUNCT__
243 #define __FUNCT__ "MatStashValuesRow_Private"
244 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool  ignorezeroentries)
245 {
246   PetscErrorCode     ierr;
247   PetscInt           i,k,cnt = 0;
248   PetscMatStashSpace space=stash->space;
249 
250   PetscFunctionBegin;
251   /* Check and see if we have sufficient memory */
252   if (!space || space->local_remaining < n){
253     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
254   }
255   space = stash->space;
256   k     = space->local_used;
257   for (i=0; i<n; i++) {
258     if (ignorezeroentries && (values[i] == 0.0)) continue;
259     space->idx[k] = row;
260     space->idy[k] = idxn[i];
261     space->val[k] = values[i];
262     k++;
263     cnt++;
264   }
265   stash->n               += cnt;
266   space->local_used      += cnt;
267   space->local_remaining -= cnt;
268   PetscFunctionReturn(0);
269 }
270 
271 /*
272   MatStashValuesCol_Private - inserts values into the stash. This function
273   expects the values to be columnoriented. Multiple columns belong to the same row
274   can be inserted with a single call to this function.
275 
276   Input Parameters:
277   stash   - the stash
278   row     - the global row correspoiding to the values
279   n       - the number of elements inserted. All elements belong to the above row.
280   idxn    - the global column indices corresponding to each of the values.
281   values  - the values inserted
282   stepval - the consecutive values are sepated by a distance of stepval.
283             this happens because the input is columnoriented.
284 */
285 #undef __FUNCT__
286 #define __FUNCT__ "MatStashValuesCol_Private"
287 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool  ignorezeroentries)
288 {
289   PetscErrorCode     ierr;
290   PetscInt           i,k,cnt = 0;
291   PetscMatStashSpace space=stash->space;
292 
293   PetscFunctionBegin;
294   /* Check and see if we have sufficient memory */
295   if (!space || space->local_remaining < n){
296     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
297   }
298   space = stash->space;
299   k = space->local_used;
300   for (i=0; i<n; i++) {
301     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
302     space->idx[k] = row;
303     space->idy[k] = idxn[i];
304     space->val[k] = values[i*stepval];
305     k++;
306     cnt++;
307   }
308   stash->n               += cnt;
309   space->local_used      += cnt;
310   space->local_remaining -= cnt;
311   PetscFunctionReturn(0);
312 }
313 
314 /*
315   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
316   This function expects the values to be roworiented. Multiple columns belong
317   to the same block-row can be inserted with a single call to this function.
318   This function extracts the sub-block of values based on the dimensions of
319   the original input block, and the row,col values corresponding to the blocks.
320 
321   Input Parameters:
322   stash  - the stash
323   row    - the global block-row correspoiding to the values
324   n      - the number of elements inserted. All elements belong to the above row.
325   idxn   - the global block-column indices corresponding to each of the blocks of
326            values. Each block is of size bs*bs.
327   values - the values inserted
328   rmax   - the number of block-rows in the original block.
329   cmax   - the number of block-columsn on the original block.
330   idx    - the index of the current block-row in the original block.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
334 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
335 {
336   PetscErrorCode     ierr;
337   PetscInt           i,j,k,bs2,bs=stash->bs,l;
338   const PetscScalar  *vals;
339   PetscScalar        *array;
340   PetscMatStashSpace space=stash->space;
341 
342   PetscFunctionBegin;
343   if (!space || space->local_remaining < n){
344     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
345   }
346   space = stash->space;
347   l     = space->local_used;
348   bs2   = bs*bs;
349   for (i=0; i<n; i++) {
350     space->idx[l] = row;
351     space->idy[l] = idxn[i];
352     /* Now copy over the block of values. Store the values column oriented.
353        This enables inserting multiple blocks belonging to a row with a single
354        funtion call */
355     array = space->val + bs2*l;
356     vals  = values + idx*bs2*n + bs*i;
357     for (j=0; j<bs; j++) {
358       for (k=0; k<bs; k++) array[k*bs] = vals[k];
359       array++;
360       vals  += cmax*bs;
361     }
362     l++;
363   }
364   stash->n               += n;
365   space->local_used      += n;
366   space->local_remaining -= n;
367   PetscFunctionReturn(0);
368 }
369 
370 /*
371   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
372   This function expects the values to be roworiented. Multiple columns belong
373   to the same block-row can be inserted with a single call to this function.
374   This function extracts the sub-block of values based on the dimensions of
375   the original input block, and the row,col values corresponding to the blocks.
376 
377   Input Parameters:
378   stash  - the stash
379   row    - the global block-row correspoiding to the values
380   n      - the number of elements inserted. All elements belong to the above row.
381   idxn   - the global block-column indices corresponding to each of the blocks of
382            values. Each block is of size bs*bs.
383   values - the values inserted
384   rmax   - the number of block-rows in the original block.
385   cmax   - the number of block-columsn on the original block.
386   idx    - the index of the current block-row in the original block.
387 */
388 #undef __FUNCT__
389 #define __FUNCT__ "MatStashValuesColBlocked_Private"
390 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
391 {
392   PetscErrorCode     ierr;
393   PetscInt           i,j,k,bs2,bs=stash->bs,l;
394   const PetscScalar  *vals;
395   PetscScalar        *array;
396   PetscMatStashSpace space=stash->space;
397 
398   PetscFunctionBegin;
399   if (!space || space->local_remaining < n){
400     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
401   }
402   space = stash->space;
403   l     = space->local_used;
404   bs2   = bs*bs;
405   for (i=0; i<n; i++) {
406     space->idx[l] = row;
407     space->idy[l] = idxn[i];
408     /* Now copy over the block of values. Store the values column oriented.
409      This enables inserting multiple blocks belonging to a row with a single
410      funtion call */
411     array = space->val + bs2*l;
412     vals  = values + idx*bs2*n + bs*i;
413     for (j=0; j<bs; j++) {
414       for (k=0; k<bs; k++) {array[k] = vals[k];}
415       array += bs;
416       vals  += rmax*bs;
417     }
418     l++;
419   }
420   stash->n               += n;
421   space->local_used      += n;
422   space->local_remaining -= n;
423   PetscFunctionReturn(0);
424 }
425 /*
426   MatStashScatterBegin_Private - Initiates the transfer of values to the
427   correct owners. This function goes through the stash, and check the
428   owners of each stashed value, and sends the values off to the owner
429   processors.
430 
431   Input Parameters:
432   stash  - the stash
433   owners - an array of size 'no-of-procs' which gives the ownership range
434            for each node.
435 
436   Notes: The 'owners' array in the cased of the blocked-stash has the
437   ranges specified blocked global indices, and for the regular stash in
438   the proper global indices.
439 */
440 #undef __FUNCT__
441 #define __FUNCT__ "MatStashScatterBegin_Private"
442 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
443 {
444   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
445   PetscInt          size=stash->size,nsends;
446   PetscErrorCode    ierr;
447   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
448   PetscScalar       **rvalues,*svalues;
449   MPI_Comm          comm = stash->comm;
450   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
451   PetscMPIInt       *nprocs,*nlengths,nreceives;
452   PetscInt          *sp_idx,*sp_idy;
453   PetscScalar       *sp_val;
454   PetscMatStashSpace space,space_next;
455 
456   PetscFunctionBegin;
457   bs2 = stash->bs*stash->bs;
458 
459   /*  first count number of contributors to each processor */
460   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
461   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
462   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
463   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
464   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
465 
466   i = j    = 0;
467   lastidx  = -1;
468   space    = stash->space_head;
469   while (space != PETSC_NULL){
470     space_next = space->next;
471     sp_idx     = space->idx;
472     for (l=0; l<space->local_used; l++){
473       /* if indices are NOT locally sorted, need to start search at the beginning */
474       if (lastidx > (idx = sp_idx[l])) j = 0;
475       lastidx = idx;
476       for (; j<size; j++) {
477         if (idx >= owners[j] && idx < owners[j+1]) {
478           nlengths[j]++; owner[i] = j; break;
479         }
480       }
481       i++;
482     }
483     space = space_next;
484   }
485   /* Now check what procs get messages - and compute nsends. */
486   for (i=0, nsends=0 ; i<size; i++) {
487     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
488   }
489 
490   {PetscMPIInt  *onodes,*olengths;
491   /* Determine the number of messages to expect, their lengths, from from-ids */
492   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
493   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
494   /* since clubbing row,col - lengths are multiplied by 2 */
495   for (i=0; i<nreceives; i++) olengths[i] *=2;
496   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
497   /* values are size 'bs2' lengths (and remove earlier factor 2 */
498   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
499   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
500   ierr = PetscFree(onodes);CHKERRQ(ierr);
501   ierr = PetscFree(olengths);CHKERRQ(ierr);
502   }
503 
504   /* do sends:
505       1) starts[i] gives the starting index in svalues for stuff going to
506          the ith processor
507   */
508   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
509   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
510   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
511   /* use 2 sends the first with all_a, the next with all_i and all_j */
512   startv[0]  = 0; starti[0] = 0;
513   for (i=1; i<size; i++) {
514     startv[i] = startv[i-1] + nlengths[i-1];
515     starti[i] = starti[i-1] + 2*nlengths[i-1];
516   }
517 
518   i     = 0;
519   space = stash->space_head;
520   while (space != PETSC_NULL){
521     space_next = space->next;
522     sp_idx = space->idx;
523     sp_idy = space->idy;
524     sp_val = space->val;
525     for (l=0; l<space->local_used; l++){
526       j = owner[i];
527       if (bs2 == 1) {
528         svalues[startv[j]] = sp_val[l];
529       } else {
530         PetscInt     k;
531         PetscScalar *buf1,*buf2;
532         buf1 = svalues+bs2*startv[j];
533         buf2 = space->val + bs2*l;
534         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
535       }
536       sindices[starti[j]]             = sp_idx[l];
537       sindices[starti[j]+nlengths[j]] = sp_idy[l];
538       startv[j]++;
539       starti[j]++;
540       i++;
541     }
542     space = space_next;
543   }
544   startv[0] = 0;
545   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
546 
547   for (i=0,count=0; i<size; i++) {
548     if (nprocs[i]) {
549       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
550       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
551     }
552   }
553 #if defined(PETSC_USE_INFO)
554   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
555   for (i=0; i<size; i++) {
556     if (nprocs[i]) {
557       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
558     }
559   }
560 #endif
561   ierr = PetscFree(nlengths);CHKERRQ(ierr);
562   ierr = PetscFree(owner);CHKERRQ(ierr);
563   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
564   ierr = PetscFree(nprocs);CHKERRQ(ierr);
565 
566   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
568 
569   for (i=0; i<nreceives; i++) {
570     recv_waits[2*i]   = recv_waits1[i];
571     recv_waits[2*i+1] = recv_waits2[i];
572   }
573   stash->recv_waits = recv_waits;
574   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
575   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
576 
577   stash->svalues     = svalues;
578   stash->sindices    = sindices;
579   stash->rvalues     = rvalues;
580   stash->rindices    = rindices;
581   stash->send_waits  = send_waits;
582   stash->nsends      = nsends;
583   stash->nrecvs      = nreceives;
584   stash->reproduce_count = 0;
585   PetscFunctionReturn(0);
586 }
587 
588 /*
589    MatStashScatterGetMesg_Private - This function waits on the receives posted
590    in the function MatStashScatterBegin_Private() and returns one message at
591    a time to the calling function. If no messages are left, it indicates this
592    by setting flg = 0, else it sets flg = 1.
593 
594    Input Parameters:
595    stash - the stash
596 
597    Output Parameters:
598    nvals - the number of entries in the current message.
599    rows  - an array of row indices (or blocked indices) corresponding to the values
600    cols  - an array of columnindices (or blocked indices) corresponding to the values
601    vals  - the values
602    flg   - 0 indicates no more message left, and the current call has no values associated.
603            1 indicates that the current call successfully received a message, and the
604              other output parameters nvals,rows,cols,vals are set appropriately.
605 */
606 #undef __FUNCT__
607 #define __FUNCT__ "MatStashScatterGetMesg_Private"
608 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
609 {
610   PetscErrorCode ierr;
611   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
612   PetscInt       bs2;
613   MPI_Status     recv_status;
614   PetscBool      match_found = PETSC_FALSE;
615 
616   PetscFunctionBegin;
617 
618   *flg = 0; /* When a message is discovered this is reset to 1 */
619   /* Return if no more messages to process */
620   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
621 
622   bs2   = stash->bs*stash->bs;
623   /* If a matching pair of receives are found, process them, and return the data to
624      the calling function. Until then keep receiving messages */
625   while (!match_found) {
626     CHKMEMQ;
627     if (stash->reproduce) {
628       i = stash->reproduce_count++;
629       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
630     } else {
631       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
632     }
633     CHKMEMQ;
634     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
635 
636     /* Now pack the received message into a structure which is usable by others */
637     if (i % 2) {
638       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
639       flg_v[2*recv_status.MPI_SOURCE] = i/2;
640       *nvals = *nvals/bs2;
641     } else {
642       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
643       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
644       *nvals = *nvals/2; /* This message has both row indices and col indices */
645     }
646 
647     /* Check if we have both messages from this proc */
648     i1 = flg_v[2*recv_status.MPI_SOURCE];
649     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
650     if (i1 != -1 && i2 != -1) {
651       *rows       = stash->rindices[i2];
652       *cols       = *rows + *nvals;
653       *vals       = stash->rvalues[i1];
654       *flg        = 1;
655       stash->nprocessed ++;
656       match_found = PETSC_TRUE;
657     }
658   }
659   PetscFunctionReturn(0);
660 }
661