xref: /petsc/src/mat/utils/matstash.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 #include "src/mat/matimpl.h"
3 
4 /*
5        The input to the stash is ALWAYS in MatScalar precision, and the
6     internal storage and output is also in MatScalar.
7 */
8 #define DEFAULT_STASH_SIZE   10000
9 
10 /*
11   MatStashCreate_Private - Creates a stash,currently used for all the parallel
12   matrix implementations. The stash is where elements of a matrix destined
13   to be stored on other processors are kept until matrix assembly is done.
14 
15   This is a simple minded stash. Simply adds entries to end of stash.
16 
17   Input Parameters:
18   comm - communicator, required for scatters.
19   bs   - stash block size. used when stashing blocks of values
20 
21   Output Parameters:
22   stash    - the newly created stash
23 */
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStashCreate_Private"
26 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
27 {
28   PetscErrorCode ierr;
29   int max,*opt,nopt;
30   PetscTruth flg;
31 
32   PetscFunctionBegin;
33   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34   stash->comm = comm;
35   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
37   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
38   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs       = bs;
56   stash->nmax     = 0;
57   stash->oldnmax  = 0;
58   stash->n        = 0;
59   stash->reallocs = -1;
60   stash->idx      = 0;
61   stash->idy      = 0;
62   stash->array    = 0;
63 
64   stash->send_waits  = 0;
65   stash->recv_waits  = 0;
66   stash->send_status = 0;
67   stash->nsends      = 0;
68   stash->nrecvs      = 0;
69   stash->svalues     = 0;
70   stash->rvalues     = 0;
71   stash->rmax        = 0;
72   stash->nprocs      = 0;
73   stash->nprocessed  = 0;
74   PetscFunctionReturn(0);
75 }
76 
77 /*
78    MatStashDestroy_Private - Destroy the stash
79 */
80 #undef __FUNCT__
81 #define __FUNCT__ "MatStashDestroy_Private"
82 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
83 {
84   PetscErrorCode ierr;
85 
86   PetscFunctionBegin;
87   if (stash->array) {
88     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89     stash->array = 0;
90   }
91   PetscFunctionReturn(0);
92 }
93 
94 /*
95    MatStashScatterEnd_Private - This is called as the fial stage of
96    scatter. The final stages of messagepassing is done here, and
97    all the memory used for messagepassing is cleanedu up. This
98    routine also resets the stash, and deallocates the memory used
99    for the stash. It also keeps track of the current memory usage
100    so that the same value can be used the next time through.
101 */
102 #undef __FUNCT__
103 #define __FUNCT__ "MatStashScatterEnd_Private"
104 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
105 {
106   PetscErrorCode ierr;
107   int         nsends=stash->nsends,bs2,oldnmax;
108   MPI_Status  *send_status;
109 
110   PetscFunctionBegin;
111   /* wait on sends */
112   if (nsends) {
113     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
114     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
115     ierr = PetscFree(send_status);CHKERRQ(ierr);
116   }
117 
118   /* Now update nmaxold to be app 10% more than max n used, this way the
119      wastage of space is reduced the next time this stash is used.
120      Also update the oldmax, only if it increases */
121   if (stash->n) {
122     bs2      = stash->bs*stash->bs;
123     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
124     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
125   }
126 
127   stash->nmax       = 0;
128   stash->n          = 0;
129   stash->reallocs   = -1;
130   stash->rmax       = 0;
131   stash->nprocessed = 0;
132 
133   if (stash->array) {
134     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
135     stash->array = 0;
136     stash->idx   = 0;
137     stash->idy   = 0;
138   }
139   if (stash->send_waits) {
140     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141     stash->send_waits = 0;
142   }
143   if (stash->recv_waits) {
144     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
145     stash->recv_waits = 0;
146   }
147   if (stash->svalues) {
148     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
149     stash->svalues = 0;
150   }
151   if (stash->rvalues) {
152     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
153     stash->rvalues = 0;
154   }
155   if (stash->nprocs) {
156     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
157     stash->nprocs = 0;
158   }
159 
160   PetscFunctionReturn(0);
161 }
162 
163 /*
164    MatStashGetInfo_Private - Gets the relavant statistics of the stash
165 
166    Input Parameters:
167    stash    - the stash
168    nstash   - the size of the stash. Indicates the number of values stored.
169    reallocs - the number of additional mallocs incurred.
170 
171 */
172 #undef __FUNCT__
173 #define __FUNCT__ "MatStashGetInfo_Private"
174 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
175 {
176   int bs2 = stash->bs*stash->bs;
177 
178   PetscFunctionBegin;
179   if (nstash) *nstash   = stash->n*bs2;
180   if (reallocs) {
181     if (stash->reallocs < 0) *reallocs = 0;
182     else                     *reallocs = stash->reallocs;
183   }
184   PetscFunctionReturn(0);
185 }
186 
187 
188 /*
189    MatStashSetInitialSize_Private - Sets the initial size of the stash
190 
191    Input Parameters:
192    stash  - the stash
193    max    - the value that is used as the max size of the stash.
194             this value is used while allocating memory.
195 */
196 #undef __FUNCT__
197 #define __FUNCT__ "MatStashSetInitialSize_Private"
198 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,int max)
199 {
200   PetscFunctionBegin;
201   stash->umax = max;
202   PetscFunctionReturn(0);
203 }
204 
205 /* MatStashExpand_Private - Expand the stash. This function is called
206    when the space in the stash is not sufficient to add the new values
207    being inserted into the stash.
208 
209    Input Parameters:
210    stash - the stash
211    incr  - the minimum increase requested
212 
213    Notes:
214    This routine doubles the currently used memory.
215  */
216 #undef __FUNCT__
217 #define __FUNCT__ "MatStashExpand_Private"
218 static PetscErrorCode MatStashExpand_Private(MatStash *stash,int incr)
219 {
220   PetscErrorCode ierr;
221   int       *n_idx,*n_idy,newnmax,bs2;
222   MatScalar *n_array;
223 
224   PetscFunctionBegin;
225   /* allocate a larger stash */
226   bs2     = stash->bs*stash->bs;
227   if (!stash->oldnmax && !stash->nmax) { /* new stash */
228     if (stash->umax)                  newnmax = stash->umax/bs2;
229     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
230   } else if (!stash->nmax) { /* resuing stash */
231     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
232     else                              newnmax = stash->oldnmax/bs2;
233   } else                              newnmax = stash->nmax*2;
234   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
235 
236   ierr  = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
237   n_idx = (int*)(n_array + bs2*newnmax);
238   n_idy = (int*)(n_idx + newnmax);
239   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
240   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
241   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
242   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
243   stash->array   = n_array;
244   stash->idx     = n_idx;
245   stash->idy     = n_idy;
246   stash->nmax    = newnmax;
247   stash->reallocs++;
248   PetscFunctionReturn(0);
249 }
250 /*
251   MatStashValuesRow_Private - inserts values into the stash. This function
252   expects the values to be roworiented. Multiple columns belong to the same row
253   can be inserted with a single call to this function.
254 
255   Input Parameters:
256   stash  - the stash
257   row    - the global row correspoiding to the values
258   n      - the number of elements inserted. All elements belong to the above row.
259   idxn   - the global column indices corresponding to each of the values.
260   values - the values inserted
261 */
262 #undef __FUNCT__
263 #define __FUNCT__ "MatStashValuesRow_Private"
264 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[])
265 {
266   PetscErrorCode ierr;
267   int i;
268 
269   PetscFunctionBegin;
270   /* Check and see if we have sufficient memory */
271   if ((stash->n + n) > stash->nmax) {
272     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
273   }
274   for (i=0; i<n; i++) {
275     stash->idx[stash->n]   = row;
276     stash->idy[stash->n]   = idxn[i];
277     stash->array[stash->n] = values[i];
278     stash->n++;
279   }
280   PetscFunctionReturn(0);
281 }
282 /*
283   MatStashValuesCol_Private - inserts values into the stash. This function
284   expects the values to be columnoriented. Multiple columns belong to the same row
285   can be inserted with a single call to this function.
286 
287   Input Parameters:
288   stash   - the stash
289   row     - the global row correspoiding to the values
290   n       - the number of elements inserted. All elements belong to the above row.
291   idxn    - the global column indices corresponding to each of the values.
292   values  - the values inserted
293   stepval - the consecutive values are sepated by a distance of stepval.
294             this happens because the input is columnoriented.
295 */
296 #undef __FUNCT__
297 #define __FUNCT__ "MatStashValuesCol_Private"
298 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int stepval)
299 {
300   PetscErrorCode ierr;
301   int i;
302 
303   PetscFunctionBegin;
304   /* Check and see if we have sufficient memory */
305   if ((stash->n + n) > stash->nmax) {
306     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
307   }
308   for (i=0; i<n; i++) {
309     stash->idx[stash->n]   = row;
310     stash->idy[stash->n]   = idxn[i];
311     stash->array[stash->n] = values[i*stepval];
312     stash->n++;
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 /*
318   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
319   This function expects the values to be roworiented. Multiple columns belong
320   to the same block-row can be inserted with a single call to this function.
321   This function extracts the sub-block of values based on the dimensions of
322   the original input block, and the row,col values corresponding to the blocks.
323 
324   Input Parameters:
325   stash  - the stash
326   row    - the global block-row correspoiding to the values
327   n      - the number of elements inserted. All elements belong to the above row.
328   idxn   - the global block-column indices corresponding to each of the blocks of
329            values. Each block is of size bs*bs.
330   values - the values inserted
331   rmax   - the number of block-rows in the original block.
332   cmax   - the number of block-columsn on the original block.
333   idx    - the index of the current block-row in the original block.
334 */
335 #undef __FUNCT__
336 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
337 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
338 {
339   PetscErrorCode ierr;
340   int i,j,k,bs2,bs=stash->bs;
341   const MatScalar *vals;
342   MatScalar       *array;
343 
344   PetscFunctionBegin;
345   bs2 = bs*bs;
346   if ((stash->n+n) > stash->nmax) {
347     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
348   }
349   for (i=0; i<n; i++) {
350     stash->idx[stash->n]   = row;
351     stash->idy[stash->n] = idxn[i];
352     /* Now copy over the block of values. Store the values column oriented.
353        This enables inserting multiple blocks belonging to a row with a single
354        funtion call */
355     array = stash->array + bs2*stash->n;
356     vals  = values + idx*bs2*n + bs*i;
357     for (j=0; j<bs; j++) {
358       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
359       array += 1;
360       vals  += cmax*bs;
361     }
362     stash->n++;
363   }
364   PetscFunctionReturn(0);
365 }
366 
367 /*
368   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
369   This function expects the values to be roworiented. Multiple columns belong
370   to the same block-row can be inserted with a single call to this function.
371   This function extracts the sub-block of values based on the dimensions of
372   the original input block, and the row,col values corresponding to the blocks.
373 
374   Input Parameters:
375   stash  - the stash
376   row    - the global block-row correspoiding to the values
377   n      - the number of elements inserted. All elements belong to the above row.
378   idxn   - the global block-column indices corresponding to each of the blocks of
379            values. Each block is of size bs*bs.
380   values - the values inserted
381   rmax   - the number of block-rows in the original block.
382   cmax   - the number of block-columsn on the original block.
383   idx    - the index of the current block-row in the original block.
384 */
385 #undef __FUNCT__
386 #define __FUNCT__ "MatStashValuesColBlocked_Private"
387 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
388 {
389   PetscErrorCode ierr;
390   int i,j,k,bs2,bs=stash->bs;
391   const MatScalar *vals;
392   MatScalar       *array;
393 
394   PetscFunctionBegin;
395   bs2 = bs*bs;
396   if ((stash->n+n) > stash->nmax) {
397     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
398   }
399   for (i=0; i<n; i++) {
400     stash->idx[stash->n]   = row;
401     stash->idy[stash->n] = idxn[i];
402     /* Now copy over the block of values. Store the values column oriented.
403      This enables inserting multiple blocks belonging to a row with a single
404      funtion call */
405     array = stash->array + bs2*stash->n;
406     vals  = values + idx*bs + bs2*rmax*i;
407     for (j=0; j<bs; j++) {
408       for (k=0; k<bs; k++) {array[k] = vals[k];}
409       array += bs;
410       vals  += rmax*bs;
411     }
412     stash->n++;
413   }
414   PetscFunctionReturn(0);
415 }
416 /*
417   MatStashScatterBegin_Private - Initiates the transfer of values to the
418   correct owners. This function goes through the stash, and check the
419   owners of each stashed value, and sends the values off to the owner
420   processors.
421 
422   Input Parameters:
423   stash  - the stash
424   owners - an array of size 'no-of-procs' which gives the ownership range
425            for each node.
426 
427   Notes: The 'owners' array in the cased of the blocked-stash has the
428   ranges specified blocked global indices, and for the regular stash in
429   the proper global indices.
430 */
431 #undef __FUNCT__
432 #define __FUNCT__ "MatStashScatterBegin_Private"
433 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,int *owners)
434 {
435   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
436   int         size=stash->size,*nprocs,nsends,nreceives;
437   PetscErrorCode ierr;
438   int         nmax,count,*sindices,*rindices,i,j,idx;
439   MatScalar   *rvalues,*svalues;
440   MPI_Comm    comm = stash->comm;
441   MPI_Request *send_waits,*recv_waits;
442 
443   PetscFunctionBegin;
444 
445   bs2   = stash->bs*stash->bs;
446   /*  first count number of contributors to each processor */
447   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
448   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
449   ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr);
450 
451   for (i=0; i<stash->n; i++) {
452     idx = stash->idx[i];
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
456       }
457     }
458   }
459   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460 
461   /* inform other processors of number of messages and max length*/
462   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
463 
464   /* post receives:
465      since we don't know how long each individual message is we
466      allocate the largest needed buffer for each receive. Potentially
467      this is a lot of wasted space.
468   */
469   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);CHKERRQ(ierr);
470   rindices = (int*)(rvalues + bs2*nreceives*nmax);
471   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0,count=0; i<nreceives; i++) {
473     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
474                      recv_waits+count++);CHKERRQ(ierr);
475     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);CHKERRQ(ierr);
483   sindices = (int*)(svalues + bs2*stash->n);
484   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
485   ierr     = PetscMalloc(2*size*sizeof(int),&startv);CHKERRQ(ierr);
486   starti   = startv + size;
487   /* use 2 sends the first with all_a, the next with all_i and all_j */
488   startv[0]  = 0; starti[0] = 0;
489   for (i=1; i<size; i++) {
490     startv[i] = startv[i-1] + nprocs[2*i-2];
491     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
492   }
493   for (i=0; i<stash->n; i++) {
494     j = owner[i];
495     if (bs2 == 1) {
496       svalues[startv[j]]              = stash->array[i];
497     } else {
498       int       k;
499       MatScalar *buf1,*buf2;
500       buf1 = svalues+bs2*startv[j];
501       buf2 = stash->array+bs2*i;
502       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
503     }
504     sindices[starti[j]]               = stash->idx[i];
505     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
506     startv[j]++;
507     starti[j]++;
508   }
509   startv[0] = 0;
510   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
511   for (i=0,count=0; i<size; i++) {
512     if (nprocs[2*i+1]) {
513       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,
514                        send_waits+count++);CHKERRQ(ierr);
515       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPI_INT,i,tag2,comm,
516                        send_waits+count++);CHKERRQ(ierr);
517     }
518   }
519   ierr = PetscFree(owner);CHKERRQ(ierr);
520   ierr = PetscFree(startv);CHKERRQ(ierr);
521   /* This memory is reused in scatter end  for a different purpose*/
522   for (i=0; i<2*size; i++) nprocs[i] = -1;
523   stash->nprocs      = nprocs;
524 
525   stash->svalues    = svalues;    stash->rvalues    = rvalues;
526   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
527   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
528   stash->rmax       = nmax;
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533    MatStashScatterGetMesg_Private - This function waits on the receives posted
534    in the function MatStashScatterBegin_Private() and returns one message at
535    a time to the calling function. If no messages are left, it indicates this
536    by setting flg = 0, else it sets flg = 1.
537 
538    Input Parameters:
539    stash - the stash
540 
541    Output Parameters:
542    nvals - the number of entries in the current message.
543    rows  - an array of row indices (or blocked indices) corresponding to the values
544    cols  - an array of columnindices (or blocked indices) corresponding to the values
545    vals  - the values
546    flg   - 0 indicates no more message left, and the current call has no values associated.
547            1 indicates that the current call successfully received a message, and the
548              other output parameters nvals,rows,cols,vals are set appropriately.
549 */
550 #undef __FUNCT__
551 #define __FUNCT__ "MatStashScatterGetMesg_Private"
552 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
553 {
554   PetscErrorCode ierr;
555   int         i,*flg_v,i1,i2,*rindices,bs2;
556   MPI_Status  recv_status;
557   PetscTruth  match_found = PETSC_FALSE;
558 
559   PetscFunctionBegin;
560 
561   *flg = 0; /* When a message is discovered this is reset to 1 */
562   /* Return if no more messages to process */
563   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
564 
565   flg_v = stash->nprocs;
566   bs2   = stash->bs*stash->bs;
567   /* If a matching pair of receieves are found, process them, and return the data to
568      the calling function. Until then keep receiving messages */
569   while (!match_found) {
570     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
571     /* Now pack the received message into a structure which is useable by others */
572     if (i % 2) {
573       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
574       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
575       *nvals = *nvals/2; /* This message has both row indices and col indices */
576     } else {
577       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
578       flg_v[2*recv_status.MPI_SOURCE] = i/2;
579       *nvals = *nvals/bs2;
580     }
581 
582     /* Check if we have both the messages from this proc */
583     i1 = flg_v[2*recv_status.MPI_SOURCE];
584     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
585     if (i1 != -1 && i2 != -1) {
586       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
587       *rows       = rindices + 2*i2*stash->rmax;
588       *cols       = *rows + *nvals;
589       *vals       = stash->rvalues + i1*bs2*stash->rmax;
590       *flg        = 1;
591       stash->nprocessed ++;
592       match_found = PETSC_TRUE;
593     }
594   }
595   PetscFunctionReturn(0);
596 }
597