xref: /petsc/src/mat/utils/matstash.c (revision 73f4d3771d9e6ab3f04055eab794d7609818b9d3)
1 /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/
2 
3 #include "src/mat/matimpl.h"
4 
/*
       The input to the stash is ALWAYS in MatScalar precision; the internal
    storage and the output are in MatScalar precision as well.
*/
/* Stash capacity (in scalar entries) used for a brand new stash when no
   user-supplied initial size is available */
#define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStashCreate_Private"
27 int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
28 {
29   int        ierr,max,*opt,nopt;
30   PetscTruth flg;
31 
32   PetscFunctionBegin;
33   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34   stash->comm = comm;
35   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
37   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
38   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
39 
40   nopt = stash->size;
41   ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs       = bs;
56   stash->nmax     = 0;
57   stash->oldnmax  = 0;
58   stash->n        = 0;
59   stash->reallocs = -1;
60   stash->idx      = 0;
61   stash->idy      = 0;
62   stash->array    = 0;
63 
64   stash->send_waits  = 0;
65   stash->recv_waits  = 0;
66   stash->send_status = 0;
67   stash->nsends      = 0;
68   stash->nrecvs      = 0;
69   stash->svalues     = 0;
70   stash->rvalues     = 0;
71   stash->rmax        = 0;
72   stash->nprocs      = 0;
73   stash->nprocessed  = 0;
74   PetscFunctionReturn(0);
75 }
76 
77 /*
78    MatStashDestroy_Private - Destroy the stash
79 */
80 #undef __FUNCT__
81 #define __FUNCT__ "MatStashDestroy_Private"
82 int MatStashDestroy_Private(MatStash *stash)
83 {
84   int ierr;
85 
86   PetscFunctionBegin;
87   if (stash->array) {
88     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89     stash->array = 0;
90   }
91   PetscFunctionReturn(0);
92 }
93 
94 /*
95    MatStashScatterEnd_Private - This is called as the fial stage of
96    scatter. The final stages of messagepassing is done here, and
97    all the memory used for messagepassing is cleanedu up. This
98    routine also resets the stash, and deallocates the memory used
99    for the stash. It also keeps track of the current memory usage
100    so that the same value can be used the next time through.
101 */
102 #undef __FUNCT__
103 #define __FUNCT__ "MatStashScatterEnd_Private"
104 int MatStashScatterEnd_Private(MatStash *stash)
105 {
106   int         nsends=stash->nsends,ierr,bs2,oldnmax;
107   MPI_Status  *send_status;
108 
109   PetscFunctionBegin;
110   /* wait on sends */
111   if (nsends) {
112     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
113     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
114     ierr = PetscFree(send_status);CHKERRQ(ierr);
115   }
116 
117   /* Now update nmaxold to be app 10% more than max n used, this way the
118      wastage of space is reduced the next time this stash is used.
119      Also update the oldmax, only if it increases */
120   if (stash->n) {
121     bs2      = stash->bs*stash->bs;
122     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
123     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
124   }
125 
126   stash->nmax       = 0;
127   stash->n          = 0;
128   stash->reallocs   = -1;
129   stash->rmax       = 0;
130   stash->nprocessed = 0;
131 
132   if (stash->array) {
133     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
134     stash->array = 0;
135     stash->idx   = 0;
136     stash->idy   = 0;
137   }
138   if (stash->send_waits) {
139     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140     stash->send_waits = 0;
141   }
142   if (stash->recv_waits) {
143     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
144     stash->recv_waits = 0;
145   }
146   if (stash->svalues) {
147     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
148     stash->svalues = 0;
149   }
150   if (stash->rvalues) {
151     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
152     stash->rvalues = 0;
153   }
154   if (stash->nprocs) {
155     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
156     stash->nprocs = 0;
157   }
158 
159   PetscFunctionReturn(0);
160 }
161 
162 /*
163    MatStashGetInfo_Private - Gets the relavant statistics of the stash
164 
165    Input Parameters:
166    stash    - the stash
167    nstash   - the size of the stash. Indicates the number of values stored.
168    reallocs - the number of additional mallocs incurred.
169 
170 */
171 #undef __FUNCT__
172 #define __FUNCT__ "MatStashGetInfo_Private"
173 int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
174 {
175   int bs2 = stash->bs*stash->bs;
176 
177   PetscFunctionBegin;
178   *nstash   = stash->n*bs2;
179   if (stash->reallocs < 0) *reallocs = 0;
180   else                     *reallocs = stash->reallocs;
181   PetscFunctionReturn(0);
182 }
183 
184 
185 /*
186    MatStashSetInitialSize_Private - Sets the initial size of the stash
187 
188    Input Parameters:
189    stash  - the stash
190    max    - the value that is used as the max size of the stash.
191             this value is used while allocating memory.
192 */
193 #undef __FUNCT__
194 #define __FUNCT__ "MatStashSetInitialSize_Private"
195 int MatStashSetInitialSize_Private(MatStash *stash,int max)
196 {
197   PetscFunctionBegin;
198   stash->umax = max;
199   PetscFunctionReturn(0);
200 }
201 
202 /* MatStashExpand_Private - Expand the stash. This function is called
203    when the space in the stash is not sufficient to add the new values
204    being inserted into the stash.
205 
206    Input Parameters:
207    stash - the stash
208    incr  - the minimum increase requested
209 
210    Notes:
211    This routine doubles the currently used memory.
212  */
213 #undef __FUNCT__
214 #define __FUNCT__ "MatStashExpand_Private"
215 static int MatStashExpand_Private(MatStash *stash,int incr)
216 {
217   int       *n_idx,*n_idy,newnmax,bs2,ierr;
218   MatScalar *n_array;
219 
220   PetscFunctionBegin;
221   /* allocate a larger stash */
222   bs2     = stash->bs*stash->bs;
223   if (!stash->oldnmax && !stash->nmax) { /* new stash */
224     if (stash->umax)                  newnmax = stash->umax/bs2;
225     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
226   } else if (!stash->nmax) { /* resuing stash */
227     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
228     else                              newnmax = stash->oldnmax/bs2;
229   } else                              newnmax = stash->nmax*2;
230   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
231 
232   ierr  = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
233   n_idx = (int*)(n_array + bs2*newnmax);
234   n_idy = (int*)(n_idx + newnmax);
235   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
236   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
237   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
238   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
239   stash->array   = n_array;
240   stash->idx     = n_idx;
241   stash->idy     = n_idy;
242   stash->nmax    = newnmax;
243   stash->reallocs++;
244   PetscFunctionReturn(0);
245 }
246 /*
247   MatStashValuesRow_Private - inserts values into the stash. This function
248   expects the values to be roworiented. Multiple columns belong to the same row
249   can be inserted with a single call to this function.
250 
251   Input Parameters:
252   stash  - the stash
253   row    - the global row correspoiding to the values
254   n      - the number of elements inserted. All elements belong to the above row.
255   idxn   - the global column indices corresponding to each of the values.
256   values - the values inserted
257 */
258 #undef __FUNCT__
259 #define __FUNCT__ "MatStashValuesRow_Private"
260 int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values)
261 {
262   int    ierr,i;
263 
264   PetscFunctionBegin;
265   /* Check and see if we have sufficient memory */
266   if ((stash->n + n) > stash->nmax) {
267     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
268   }
269   for (i=0; i<n; i++) {
270     stash->idx[stash->n]   = row;
271     stash->idy[stash->n]   = idxn[i];
272     stash->array[stash->n] = values[i];
273     stash->n++;
274   }
275   PetscFunctionReturn(0);
276 }
277 /*
278   MatStashValuesCol_Private - inserts values into the stash. This function
279   expects the values to be columnoriented. Multiple columns belong to the same row
280   can be inserted with a single call to this function.
281 
282   Input Parameters:
283   stash   - the stash
284   row     - the global row correspoiding to the values
285   n       - the number of elements inserted. All elements belong to the above row.
286   idxn    - the global column indices corresponding to each of the values.
287   values  - the values inserted
288   stepval - the consecutive values are sepated by a distance of stepval.
289             this happens because the input is columnoriented.
290 */
291 #undef __FUNCT__
292 #define __FUNCT__ "MatStashValuesCol_Private"
293 int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval)
294 {
295   int    ierr,i;
296 
297   PetscFunctionBegin;
298   /* Check and see if we have sufficient memory */
299   if ((stash->n + n) > stash->nmax) {
300     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
301   }
302   for (i=0; i<n; i++) {
303     stash->idx[stash->n]   = row;
304     stash->idy[stash->n]   = idxn[i];
305     stash->array[stash->n] = values[i*stepval];
306     stash->n++;
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 /*
312   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
313   This function expects the values to be roworiented. Multiple columns belong
314   to the same block-row can be inserted with a single call to this function.
315   This function extracts the sub-block of values based on the dimensions of
316   the original input block, and the row,col values corresponding to the blocks.
317 
318   Input Parameters:
319   stash  - the stash
320   row    - the global block-row correspoiding to the values
321   n      - the number of elements inserted. All elements belong to the above row.
322   idxn   - the global block-column indices corresponding to each of the blocks of
323            values. Each block is of size bs*bs.
324   values - the values inserted
325   rmax   - the number of block-rows in the original block.
326   cmax   - the number of block-columsn on the original block.
327   idx    - the index of the current block-row in the original block.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
331 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
332 {
333   int       ierr,i,j,k,bs2,bs=stash->bs;
334   MatScalar *vals,*array;
335 
336   PetscFunctionBegin;
337   bs2 = bs*bs;
338   if ((stash->n+n) > stash->nmax) {
339     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
340   }
341   for (i=0; i<n; i++) {
342     stash->idx[stash->n]   = row;
343     stash->idy[stash->n] = idxn[i];
344     /* Now copy over the block of values. Store the values column oriented.
345        This enables inserting multiple blocks belonging to a row with a single
346        funtion call */
347     array = stash->array + bs2*stash->n;
348     vals  = values + idx*bs2*n + bs*i;
349     for (j=0; j<bs; j++) {
350       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
351       array += 1;
352       vals  += cmax*bs;
353     }
354     stash->n++;
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 /*
360   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
361   This function expects the values to be roworiented. Multiple columns belong
362   to the same block-row can be inserted with a single call to this function.
363   This function extracts the sub-block of values based on the dimensions of
364   the original input block, and the row,col values corresponding to the blocks.
365 
366   Input Parameters:
367   stash  - the stash
368   row    - the global block-row correspoiding to the values
369   n      - the number of elements inserted. All elements belong to the above row.
370   idxn   - the global block-column indices corresponding to each of the blocks of
371            values. Each block is of size bs*bs.
372   values - the values inserted
373   rmax   - the number of block-rows in the original block.
374   cmax   - the number of block-columsn on the original block.
375   idx    - the index of the current block-row in the original block.
376 */
377 #undef __FUNCT__
378 #define __FUNCT__ "MatStashValuesColBlocked_Private"
379 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
380 {
381   int       ierr,i,j,k,bs2,bs=stash->bs;
382   MatScalar *vals,*array;
383 
384   PetscFunctionBegin;
385   bs2 = bs*bs;
386   if ((stash->n+n) > stash->nmax) {
387     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
388   }
389   for (i=0; i<n; i++) {
390     stash->idx[stash->n]   = row;
391     stash->idy[stash->n] = idxn[i];
392     /* Now copy over the block of values. Store the values column oriented.
393      This enables inserting multiple blocks belonging to a row with a single
394      funtion call */
395     array = stash->array + bs2*stash->n;
396     vals  = values + idx*bs + bs2*rmax*i;
397     for (j=0; j<bs; j++) {
398       for (k=0; k<bs; k++) {array[k] = vals[k];}
399       array += bs;
400       vals  += rmax*bs;
401     }
402     stash->n++;
403   }
404   PetscFunctionReturn(0);
405 }
406 /*
407   MatStashScatterBegin_Private - Initiates the transfer of values to the
408   correct owners. This function goes through the stash, and check the
409   owners of each stashed value, and sends the values off to the owner
410   processors.
411 
412   Input Parameters:
413   stash  - the stash
414   owners - an array of size 'no-of-procs' which gives the ownership range
415            for each node.
416 
417   Notes: The 'owners' array in the cased of the blocked-stash has the
418   ranges specified blocked global indices, and for the regular stash in
419   the proper global indices.
420 */
421 #undef __FUNCT__
422 #define __FUNCT__ "MatStashScatterBegin_Private"
423 int MatStashScatterBegin_Private(MatStash *stash,int *owners)
424 {
425   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
426   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
427   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
428   MatScalar   *rvalues,*svalues;
429   MPI_Comm    comm = stash->comm;
430   MPI_Request *send_waits,*recv_waits;
431 
432   PetscFunctionBegin;
433 
434   bs2   = stash->bs*stash->bs;
435   /*  first count number of contributors to each processor */
436   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
437   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
438   procs = nprocs + size;
439   ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr);
440 
441   for (i=0; i<stash->n; i++) {
442     idx = stash->idx[i];
443     for (j=0; j<size; j++) {
444       if (idx >= owners[j] && idx < owners[j+1]) {
445         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
446       }
447     }
448   }
449   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
450 
451   /* inform other processors of number of messages and max length*/
452   ierr      = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
453   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
454   nmax      = work[rank];
455   nreceives = work[size+rank];
456   ierr      = PetscFree(work);CHKERRQ(ierr);
457   /* post receives:
458      since we don't know how long each individual message is we
459      allocate the largest needed buffer for each receive. Potentially
460      this is a lot of wasted space.
461   */
462   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);CHKERRQ(ierr);
463   rindices = (int*)(rvalues + bs2*nreceives*nmax);
464   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
465   for (i=0,count=0; i<nreceives; i++) {
466     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
467                      recv_waits+count++);CHKERRQ(ierr);
468     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);CHKERRQ(ierr);
476   sindices = (int*)(svalues + bs2*stash->n);
477   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr     = PetscMalloc(2*size*sizeof(int),&startv);CHKERRQ(ierr);
479   starti   = startv + size;
480   /* use 2 sends the first with all_a, the next with all_i and all_j */
481   startv[0]  = 0; starti[0] = 0;
482   for (i=1; i<size; i++) {
483     startv[i] = startv[i-1] + nprocs[i-1];
484     starti[i] = starti[i-1] + nprocs[i-1]*2;
485   }
486   for (i=0; i<stash->n; i++) {
487     j = owner[i];
488     if (bs2 == 1) {
489       svalues[startv[j]]              = stash->array[i];
490     } else {
491       int       k;
492       MatScalar *buf1,*buf2;
493       buf1 = svalues+bs2*startv[j];
494       buf2 = stash->array+bs2*i;
495       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
496     }
497     sindices[starti[j]]             = stash->idx[i];
498     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
499     startv[j]++;
500     starti[j]++;
501   }
502   startv[0] = 0;
503   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];}
504   for (i=0,count=0; i<size; i++) {
505     if (procs[i]) {
506       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm,
507                        send_waits+count++);CHKERRQ(ierr);
508       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
509                        send_waits+count++);CHKERRQ(ierr);
510     }
511   }
512   ierr = PetscFree(owner);CHKERRQ(ierr);
513   ierr = PetscFree(startv);CHKERRQ(ierr);
514   /* This memory is reused in scatter end  for a different purpose*/
515   for (i=0; i<2*size; i++) nprocs[i] = -1;
516   stash->nprocs      = nprocs;
517 
518   stash->svalues    = svalues;    stash->rvalues    = rvalues;
519   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
520   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
521   stash->rmax       = nmax;
522   PetscFunctionReturn(0);
523 }
524 
525 /*
526    MatStashScatterGetMesg_Private - This function waits on the receives posted
527    in the function MatStashScatterBegin_Private() and returns one message at
528    a time to the calling function. If no messages are left, it indicates this
529    by setting flg = 0, else it sets flg = 1.
530 
531    Input Parameters:
532    stash - the stash
533 
534    Output Parameters:
535    nvals - the number of entries in the current message.
536    rows  - an array of row indices (or blocked indices) corresponding to the values
537    cols  - an array of columnindices (or blocked indices) corresponding to the values
538    vals  - the values
539    flg   - 0 indicates no more message left, and the current call has no values associated.
540            1 indicates that the current call successfully received a message, and the
541              other output parameters nvals,rows,cols,vals are set appropriately.
542 */
543 #undef __FUNCT__
544 #define __FUNCT__ "MatStashScatterGetMesg_Private"
545 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
546 {
547   int         i,ierr,size=stash->size,*flg_v,*flg_i,i1,i2,*rindices,bs2;
548   MPI_Status  recv_status;
549   PetscTruth  match_found = PETSC_FALSE;
550 
551   PetscFunctionBegin;
552 
553   *flg = 0; /* When a message is discovered this is reset to 1 */
554   /* Return if no more messages to process */
555   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
556 
557   flg_v = stash->nprocs;
558   flg_i = flg_v + size;
559   bs2   = stash->bs*stash->bs;
560   /* If a matching pair of receieves are found, process them, and return the data to
561      the calling function. Until then keep receiving messages */
562   while (!match_found) {
563     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
564     /* Now pack the received message into a structure which is useable by others */
565     if (i % 2) {
566       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
567       flg_i[recv_status.MPI_SOURCE] = i/2;
568       *nvals = *nvals/2; /* This message has both row indices and col indices */
569     } else {
570       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
571       flg_v[recv_status.MPI_SOURCE] = i/2;
572       *nvals = *nvals/bs2;
573     }
574 
575     /* Check if we have both the messages from this proc */
576     i1 = flg_v[recv_status.MPI_SOURCE];
577     i2 = flg_i[recv_status.MPI_SOURCE];
578     if (i1 != -1 && i2 != -1) {
579       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
580       *rows       = rindices + 2*i2*stash->rmax;
581       *cols       = *rows + *nvals;
582       *vals       = stash->rvalues + i1*bs2*stash->rmax;
583       *flg        = 1;
584       stash->nprocessed ++;
585       match_found = PETSC_TRUE;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590