xref: /petsc/src/mat/utils/matstash.c (revision a58c3bc3391eee32bc3fd94ac7edeea38fe57aae)
1 #define PETSCMAT_DLL
2 
3 #include "include/private/matimpl.h"
4 
5 /*
6        The input to the stash is ALWAYS in MatScalar precision, and the
7     internal storage and output is also in MatScalar.
8 */
9 #define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStashCreate_Private"
27 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
28 {
29   PetscErrorCode ierr;
30   PetscInt       max,*opt,nopt;
31   PetscTruth     flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35   stash->comm = comm;
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40 
41   nopt = stash->size;
42   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44   if (flg) {
45     if (nopt == 1)                max = opt[0];
46     else if (nopt == stash->size) max = opt[stash->rank];
47     else if (stash->rank < nopt)  max = opt[stash->rank];
48     else                          max = 0; /* Use default */
49     stash->umax = max;
50   } else {
51     stash->umax = 0;
52   }
53   ierr = PetscFree(opt);CHKERRQ(ierr);
54   if (bs <= 0) bs = 1;
55 
56   stash->bs       = bs;
57   stash->nmax     = 0;
58   stash->oldnmax  = 0;
59   stash->n        = 0;
60   stash->reallocs = -1;
61   stash->space_head = 0;
62   stash->space      = 0;
63 
64   stash->send_waits  = 0;
65   stash->recv_waits  = 0;
66   stash->send_status = 0;
67   stash->nsends      = 0;
68   stash->nrecvs      = 0;
69   stash->svalues     = 0;
70   stash->rvalues     = 0;
71   stash->rindices    = 0;
72   stash->nprocs      = 0;
73   stash->nprocessed  = 0;
74   PetscFunctionReturn(0);
75 }
76 
77 /*
78    MatStashDestroy_Private - Destroy the stash
79 */
80 #undef __FUNCT__
81 #define __FUNCT__ "MatStashDestroy_Private"
82 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
83 {
84   PetscErrorCode ierr;
85 
86   PetscFunctionBegin;
87   if (stash->space_head){
88     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
89     stash->space_head = 0;
90     stash->space      = 0;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 /*
96    MatStashScatterEnd_Private - This is called as the fial stage of
97    scatter. The final stages of messagepassing is done here, and
98    all the memory used for messagepassing is cleanedu up. This
99    routine also resets the stash, and deallocates the memory used
100    for the stash. It also keeps track of the current memory usage
101    so that the same value can be used the next time through.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "MatStashScatterEnd_Private"
105 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106 {
107   PetscErrorCode ierr;
108   PetscInt       nsends=stash->nsends,bs2,oldnmax;
109   MPI_Status     *send_status;
110 
111   PetscFunctionBegin;
112   /* wait on sends */
113   if (nsends) {
114     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116     ierr = PetscFree(send_status);CHKERRQ(ierr);
117   }
118 
119   /* Now update nmaxold to be app 10% more than max n used, this way the
120      wastage of space is reduced the next time this stash is used.
121      Also update the oldmax, only if it increases */
122   if (stash->n) {
123     bs2      = stash->bs*stash->bs;
124     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126   }
127 
128   stash->nmax       = 0;
129   stash->n          = 0;
130   stash->reallocs   = -1;
131   stash->nprocessed = 0;
132   if (stash->space_head){
133     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
134     stash->space_head = 0;
135     stash->space      = 0;
136   }
137   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138   stash->send_waits = 0;
139   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
140   stash->recv_waits = 0;
141   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
142   stash->svalues = 0;
143   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
144   stash->rvalues = 0;
145   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
146   stash->rindices = 0;
147   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
148   stash->nprocs = 0;
149   PetscFunctionReturn(0);
150 }
151 
152 /*
153    MatStashGetInfo_Private - Gets the relavant statistics of the stash
154 
155    Input Parameters:
156    stash    - the stash
157    nstash   - the size of the stash. Indicates the number of values stored.
158    reallocs - the number of additional mallocs incurred.
159 
160 */
161 #undef __FUNCT__
162 #define __FUNCT__ "MatStashGetInfo_Private"
163 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
164 {
165   PetscInt bs2 = stash->bs*stash->bs;
166 
167   PetscFunctionBegin;
168   if (nstash) *nstash   = stash->n*bs2;
169   if (reallocs) {
170     if (stash->reallocs < 0) *reallocs = 0;
171     else                     *reallocs = stash->reallocs;
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177    MatStashSetInitialSize_Private - Sets the initial size of the stash
178 
179    Input Parameters:
180    stash  - the stash
181    max    - the value that is used as the max size of the stash.
182             this value is used while allocating memory.
183 */
184 #undef __FUNCT__
185 #define __FUNCT__ "MatStashSetInitialSize_Private"
186 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
187 {
188   PetscFunctionBegin;
189   stash->umax = max;
190   PetscFunctionReturn(0);
191 }
192 
193 /* MatStashExpand_Private - Expand the stash. This function is called
194    when the space in the stash is not sufficient to add the new values
195    being inserted into the stash.
196 
197    Input Parameters:
198    stash - the stash
199    incr  - the minimum increase requested
200 
201    Notes:
202    This routine doubles the currently used memory.
203  */
204 #undef __FUNCT__
205 #define __FUNCT__ "MatStashExpand_Private"
206 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
207 {
208   PetscErrorCode ierr;
209   PetscInt       newnmax,bs2= stash->bs*stash->bs;
210 
211   PetscFunctionBegin;
212   /* allocate a larger stash */
213   if (!stash->oldnmax && !stash->nmax) { /* new stash */
214     if (stash->umax)                  newnmax = stash->umax/bs2;
215     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
216   } else if (!stash->nmax) { /* resuing stash */
217     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
218     else                              newnmax = stash->oldnmax/bs2;
219   } else                              newnmax = stash->nmax*2;
220   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
221 
222   /* Get a MatStashSpace and attach it to stash */
223   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
224   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
225     stash->space_head = stash->space;
226   }
227 
228   stash->reallocs++;
229   stash->nmax = newnmax;
230   PetscFunctionReturn(0);
231 }
232 /*
233   MatStashValuesRow_Private - inserts values into the stash. This function
234   expects the values to be roworiented. Multiple columns belong to the same row
235   can be inserted with a single call to this function.
236 
237   Input Parameters:
238   stash  - the stash
239   row    - the global row correspoiding to the values
240   n      - the number of elements inserted. All elements belong to the above row.
241   idxn   - the global column indices corresponding to each of the values.
242   values - the values inserted
243 */
244 #undef __FUNCT__
245 #define __FUNCT__ "MatStashValuesRow_Private"
246 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
247 {
248   PetscErrorCode     ierr;
249   PetscInt           i,k;
250   PetscMatStashSpace space=stash->space;
251 
252   PetscFunctionBegin;
253   /* Check and see if we have sufficient memory */
254   if (!space || space->local_remaining < n){
255     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
256   }
257   space = stash->space;
258   k     = space->local_used;
259   for (i=0; i<n; i++) {
260     space->idx[k] = row;
261     space->idy[k] = idxn[i];
262     space->val[k] = values[i];
263     k++;
264   }
265   stash->n               += n;
266   space->local_used      += n;
267   space->local_remaining -= n;
268   PetscFunctionReturn(0);
269 }
270 
271 /*
272   MatStashValuesCol_Private - inserts values into the stash. This function
273   expects the values to be columnoriented. Multiple columns belong to the same row
274   can be inserted with a single call to this function.
275 
276   Input Parameters:
277   stash   - the stash
278   row     - the global row correspoiding to the values
279   n       - the number of elements inserted. All elements belong to the above row.
280   idxn    - the global column indices corresponding to each of the values.
281   values  - the values inserted
282   stepval - the consecutive values are sepated by a distance of stepval.
283             this happens because the input is columnoriented.
284 */
285 #undef __FUNCT__
286 #define __FUNCT__ "MatStashValuesCol_Private"
287 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
288 {
289   PetscErrorCode     ierr;
290   PetscInt           i,k;
291   PetscMatStashSpace space=stash->space;
292 
293   PetscFunctionBegin;
294   /* Check and see if we have sufficient memory */
295   if (!space || space->local_remaining < n){
296     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
297   }
298   space = stash->space;
299   k = space->local_used;
300   for (i=0; i<n; i++) {
301     space->idx[k] = row;
302     space->idy[k] = idxn[i];
303     space->val[k] = values[i*stepval];
304     k++;
305   }
306   stash->n               += n;
307   space->local_used      += n;
308   space->local_remaining -= n;
309   PetscFunctionReturn(0);
310 }
311 
312 /*
313   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
314   This function expects the values to be roworiented. Multiple columns belong
315   to the same block-row can be inserted with a single call to this function.
316   This function extracts the sub-block of values based on the dimensions of
317   the original input block, and the row,col values corresponding to the blocks.
318 
319   Input Parameters:
320   stash  - the stash
321   row    - the global block-row correspoiding to the values
322   n      - the number of elements inserted. All elements belong to the above row.
323   idxn   - the global block-column indices corresponding to each of the blocks of
324            values. Each block is of size bs*bs.
325   values - the values inserted
326   rmax   - the number of block-rows in the original block.
327   cmax   - the number of block-columsn on the original block.
328   idx    - the index of the current block-row in the original block.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
332 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
333 {
334   PetscErrorCode     ierr;
335   PetscInt           i,j,k,bs2,bs=stash->bs,l;
336   const MatScalar    *vals;
337   MatScalar          *array;
338   PetscMatStashSpace space=stash->space;
339 
340   PetscFunctionBegin;
341   if (!space || space->local_remaining < n){
342     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
343   }
344   space = stash->space;
345   l     = space->local_used;
346   bs2   = bs*bs;
347   for (i=0; i<n; i++) {
348     space->idx[l] = row;
349     space->idy[l] = idxn[i];
350     /* Now copy over the block of values. Store the values column oriented.
351        This enables inserting multiple blocks belonging to a row with a single
352        funtion call */
353     array = space->val + bs2*l;
354     vals  = values + idx*bs2*n + bs*i;
355     for (j=0; j<bs; j++) {
356       for (k=0; k<bs; k++) array[k*bs] = vals[k];
357       array++;
358       vals  += cmax*bs;
359     }
360     l++;
361   }
362   stash->n               += n;
363   space->local_used      += n;
364   space->local_remaining -= n;
365   PetscFunctionReturn(0);
366 }
367 
368 /*
369   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
370   This function expects the values to be roworiented. Multiple columns belong
371   to the same block-row can be inserted with a single call to this function.
372   This function extracts the sub-block of values based on the dimensions of
373   the original input block, and the row,col values corresponding to the blocks.
374 
375   Input Parameters:
376   stash  - the stash
377   row    - the global block-row correspoiding to the values
378   n      - the number of elements inserted. All elements belong to the above row.
379   idxn   - the global block-column indices corresponding to each of the blocks of
380            values. Each block is of size bs*bs.
381   values - the values inserted
382   rmax   - the number of block-rows in the original block.
383   cmax   - the number of block-columsn on the original block.
384   idx    - the index of the current block-row in the original block.
385 */
386 #undef __FUNCT__
387 #define __FUNCT__ "MatStashValuesColBlocked_Private"
388 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
389 {
390   PetscErrorCode  ierr;
391   PetscInt        i,j,k,bs2,bs=stash->bs,l;
392   const MatScalar *vals;
393   MatScalar       *array;
394   PetscMatStashSpace space=stash->space;
395 
396   PetscFunctionBegin;
397   if (!space || space->local_remaining < n){
398     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
399   }
400   space = stash->space;
401   l     = space->local_used;
402   bs2   = bs*bs;
403   for (i=0; i<n; i++) {
404     space->idx[l] = row;
405     space->idy[l] = idxn[i];
406     /* Now copy over the block of values. Store the values column oriented.
407      This enables inserting multiple blocks belonging to a row with a single
408      funtion call */
409     array = space->val + bs2*l;
410     vals  = values + idx*bs2*n + bs*i;
411     for (j=0; j<bs; j++) {
412       for (k=0; k<bs; k++) {array[k] = vals[k];}
413       array += bs;
414       vals  += rmax*bs;
415     }
416     l++;
417   }
418   stash->n               += n;
419   space->local_used      += n;
420   space->local_remaining -= n;
421   PetscFunctionReturn(0);
422 }
423 /*
424   MatStashScatterBegin_Private - Initiates the transfer of values to the
425   correct owners. This function goes through the stash, and check the
426   owners of each stashed value, and sends the values off to the owner
427   processors.
428 
429   Input Parameters:
430   stash  - the stash
431   owners - an array of size 'no-of-procs' which gives the ownership range
432            for each node.
433 
434   Notes: The 'owners' array in the cased of the blocked-stash has the
435   ranges specified blocked global indices, and for the regular stash in
436   the proper global indices.
437 */
438 #undef __FUNCT__
439 #define __FUNCT__ "MatStashScatterBegin_Private"
440 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
441 {
442   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
443   PetscInt       size=stash->size,nsends;
444   PetscErrorCode ierr;
445   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
446   MatScalar      **rvalues,*svalues;
447   MPI_Comm       comm = stash->comm;
448   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
449   PetscMPIInt    *nprocs,*nlengths,nreceives;
450   PetscInt       *sp_idx,*sp_idy;
451   MatScalar      *sp_val;
452   PetscMatStashSpace space,space_next;
453 
454   PetscFunctionBegin;
455   bs2 = stash->bs*stash->bs;
456 
457   /*  first count number of contributors to each processor */
458   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
459   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
460   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
461 
462   nlengths = nprocs+size;
463   i = j    = 0;
464   lastidx  = -1;
465   space    = stash->space_head;
466   while (space != PETSC_NULL){
467     space_next = space->next;
468     sp_idx     = space->idx;
469     for (l=0; l<space->local_used; l++){
470       /* if indices are NOT locally sorted, need to start search at the beginning */
471       if (lastidx > (idx = sp_idx[l])) j = 0;
472       lastidx = idx;
473       for (; j<size; j++) {
474         if (idx >= owners[j] && idx < owners[j+1]) {
475           nlengths[j]++; owner[i] = j; break;
476         }
477       }
478       i++;
479     }
480     space = space_next;
481   }
482   /* Now check what procs get messages - and compute nsends. */
483   for (i=0, nsends=0 ; i<size; i++) {
484     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
485   }
486 
487   { int  *onodes,*olengths;
488   /* Determine the number of messages to expect, their lengths, from from-ids */
489   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
490   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
491   /* since clubbing row,col - lengths are multiplied by 2 */
492   for (i=0; i<nreceives; i++) olengths[i] *=2;
493   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
494   /* values are size 'bs2' lengths (and remove earlier factor 2 */
495   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
496   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
497   ierr = PetscFree(onodes);CHKERRQ(ierr);
498   ierr = PetscFree(olengths);CHKERRQ(ierr);
499   }
500 
501   /* do sends:
502       1) starts[i] gives the starting index in svalues for stuff going to
503          the ith processor
504   */
505   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
506   sindices = (PetscInt*)(svalues + bs2*stash->n);
507   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
508   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
509   starti   = startv + size;
510   /* use 2 sends the first with all_a, the next with all_i and all_j */
511   startv[0]  = 0; starti[0] = 0;
512   for (i=1; i<size; i++) {
513     startv[i] = startv[i-1] + nlengths[i-1];
514     starti[i] = starti[i-1] + nlengths[i-1]*2;
515   }
516 
517   i     = 0;
518   space = stash->space_head;
519   while (space != PETSC_NULL){
520     space_next = space->next;
521     sp_idx = space->idx;
522     sp_idy = space->idy;
523     sp_val = space->val;
524     for (l=0; l<space->local_used; l++){
525       j = owner[i];
526       if (bs2 == 1) {
527         svalues[startv[j]] = sp_val[l];
528       } else {
529         PetscInt  k;
530         MatScalar *buf1,*buf2;
531         buf1 = svalues+bs2*startv[j];
532         buf2 = space->val + bs2*l;
533         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
534       }
535       sindices[starti[j]]             = sp_idx[l];
536       sindices[starti[j]+nlengths[j]] = sp_idy[l];
537       startv[j]++;
538       starti[j]++;
539       i++;
540     }
541     space = space_next;
542   }
543   startv[0] = 0;
544   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
545 
546   for (i=0,count=0; i<size; i++) {
547     if (nprocs[i]) {
548       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
549       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
550     }
551   }
552 #if defined(PETSC_USE_INFO)
553   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
554   for (i=0; i<size; i++) {
555     if (nprocs[i]) {
556       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
557     }
558   }
559 #endif
560   ierr = PetscFree(owner);CHKERRQ(ierr);
561   ierr = PetscFree(startv);CHKERRQ(ierr);
562   /* This memory is reused in scatter end  for a different purpose*/
563   for (i=0; i<2*size; i++) nprocs[i] = -1;
564   stash->nprocs = nprocs;
565 
566   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
568 
569   for (i=0; i<nreceives; i++) {
570     recv_waits[2*i]   = recv_waits1[i];
571     recv_waits[2*i+1] = recv_waits2[i];
572   }
573   stash->recv_waits = recv_waits;
574   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
575   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
576 
577   stash->svalues    = svalues;    stash->rvalues     = rvalues;
578   stash->rindices   = rindices;   stash->send_waits  = send_waits;
579   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
580   PetscFunctionReturn(0);
581 }
582 
583 /*
584    MatStashScatterGetMesg_Private - This function waits on the receives posted
585    in the function MatStashScatterBegin_Private() and returns one message at
586    a time to the calling function. If no messages are left, it indicates this
587    by setting flg = 0, else it sets flg = 1.
588 
589    Input Parameters:
590    stash - the stash
591 
592    Output Parameters:
593    nvals - the number of entries in the current message.
594    rows  - an array of row indices (or blocked indices) corresponding to the values
595    cols  - an array of columnindices (or blocked indices) corresponding to the values
596    vals  - the values
597    flg   - 0 indicates no more message left, and the current call has no values associated.
598            1 indicates that the current call successfully received a message, and the
599              other output parameters nvals,rows,cols,vals are set appropriately.
600 */
601 #undef __FUNCT__
602 #define __FUNCT__ "MatStashScatterGetMesg_Private"
603 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
604 {
605   PetscErrorCode ierr;
606   PetscMPIInt    i,*flg_v,i1,i2;
607   PetscInt       bs2;
608   MPI_Status     recv_status;
609   PetscTruth     match_found = PETSC_FALSE;
610 
611   PetscFunctionBegin;
612 
613   *flg = 0; /* When a message is discovered this is reset to 1 */
614   /* Return if no more messages to process */
615   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
616 
617   flg_v = stash->nprocs;
618   bs2   = stash->bs*stash->bs;
619   /* If a matching pair of receieves are found, process them, and return the data to
620      the calling function. Until then keep receiving messages */
621   while (!match_found) {
622     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
623     /* Now pack the received message into a structure which is useable by others */
624     if (i % 2) {
625       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
626       flg_v[2*recv_status.MPI_SOURCE] = i/2;
627       *nvals = *nvals/bs2;
628     } else {
629       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
630       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
631       *nvals = *nvals/2; /* This message has both row indices and col indices */
632     }
633 
634     /* Check if we have both the messages from this proc */
635     i1 = flg_v[2*recv_status.MPI_SOURCE];
636     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
637     if (i1 != -1 && i2 != -1) {
638       *rows       = stash->rindices[i2];
639       *cols       = *rows + *nvals;
640       *vals       = stash->rvalues[i1];
641       *flg        = 1;
642       stash->nprocessed ++;
643       match_found = PETSC_TRUE;
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648