xref: /petsc/src/mat/utils/matstash.c (revision fe998a80077c9ee0917a39496df43fc256e1b478)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 /*
7   MatStashCreate_Private - Creates a stash,currently used for all the parallel
8   matrix implementations. The stash is where elements of a matrix destined
9   to be stored on other processors are kept until matrix assembly is done.
10 
11   This is a simple minded stash. Simply adds entries to end of stash.
12 
13   Input Parameters:
14   comm - communicator, required for scatters.
15   bs   - stash block size. used when stashing blocks of values
16 
17   Output Parameters:
18   stash    - the newly created stash
19 */
20 #undef __FUNCT__
21 #define __FUNCT__ "MatStashCreate_Private"
22 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
23 {
24   PetscErrorCode ierr;
25   PetscInt       max,*opt,nopt,i;
26   PetscBool      flg;
27 
28   PetscFunctionBegin;
29   /* Require 2 tags,get the second using PetscCommGetNewTag() */
30   stash->comm = comm;
31 
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
37   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38 
39 
40   nopt = stash->size;
41   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
42   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43   if (flg) {
44     if (nopt == 1)                max = opt[0];
45     else if (nopt == stash->size) max = opt[stash->rank];
46     else if (stash->rank < nopt)  max = opt[stash->rank];
47     else                          max = 0; /* Use default */
48     stash->umax = max;
49   } else {
50     stash->umax = 0;
51   }
52   ierr = PetscFree(opt);CHKERRQ(ierr);
53   if (bs <= 0) bs = 1;
54 
55   stash->bs         = bs;
56   stash->nmax       = 0;
57   stash->oldnmax    = 0;
58   stash->n          = 0;
59   stash->reallocs   = -1;
60   stash->space_head = 0;
61   stash->space      = 0;
62 
63   stash->send_waits  = 0;
64   stash->recv_waits  = 0;
65   stash->send_status = 0;
66   stash->nsends      = 0;
67   stash->nrecvs      = 0;
68   stash->svalues     = 0;
69   stash->rvalues     = 0;
70   stash->rindices    = 0;
71   stash->nprocessed  = 0;
72   stash->reproduce   = PETSC_FALSE;
73 
74   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
89 
90   stash->space = 0;
91 
92   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the final stage of
98    scatter. The final stages of message passing is done here, and
99    all the memory used for message passing is cleaned up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
110   MPI_Status     *send_status;
111 
112   PetscFunctionBegin;
113   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
114   /* wait on sends */
115   if (nsends) {
116     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
117     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
118     ierr = PetscFree(send_status);CHKERRQ(ierr);
119   }
120 
121   /* Now update nmaxold to be app 10% more than max n used, this way the
122      wastage of space is reduced the next time this stash is used.
123      Also update the oldmax, only if it increases */
124   if (stash->n) {
125     bs2     = stash->bs*stash->bs;
126     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
127     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
128   }
129 
130   stash->nmax       = 0;
131   stash->n          = 0;
132   stash->reallocs   = -1;
133   stash->nprocessed = 0;
134 
135   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
136 
137   stash->space = 0;
138 
139   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
141   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
142   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
143   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
144   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
145   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 /*
150    MatStashGetInfo_Private - Gets the relavant statistics of the stash
151 
152    Input Parameters:
153    stash    - the stash
154    nstash   - the size of the stash. Indicates the number of values stored.
155    reallocs - the number of additional mallocs incurred.
156 
157 */
158 #undef __FUNCT__
159 #define __FUNCT__ "MatStashGetInfo_Private"
160 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
161 {
162   PetscInt bs2 = stash->bs*stash->bs;
163 
164   PetscFunctionBegin;
165   if (nstash) *nstash = stash->n*bs2;
166   if (reallocs) {
167     if (stash->reallocs < 0) *reallocs = 0;
168     else                     *reallocs = stash->reallocs;
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 /*
174    MatStashSetInitialSize_Private - Sets the initial size of the stash
175 
176    Input Parameters:
177    stash  - the stash
178    max    - the value that is used as the max size of the stash.
179             this value is used while allocating memory.
180 */
181 #undef __FUNCT__
182 #define __FUNCT__ "MatStashSetInitialSize_Private"
183 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
184 {
185   PetscFunctionBegin;
186   stash->umax = max;
187   PetscFunctionReturn(0);
188 }
189 
190 /* MatStashExpand_Private - Expand the stash. This function is called
191    when the space in the stash is not sufficient to add the new values
192    being inserted into the stash.
193 
194    Input Parameters:
195    stash - the stash
196    incr  - the minimum increase requested
197 
198    Notes:
199    This routine doubles the currently used memory.
200  */
201 #undef __FUNCT__
202 #define __FUNCT__ "MatStashExpand_Private"
203 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
204 {
205   PetscErrorCode ierr;
206   PetscInt       newnmax,bs2= stash->bs*stash->bs;
207 
208   PetscFunctionBegin;
209   /* allocate a larger stash */
210   if (!stash->oldnmax && !stash->nmax) { /* new stash */
211     if (stash->umax)                  newnmax = stash->umax/bs2;
212     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
213   } else if (!stash->nmax) { /* resuing stash */
214     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
215     else                              newnmax = stash->oldnmax/bs2;
216   } else                              newnmax = stash->nmax*2;
217   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
218 
219   /* Get a MatStashSpace and attach it to stash */
220   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
221   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
222     stash->space_head = stash->space;
223   }
224 
225   stash->reallocs++;
226   stash->nmax = newnmax;
227   PetscFunctionReturn(0);
228 }
229 /*
230   MatStashValuesRow_Private - inserts values into the stash. This function
231   expects the values to be roworiented. Multiple columns belong to the same row
232   can be inserted with a single call to this function.
233 
234   Input Parameters:
235   stash  - the stash
236   row    - the global row correspoiding to the values
237   n      - the number of elements inserted. All elements belong to the above row.
238   idxn   - the global column indices corresponding to each of the values.
239   values - the values inserted
240 */
241 #undef __FUNCT__
242 #define __FUNCT__ "MatStashValuesRow_Private"
243 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
244 {
245   PetscErrorCode     ierr;
246   PetscInt           i,k,cnt = 0;
247   PetscMatStashSpace space=stash->space;
248 
249   PetscFunctionBegin;
250   /* Check and see if we have sufficient memory */
251   if (!space || space->local_remaining < n) {
252     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
253   }
254   space = stash->space;
255   k     = space->local_used;
256   for (i=0; i<n; i++) {
257     if (ignorezeroentries && (values[i] == 0.0)) continue;
258     space->idx[k] = row;
259     space->idy[k] = idxn[i];
260     space->val[k] = values[i];
261     k++;
262     cnt++;
263   }
264   stash->n               += cnt;
265   space->local_used      += cnt;
266   space->local_remaining -= cnt;
267   PetscFunctionReturn(0);
268 }
269 
270 /*
271   MatStashValuesCol_Private - inserts values into the stash. This function
272   expects the values to be columnoriented. Multiple columns belong to the same row
273   can be inserted with a single call to this function.
274 
275   Input Parameters:
276   stash   - the stash
277   row     - the global row correspoiding to the values
278   n       - the number of elements inserted. All elements belong to the above row.
279   idxn    - the global column indices corresponding to each of the values.
280   values  - the values inserted
281   stepval - the consecutive values are sepated by a distance of stepval.
282             this happens because the input is columnoriented.
283 */
284 #undef __FUNCT__
285 #define __FUNCT__ "MatStashValuesCol_Private"
286 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
287 {
288   PetscErrorCode     ierr;
289   PetscInt           i,k,cnt = 0;
290   PetscMatStashSpace space=stash->space;
291 
292   PetscFunctionBegin;
293   /* Check and see if we have sufficient memory */
294   if (!space || space->local_remaining < n) {
295     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
296   }
297   space = stash->space;
298   k     = space->local_used;
299   for (i=0; i<n; i++) {
300     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
301     space->idx[k] = row;
302     space->idy[k] = idxn[i];
303     space->val[k] = values[i*stepval];
304     k++;
305     cnt++;
306   }
307   stash->n               += cnt;
308   space->local_used      += cnt;
309   space->local_remaining -= cnt;
310   PetscFunctionReturn(0);
311 }
312 
313 /*
314   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
315   This function expects the values to be roworiented. Multiple columns belong
316   to the same block-row can be inserted with a single call to this function.
317   This function extracts the sub-block of values based on the dimensions of
318   the original input block, and the row,col values corresponding to the blocks.
319 
320   Input Parameters:
321   stash  - the stash
322   row    - the global block-row correspoiding to the values
323   n      - the number of elements inserted. All elements belong to the above row.
324   idxn   - the global block-column indices corresponding to each of the blocks of
325            values. Each block is of size bs*bs.
326   values - the values inserted
327   rmax   - the number of block-rows in the original block.
328   cmax   - the number of block-columsn on the original block.
329   idx    - the index of the current block-row in the original block.
330 */
331 #undef __FUNCT__
332 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
333 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
334 {
335   PetscErrorCode     ierr;
336   PetscInt           i,j,k,bs2,bs=stash->bs,l;
337   const PetscScalar  *vals;
338   PetscScalar        *array;
339   PetscMatStashSpace space=stash->space;
340 
341   PetscFunctionBegin;
342   if (!space || space->local_remaining < n) {
343     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
344   }
345   space = stash->space;
346   l     = space->local_used;
347   bs2   = bs*bs;
348   for (i=0; i<n; i++) {
349     space->idx[l] = row;
350     space->idy[l] = idxn[i];
351     /* Now copy over the block of values. Store the values column oriented.
352        This enables inserting multiple blocks belonging to a row with a single
353        funtion call */
354     array = space->val + bs2*l;
355     vals  = values + idx*bs2*n + bs*i;
356     for (j=0; j<bs; j++) {
357       for (k=0; k<bs; k++) array[k*bs] = vals[k];
358       array++;
359       vals += cmax*bs;
360     }
361     l++;
362   }
363   stash->n               += n;
364   space->local_used      += n;
365   space->local_remaining -= n;
366   PetscFunctionReturn(0);
367 }
368 
369 /*
370   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
371   This function expects the values to be roworiented. Multiple columns belong
372   to the same block-row can be inserted with a single call to this function.
373   This function extracts the sub-block of values based on the dimensions of
374   the original input block, and the row,col values corresponding to the blocks.
375 
376   Input Parameters:
377   stash  - the stash
378   row    - the global block-row correspoiding to the values
379   n      - the number of elements inserted. All elements belong to the above row.
380   idxn   - the global block-column indices corresponding to each of the blocks of
381            values. Each block is of size bs*bs.
382   values - the values inserted
383   rmax   - the number of block-rows in the original block.
384   cmax   - the number of block-columsn on the original block.
385   idx    - the index of the current block-row in the original block.
386 */
387 #undef __FUNCT__
388 #define __FUNCT__ "MatStashValuesColBlocked_Private"
389 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
390 {
391   PetscErrorCode     ierr;
392   PetscInt           i,j,k,bs2,bs=stash->bs,l;
393   const PetscScalar  *vals;
394   PetscScalar        *array;
395   PetscMatStashSpace space=stash->space;
396 
397   PetscFunctionBegin;
398   if (!space || space->local_remaining < n) {
399     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
400   }
401   space = stash->space;
402   l     = space->local_used;
403   bs2   = bs*bs;
404   for (i=0; i<n; i++) {
405     space->idx[l] = row;
406     space->idy[l] = idxn[i];
407     /* Now copy over the block of values. Store the values column oriented.
408      This enables inserting multiple blocks belonging to a row with a single
409      funtion call */
410     array = space->val + bs2*l;
411     vals  = values + idx*bs2*n + bs*i;
412     for (j=0; j<bs; j++) {
413       for (k=0; k<bs; k++) array[k] = vals[k];
414       array += bs;
415       vals  += rmax*bs;
416     }
417     l++;
418   }
419   stash->n               += n;
420   space->local_used      += n;
421   space->local_remaining -= n;
422   PetscFunctionReturn(0);
423 }
424 /*
425   MatStashScatterBegin_Private - Initiates the transfer of values to the
426   correct owners. This function goes through the stash, and check the
427   owners of each stashed value, and sends the values off to the owner
428   processors.
429 
430   Input Parameters:
431   stash  - the stash
432   owners - an array of size 'no-of-procs' which gives the ownership range
433            for each node.
434 
435   Notes: The 'owners' array in the cased of the blocked-stash has the
436   ranges specified blocked global indices, and for the regular stash in
437   the proper global indices.
438 */
439 #undef __FUNCT__
440 #define __FUNCT__ "MatStashScatterBegin_Private"
441 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
442 {
443   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
444   PetscInt           size=stash->size,nsends;
445   PetscErrorCode     ierr;
446   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
447   PetscScalar        **rvalues,*svalues;
448   MPI_Comm           comm = stash->comm;
449   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
450   PetscMPIInt        *sizes,*nlengths,nreceives;
451   PetscInt           *sp_idx,*sp_idy;
452   PetscScalar        *sp_val;
453   PetscMatStashSpace space,space_next;
454 
455   PetscFunctionBegin;
456   bs2 = stash->bs*stash->bs;
457 
458   /*  first count number of contributors to each processor */
459   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
460   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
461   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
462 
463   i       = j    = 0;
464   lastidx = -1;
465   space   = stash->space_head;
466   while (space != NULL) {
467     space_next = space->next;
468     sp_idx     = space->idx;
469     for (l=0; l<space->local_used; l++) {
470       /* if indices are NOT locally sorted, need to start search at the beginning */
471       if (lastidx > (idx = sp_idx[l])) j = 0;
472       lastidx = idx;
473       for (; j<size; j++) {
474         if (idx >= owners[j] && idx < owners[j+1]) {
475           nlengths[j]++; owner[i] = j; break;
476         }
477       }
478       i++;
479     }
480     space = space_next;
481   }
482   /* Now check what procs get messages - and compute nsends. */
483   for (i=0, nsends=0; i<size; i++) {
484     if (nlengths[i]) {
485       sizes[i] = 1; nsends++;
486     }
487   }
488 
489   {PetscMPIInt *onodes,*olengths;
490    /* Determine the number of messages to expect, their lengths, from from-ids */
491    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
492    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
493    /* since clubbing row,col - lengths are multiplied by 2 */
494    for (i=0; i<nreceives; i++) olengths[i] *=2;
495    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
496    /* values are size 'bs2' lengths (and remove earlier factor 2 */
497    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
498    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
499    ierr = PetscFree(onodes);CHKERRQ(ierr);
500    ierr = PetscFree(olengths);CHKERRQ(ierr);}
501 
502   /* do sends:
503       1) starts[i] gives the starting index in svalues for stuff going to
504          the ith processor
505   */
506   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
507   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
508   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
509   /* use 2 sends the first with all_a, the next with all_i and all_j */
510   startv[0] = 0; starti[0] = 0;
511   for (i=1; i<size; i++) {
512     startv[i] = startv[i-1] + nlengths[i-1];
513     starti[i] = starti[i-1] + 2*nlengths[i-1];
514   }
515 
516   i     = 0;
517   space = stash->space_head;
518   while (space != NULL) {
519     space_next = space->next;
520     sp_idx     = space->idx;
521     sp_idy     = space->idy;
522     sp_val     = space->val;
523     for (l=0; l<space->local_used; l++) {
524       j = owner[i];
525       if (bs2 == 1) {
526         svalues[startv[j]] = sp_val[l];
527       } else {
528         PetscInt    k;
529         PetscScalar *buf1,*buf2;
530         buf1 = svalues+bs2*startv[j];
531         buf2 = space->val + bs2*l;
532         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
533       }
534       sindices[starti[j]]             = sp_idx[l];
535       sindices[starti[j]+nlengths[j]] = sp_idy[l];
536       startv[j]++;
537       starti[j]++;
538       i++;
539     }
540     space = space_next;
541   }
542   startv[0] = 0;
543   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
544 
545   for (i=0,count=0; i<size; i++) {
546     if (sizes[i]) {
547       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
548       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
549     }
550   }
551 #if defined(PETSC_USE_INFO)
552   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
553   for (i=0; i<size; i++) {
554     if (sizes[i]) {
555       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
556     }
557   }
558 #endif
559   ierr = PetscFree(nlengths);CHKERRQ(ierr);
560   ierr = PetscFree(owner);CHKERRQ(ierr);
561   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
562   ierr = PetscFree(sizes);CHKERRQ(ierr);
563 
564   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
565   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
566 
567   for (i=0; i<nreceives; i++) {
568     recv_waits[2*i]   = recv_waits1[i];
569     recv_waits[2*i+1] = recv_waits2[i];
570   }
571   stash->recv_waits = recv_waits;
572 
573   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
574   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
575 
576   stash->svalues         = svalues;
577   stash->sindices        = sindices;
578   stash->rvalues         = rvalues;
579   stash->rindices        = rindices;
580   stash->send_waits      = send_waits;
581   stash->nsends          = nsends;
582   stash->nrecvs          = nreceives;
583   stash->reproduce_count = 0;
584   PetscFunctionReturn(0);
585 }
586 
587 /*
588    MatStashScatterGetMesg_Private - This function waits on the receives posted
589    in the function MatStashScatterBegin_Private() and returns one message at
590    a time to the calling function. If no messages are left, it indicates this
591    by setting flg = 0, else it sets flg = 1.
592 
593    Input Parameters:
594    stash - the stash
595 
596    Output Parameters:
597    nvals - the number of entries in the current message.
598    rows  - an array of row indices (or blocked indices) corresponding to the values
599    cols  - an array of columnindices (or blocked indices) corresponding to the values
600    vals  - the values
601    flg   - 0 indicates no more message left, and the current call has no values associated.
602            1 indicates that the current call successfully received a message, and the
603              other output parameters nvals,rows,cols,vals are set appropriately.
604 */
605 #undef __FUNCT__
606 #define __FUNCT__ "MatStashScatterGetMesg_Private"
607 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
608 {
609   PetscErrorCode ierr;
610   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
611   PetscInt       bs2;
612   MPI_Status     recv_status;
613   PetscBool      match_found = PETSC_FALSE;
614 
615   PetscFunctionBegin;
616   *flg = 0; /* When a message is discovered this is reset to 1 */
617   /* Return if no more messages to process */
618   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
619 
620   bs2 = stash->bs*stash->bs;
621   /* If a matching pair of receives are found, process them, and return the data to
622      the calling function. Until then keep receiving messages */
623   while (!match_found) {
624     if (stash->reproduce) {
625       i    = stash->reproduce_count++;
626       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
627     } else {
628       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
629     }
630     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
631 
632     /* Now pack the received message into a structure which is usable by others */
633     if (i % 2) {
634       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
635 
636       flg_v[2*recv_status.MPI_SOURCE] = i/2;
637 
638       *nvals = *nvals/bs2;
639     } else {
640       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
641 
642       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
643 
644       *nvals = *nvals/2; /* This message has both row indices and col indices */
645     }
646 
647     /* Check if we have both messages from this proc */
648     i1 = flg_v[2*recv_status.MPI_SOURCE];
649     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
650     if (i1 != -1 && i2 != -1) {
651       *rows = stash->rindices[i2];
652       *cols = *rows + *nvals;
653       *vals = stash->rvalues[i1];
654       *flg  = 1;
655       stash->nprocessed++;
656       match_found = PETSC_TRUE;
657     }
658   }
659   PetscFunctionReturn(0);
660 }
661