xref: /petsc/src/mat/utils/matstash.c (revision 487a658c8b32ba712a1dc8280daad2fd70c1dcd9)
#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE   10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
#if !defined(PETSC_HAVE_MPIUNI)
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
#endif

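/*
  A hedged sketch (not compiled; the call sites are assumed for illustration)
  of how a parallel Mat implementation typically drives the stash, using only
  routines defined in this file:

    MatStash stash;
    ierr = MatStashCreate_Private(comm,1,&stash);CHKERRQ(ierr);
    ... MatSetValues() stashes each off-process entry ...
    ierr = MatStashValuesRow_Private(&stash,row,n,cols,vals,PETSC_FALSE);CHKERRQ(ierr);
    ... MatAssemblyBegin() ships the entries to their owners ...
    ierr = MatStashScatterBegin_Private(mat,&stash,owners);CHKERRQ(ierr);
    ... MatAssemblyEnd() drains the messages, then cleans up ...
    ierr = MatStashScatterGetMesg_Private(&stash,&nvals,&rows,&cols,&vals,&flg);CHKERRQ(ierr);
    ierr = MatStashScatterEnd_Private(&stash);CHKERRQ(ierr);
    ierr = MatStashDestroy_Private(&stash);CHKERRQ(ierr);
*/
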
/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash: entries are simply appended to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash    - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
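  /* Example of the option handling above (illustrative values only): running
     with "-matstash_initial_size 10000" gives every rank umax = 10000, while
     "-matstash_initial_size 5000,20000" on two ranks gives rank 0
     umax = 5000 and rank 1 umax = 20000; ranks beyond the number of supplied
     values fall back to the default. */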
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}

/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = 0;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e., the number of values stored.
   reallocs - the number of additional mallocs incurred.

*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash;
            this value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
 */
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2= stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs2;
    else                              newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else                              newnmax = stash->nmax*2;
  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
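
/*
  Worked example of the growth policy above (illustrative numbers only): with
  bs = 1 (so bs2 = 1) and no user-supplied -matstash_initial_size, a fresh
  stash gets newnmax = DEFAULT_STASH_SIZE = 10000 entries.  Each later
  expansion doubles the current nmax (10000 -> 20000 -> 40000 -> ...), and if
  even the doubled size cannot hold the requested incr, another 2*incr
  entries are added on top.
*/
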
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
  ignorezeroentries - if PETSC_TRUE, values equal to zero are not stashed
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval;
            this happens because the input is column-oriented.
  ignorezeroentries - if PETSC_TRUE, values equal to zero are not stashed
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}

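/*
  A small worked example of the copy above (illustrative only): with bs = 2,
  n = 1, cmax = 1 and idx = 0, a row-oriented input block

      values = { a, b,
                 c, d }

  is stored as space->val = { a, c, b, d }, i.e. each bs x bs block is
  transposed into column-oriented order, as the comment in the loop notes.
  In general the block for (block-row idx, block-column i) starts at
  values + idx*bs2*n + bs*i and its scalar rows lie cmax*bs scalars apart.
*/
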
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
    In the case of the blocked stash, the 'owners' array specifies the
  ranges in blocked global indices; for the regular stash, in plain
  global indices.
*/
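/*
  Example of the owners[] convention (assumed layout, for illustration): on
  3 processes each owning 100 rows, owners = {0,100,200,300}, so a stashed
  entry with global row 150 is sent to process 1.  For a blocked stash with
  bs = 2 the same ownership would be expressed in block indices as
  owners = {0,50,100,150}.
*/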
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and the from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row,col are clubbed together, lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values come in lengths of size 'bs2' (and remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) startv[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends per destination: one with the indices (all_i and all_j), the other with the values (all_a) */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

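/*
  Shape of the messages posted above (a sketch, writing nlen for nlengths[j]
  of destination j): each destination receives two messages,

    tag1: 2*nlen integers   [ row_0 ... row_{nlen-1} | col_0 ... col_{nlen-1} ]
    tag2: bs2*nlen scalars  [ the values, bs2 consecutive scalars per entry ]

  which is why the receive lengths are scaled by 2 and by bs2/2 when posted.
*/
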
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,cols,vals are set appropriately.
*/
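/*
  Typical consumption loop (a hedged sketch of the intended usage; the actual
  call sites in the MPI matrix assembly routines are assumed):

    PetscInt    flg,*rows,*cols;
    PetscMPIInt nvals;
    PetscScalar *vals;
    ierr = MatStashScatterGetMesg_Private(stash,&nvals,&rows,&cols,&vals,&flg);CHKERRQ(ierr);
    while (flg) {
      ... insert the nvals entries (rows[i],cols[i],values) locally ...
      ierr = MatStashScatterGetMesg_Private(stash,&nvals,&rows,&cols,&vals,&flg);CHKERRQ(ierr);
    }
    ierr = MatStashScatterEnd_Private(stash);CHKERRQ(ierr);
*/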
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}

#if !defined(PETSC_HAVE_MPIUNI)
typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
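/* MatStashBlock is a variable-length record: vals[] is declared with length 1,
 * but each block actually occupies stash->blocktype_size bytes, so blocks in a
 * buffer are addressed by byte offset rather than by array indexing.  A hedged
 * sizing example: assuming 4-byte PetscInt, 8-byte PetscScalar and bs = 1, a
 * block is 8 bytes of indices plus 8 bytes of padding/values alignment and
 * value, 16 bytes in total. */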

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

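/*
  Example of the sort/compress pass above (illustrative, bs = 1): stashed
  entries (3,7)=1.0, (2,4)=5.0, (3,7)=2.0 are sorted to (2,4),(3,7),(3,7) and
  the duplicate location is combined, giving the send blocks (2,4)=5.0 and
  (3,7)=3.0 under ADD_VALUES; under INSERT_VALUES the last duplicate in the
  sorted run overwrites the others.
*/
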
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

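/*
  A hedged sizing example for the datatype built above: assuming 8-byte
  PetscInt, 8-byte PetscScalar and bs = 2 (bs2 = 4), the struct type covers
  2 integers at displacement 0 and 4 scalars at displacement 16, and the
  resized extent is blocktype_size = 16 + 4*8 = 48 bytes, so consecutive
  blocks in a send buffer are picked up at 48-byte strides.
*/
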
/* Callback invoked after the target rank has initiated the receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)rdata;
  MatStashFrame *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}

/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }
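  /* Example of the encoding above (illustrative): under INSERT_VALUES a block
     destined for global row 5 travels with row = -(5+1) = -6; the receiver in
     MatStashScatterGetMesg_BTS() recognizes the negative row as INSERT_VALUES
     and decodes it back via row = -(row+1). */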

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
  } else {                      /* No reuse, so collect everything. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL;
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif