xref: /petsc/src/mat/utils/matstash.c (revision 6a98f8dc3f2c9149905a87dc2e9d0fedaf64e09a)
1 
2 #include <petsc/private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 #if !defined(PETSC_HAVE_MPIUNI)
10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
13 #endif
14 
15 /*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
17   matrix implementations. The stash is where elements of a matrix destined
18   to be stored on other processors are kept until matrix assembly is done.
19 
20   This is a simple minded stash. Simply adds entries to end of stash.
21 
22   Input Parameters:
23   comm - communicator, required for scatters.
24   bs   - stash block size. used when stashing blocks of values
25 
26   Output Parameters:
27   stash    - the newly created stash
28 */
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get both using PetscCommGetNewTag() so the index and
     value messages of the Ref scatter cannot be confused with each other */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  /* flg_v holds two slots per rank; MatStashScatterGetMesg_Ref() uses them to
     pair up the index and value messages received from each source rank */
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;


  /* -matstash_initial_size may supply one value (used by every rank) or a
     per-rank array; ranks beyond the supplied length fall back to the default */
  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  /* Default to the BTS implementation; -matstash_legacy selects the Ref one.
     BTS is unavailable with MPIUNI, where only the Ref path is compiled in */
  flg  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}
102 
103 /*
104    MatStashDestroy_Private - Destroy the stash
105 */
106 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
112   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
113 
114   stash->space = 0;
115 
116   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 /*
121    MatStashScatterEnd_Private - This is called as the final stage of
122    scatter. The final stages of message passing is done here, and
123    all the memory used for message passing is cleaned up. This
124    routine also resets the stash, and deallocates the memory used
125    for the stash. It also keeps track of the current memory usage
126    so that the same value can be used the next time through.
127 */
128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129 {
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
138 {
139   PetscErrorCode ierr;
140   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
141   MPI_Status     *send_status;
142 
143   PetscFunctionBegin;
144   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
145   /* wait on sends */
146   if (nsends) {
147     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
148     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
149     ierr = PetscFree(send_status);CHKERRQ(ierr);
150   }
151 
152   /* Now update nmaxold to be app 10% more than max n used, this way the
153      wastage of space is reduced the next time this stash is used.
154      Also update the oldmax, only if it increases */
155   if (stash->n) {
156     bs2     = stash->bs*stash->bs;
157     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
158     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
159   }
160 
161   stash->nmax       = 0;
162   stash->n          = 0;
163   stash->reallocs   = -1;
164   stash->nprocessed = 0;
165 
166   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
167 
168   stash->space = 0;
169 
170   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
171   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
172   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
173   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
174   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
175   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
176   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 /*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash
182 
183    Input Parameters:
184    stash    - the stash
185    nstash   - the size of the stash. Indicates the number of values stored.
186    reallocs - the number of additional mallocs incurred.
187 
188 */
189 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
190 {
191   PetscInt bs2 = stash->bs*stash->bs;
192 
193   PetscFunctionBegin;
194   if (nstash) *nstash = stash->n*bs2;
195   if (reallocs) {
196     if (stash->reallocs < 0) *reallocs = 0;
197     else                     *reallocs = stash->reallocs;
198   }
199   PetscFunctionReturn(0);
200 }
201 
202 /*
203    MatStashSetInitialSize_Private - Sets the initial size of the stash
204 
205    Input Parameters:
206    stash  - the stash
207    max    - the value that is used as the max size of the stash.
208             this value is used while allocating memory.
209 */
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  /* umax is consulted by MatStashExpand_Private() when sizing a new or reused stash */
  stash->umax = max;
  PetscFunctionReturn(0);
}
216 
217 /* MatStashExpand_Private - Expand the stash. This function is called
218    when the space in the stash is not sufficient to add the new values
219    being inserted into the stash.
220 
221    Input Parameters:
222    stash - the stash
223    incr  - the minimum increase requested
224 
225    Notes:
226    This routine doubles the currently used memory.
227  */
228 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
229 {
230   PetscErrorCode ierr;
231   PetscInt       newnmax,bs2= stash->bs*stash->bs;
232 
233   PetscFunctionBegin;
234   /* allocate a larger stash */
235   if (!stash->oldnmax && !stash->nmax) { /* new stash */
236     if (stash->umax)                  newnmax = stash->umax/bs2;
237     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
238   } else if (!stash->nmax) { /* resuing stash */
239     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
240     else                              newnmax = stash->oldnmax/bs2;
241   } else                              newnmax = stash->nmax*2;
242   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
243 
244   /* Get a MatStashSpace and attach it to stash */
245   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
246   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
247     stash->space_head = stash->space;
248   }
249 
250   stash->reallocs++;
251   stash->nmax = newnmax;
252   PetscFunctionReturn(0);
253 }
254 /*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
262   n      - the number of elements inserted. All elements belong to the above row.
263   idxn   - the global column indices corresponding to each of the values.
264   values - the values inserted
265 */
266 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
267 {
268   PetscErrorCode     ierr;
269   PetscInt           i,k,cnt = 0;
270   PetscMatStashSpace space=stash->space;
271 
272   PetscFunctionBegin;
273   /* Check and see if we have sufficient memory */
274   if (!space || space->local_remaining < n) {
275     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
276   }
277   space = stash->space;
278   k     = space->local_used;
279   for (i=0; i<n; i++) {
280     if (ignorezeroentries && (values[i] == 0.0)) continue;
281     space->idx[k] = row;
282     space->idy[k] = idxn[i];
283     space->val[k] = values[i];
284     k++;
285     cnt++;
286   }
287   stash->n               += cnt;
288   space->local_used      += cnt;
289   space->local_remaining -= cnt;
290   PetscFunctionReturn(0);
291 }
292 
293 /*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - the consecutive values are separated by a distance of stepval.
            this happens because the input is column-oriented.
306 */
307 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
308 {
309   PetscErrorCode     ierr;
310   PetscInt           i,k,cnt = 0;
311   PetscMatStashSpace space=stash->space;
312 
313   PetscFunctionBegin;
314   /* Check and see if we have sufficient memory */
315   if (!space || space->local_remaining < n) {
316     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
317   }
318   space = stash->space;
319   k     = space->local_used;
320   for (i=0; i<n; i++) {
321     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
322     space->idx[k] = row;
323     space->idy[k] = idxn[i];
324     space->val[k] = values[i*stepval];
325     k++;
326     cnt++;
327   }
328   stash->n               += cnt;
329   space->local_used      += cnt;
330   space->local_remaining -= cnt;
331   PetscFunctionReturn(0);
332 }
333 
334 /*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
350   idx    - the index of the current block-row in the original block.
351 */
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Make sure the active stash space can hold n more bs*bs blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    /* vals points at the top-left of the i-th sub-block within block-row idx
       of the row-oriented source; rows of the source are cmax*bs apart.
       NOTE(review): rmax is unused in this row-oriented variant */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* Scatter one source row into a destination column (stride bs) */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
387 
388 /*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
404   idx    - the index of the current block-row in the original block.
405 */
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Make sure the active stash space can hold n more bs*bs blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    /* vals points at the i-th sub-block; consecutive source columns are
       rmax*bs apart here (column-oriented source).
       NOTE(review): cmax is unused in this column-oriented variant */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* Copy one source column contiguously into the destination */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
441 /*
442   MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, and checks the
444   owners of each stashed value, and sends the values off to the owner
445   processors.
446 
447   Input Parameters:
448   stash  - the stash
449   owners - an array of size 'no-of-procs' which gives the ownership range
450            for each node.
451 
452   Notes:
453     The 'owners' array in the cased of the blocked-stash has the
454   ranges specified blocked global indices, and for the regular stash in
455   the proper global indices.
456 */
457 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
458 {
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
/* Reference implementation: sends each destination rank two messages, one
   with the row/col indices (tag1) and one with the values (tag2).  In the
   index message the first nlengths[j] entries are rows and the next
   nlengths[j] entries are the matching columns. */
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  /* Walk the chain of stash spaces recording, for each stashed entry, the
     owning rank (owner[i]) and a per-rank entry count (nlengths[j]) */
  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* Second pass over the stash: pack values and (row,col) indices into the
     contiguous per-destination send buffers; startv/starti are advanced as
     write cursors and rebuilt afterwards */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* rows in the first half of this destination's slot, cols in the second */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* Rebuild startv (it was consumed as a cursor above) before posting sends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* Interleave: even slots are index receives, odd slots value receives */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
619 
620 /*
621    MatStashScatterGetMesg_Private - This function waits on the receives posted
622    in the function MatStashScatterBegin_Private() and returns one message at
623    a time to the calling function. If no messages are left, it indicates this
624    by setting flg = 0, else it sets flg = 1.
625 
626    Input Parameters:
627    stash - the stash
628 
629    Output Parameters:
630    nvals - the number of entries in the current message.
631    rows  - an array of row indices (or blocked indices) corresponding to the values
632    cols  - an array of columnindices (or blocked indices) corresponding to the values
633    vals  - the values
634    flg   - 0 indicates no more message left, and the current call has no values associated.
635            1 indicates that the current call successfully received a message, and the
636              other output parameters nvals,rows,cols,vals are set appropriately.
637 */
638 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
639 {
640   PetscErrorCode ierr;
641 
642   PetscFunctionBegin;
643   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
/* Reference implementation: waits on the interleaved receive requests posted
   by MatStashScatterBegin_Ref() (even slots = index messages, odd slots =
   value messages) until both messages from some source rank have arrived,
   then hands that pair back to the caller. */
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* Reproducible mode: complete receives in posting order instead of
         arrival order (-matstash_reproduce) */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* Odd request slot: scalar-value message; record which buffer slot (i/2)
         holds the values from this source rank */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      /* Even request slot: index message (rows followed by cols) */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* columns are stored right after the rows */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
701 
702 #if !defined(PETSC_HAVE_MPIUNI)
/* Packing layout of one stashed block as transmitted by the BTS scatter:
   the global (row,col) of the block followed by its scalar entries.
   vals is declared [1] but is used as a variable-length tail; the true
   extent of one block is stash->blocktype_size (see MatStashBlockTypeSetUp) */
typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
708 
/* Sorts all stashed entries by (row,col), merges duplicate locations
   (summing for ADD_VALUES, last-writer-wins for INSERT_VALUES), and packs
   the result as MatStashBlocks into stash->segsendblocks. */
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  /* Flatten the chain of stash spaces into parallel row/col/valptr arrays */
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* The row segment [rowstart,i) is complete; process it */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the last stashed value for this location wins */
            ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
756 
/* Lazily builds the MPI datatype describing one MatStashBlock (two PetscInts
   plus bs2 PetscScalars, padded to PetscInt alignment) and creates the
   segmented buffers used by the BTS scatter.  Idempotent: does nothing once
   stash->blocktype is set. */
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    /* Element 0: the two indices (row,col); element 1: the bs2 values */
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* Resize so consecutive blocks in a buffer are blocktype_size apart */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
794 
/* Callback invoked after target rank has initiated receive of rendezvous message.
 * Here we post the main sends.
 */
798 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
799 {
800   MatStash *stash = (MatStash*)ctx;
801   MatStashHeader *hdr = (MatStashHeader*)sdata;
802   PetscErrorCode ierr;
803 
804   PetscFunctionBegin;
805   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
806   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
807   stash->sendframes[rankid].count = hdr->count;
808   stash->sendframes[rankid].pending = 1;
809   PetscFunctionReturn(0);
810 }
811 
812 /* Callback invoked by target after receiving rendezvous message.
813  * Here we post the main recvs.
814  */
815 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
816 {
817   MatStash *stash = (MatStash*)ctx;
818   MatStashHeader *hdr = (MatStashHeader*)rdata;
819   MatStashFrame *frame;
820   PetscErrorCode ierr;
821 
822   PetscFunctionBegin;
823   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
824   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
825   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
826   frame->count = hdr->count;
827   frame->pending = 1;
828   PetscFunctionReturn(0);
829 }
830 
831 /*
832  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
833  */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  /* Create the block datatype/buffers (lazy) and sort/merge the stashed entries by row,
   * so blocks destined for the same owner form contiguous runs in sendblocks. */
  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i;
    size_t b;
    /* b advances monotonically across all ranks: each rank's run of blocks begins where
     * the previous rank's run ended, since blocks are sorted by row. */
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        /* A row below this rank's ownership range means we are stashing for a rank we did
         * not communicate with in the initial assembly, which MAT_SUBSET_OFF_PROC_ENTRIES forbids. */
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      /* PetscFindInt returns -(insertion point)-1 when not found exactly; convert to the owning rank */
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes (second pass repeats the scan above, now filling the arrays) */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    /* Rows are flipped to negative (row -> -(row+1)); the receiver decodes the sign
     * in MatStashScatterGetMesg_BTS to learn the sender's InsertMode. */
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->first_assembly_done) {
    /* Communication pattern is known from the first assembly: skip the rendezvous
     * and post recvs/sends directly using the saved ranks and requests. */
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* Discover which ranks will send to us via a two-sided rendezvous; the callbacks
     * post the actual data sends/recvs as the handshakes complete. */
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the cursor state consumed by MatStashScatterGetMesg_BTS */
  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active     = NULL;
  stash->recvframe_i          = 0;
  stash->some_i               = 0;
  stash->some_count           = 0;
  stash->recvcount            = 0;
  stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode           = &mat->insertmode;
  PetscFunctionReturn(0);
}
946 
/* Hand back the next received stash entry, one block at a time.
 * Sets *flg = 1 and points *row/*col/*val into the receive buffer when a block is
 * available; sets *flg = 0 when all messages from all ranks have been consumed.
 * Blocks (waits on MPI_Waitsome) when the current frames are exhausted but more
 * receives are outstanding.
 */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame that still has unread blocks, completing more receives as needed */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* The sender encoded INSERT_VALUES by negating rows (row -> -(row+1)) in
       * MatStashScatterBegin_BTS; a negative first row identifies the sender's mode. */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1); /* decode the sign-encoded row in place */
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
985 
986 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
987 {
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
992   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
993     void *dummy;
994     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
995   } else {                      /* No reuse, so collect everything. */
996     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
997   }
998 
999   /* Now update nmaxold to be app 10% more than max n used, this way the
1000      wastage of space is reduced the next time this stash is used.
1001      Also update the oldmax, only if it increases */
1002   if (stash->n) {
1003     PetscInt bs2     = stash->bs*stash->bs;
1004     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1005     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1006   }
1007 
1008   stash->nmax       = 0;
1009   stash->n          = 0;
1010   stash->reallocs   = -1;
1011   stash->nprocessed = 0;
1012 
1013   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1014 
1015   stash->space = 0;
1016 
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1026   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1027   stash->recvframes = NULL;
1028   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1029   if (stash->blocktype != MPI_DATATYPE_NULL) {
1030     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1031   }
1032   stash->nsendranks = 0;
1033   stash->nrecvranks = 0;
1034   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1035   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1036   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1037   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1038   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1039   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 #endif
1043