xref: /petsc/src/mat/utils/matstash.c (revision 5a856986583887c326abe5dfd149e8184a29cd80)
1 
2 #include <petsc/private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 #if !defined(PETSC_HAVE_MPIUNI)
10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
13 #endif
14 
15 /*
16   MatStashCreate_Private - Creates a stash,currently used for all the parallel
17   matrix implementations. The stash is where elements of a matrix destined
18   to be stored on other processors are kept until matrix assembly is done.
19 
20   This is a simple minded stash. Simply adds entries to end of stash.
21 
22   Input Parameters:
23   comm - communicator, required for scatters.
24   bs   - stash block size. used when stashing blocks of values
25 
26   Output Parameters:
27   stash    - the newly created stash
28 */
29 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
30 {
31   PetscErrorCode ierr;
32   PetscInt       max,*opt,nopt,i;
33   PetscBool      flg;
34 
35   PetscFunctionBegin;
36   /* Require 2 tags,get the second using PetscCommGetNewTag() */
37   stash->comm = comm;
38 
39   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
40   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
41   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
42   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
43   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
44   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
45 
46 
47   nopt = stash->size;
48   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
49   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
50   if (flg) {
51     if (nopt == 1)                max = opt[0];
52     else if (nopt == stash->size) max = opt[stash->rank];
53     else if (stash->rank < nopt)  max = opt[stash->rank];
54     else                          max = 0; /* Use default */
55     stash->umax = max;
56   } else {
57     stash->umax = 0;
58   }
59   ierr = PetscFree(opt);CHKERRQ(ierr);
60   if (bs <= 0) bs = 1;
61 
62   stash->bs         = bs;
63   stash->nmax       = 0;
64   stash->oldnmax    = 0;
65   stash->n          = 0;
66   stash->reallocs   = -1;
67   stash->space_head = 0;
68   stash->space      = 0;
69 
70   stash->send_waits  = 0;
71   stash->recv_waits  = 0;
72   stash->send_status = 0;
73   stash->nsends      = 0;
74   stash->nrecvs      = 0;
75   stash->svalues     = 0;
76   stash->rvalues     = 0;
77   stash->rindices    = 0;
78   stash->nprocessed  = 0;
79   stash->reproduce   = PETSC_FALSE;
80   stash->blocktype   = MPI_DATATYPE_NULL;
81 
82   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
83 #if !defined(PETSC_HAVE_MPIUNI)
84   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
85   if (!flg) {
86     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90   } else {
91 #endif
92     stash->ScatterBegin   = MatStashScatterBegin_Ref;
93     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
94     stash->ScatterEnd     = MatStashScatterEnd_Ref;
95     stash->ScatterDestroy = NULL;
96 #if !defined(PETSC_HAVE_MPIUNI)
97   }
98 #endif
99   PetscFunctionReturn(0);
100 }
101 
102 /*
103    MatStashDestroy_Private - Destroy the stash
104 */
105 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
106 {
107   PetscErrorCode ierr;
108 
109   PetscFunctionBegin;
110   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
111   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
112 
113   stash->space = 0;
114 
115   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 /*
120    MatStashScatterEnd_Private - This is called as the final stage of
121    scatter. The final stages of message passing is done here, and
122    all the memory used for message passing is cleaned up. This
123    routine also resets the stash, and deallocates the memory used
124    for the stash. It also keeps track of the current memory usage
125    so that the same value can be used the next time through.
126 */
127 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
128 {
129   PetscErrorCode ierr;
130 
131   PetscFunctionBegin;
132   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
/*
   MatStashScatterEnd_Ref - Reference implementation of the final scatter stage.

   Waits for all outstanding index/value sends to complete, records a padded
   high-water mark (oldnmax) so the next assembly can pre-allocate, resets the
   stash counters, and frees all communication buffers and stash spaces.
*/
static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  /* Reset the per-rank receive bookkeeping used by MatStashScatterGetMesg_Ref() */
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends: each destination was sent two messages (indices and values) */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update nmaxold to be app 10% more than max n used, this way the
     wastage of space is reduced the next time this stash is used.
     Also update the oldmax, only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  /* Reset counters so the stash can be reused by the next assembly */
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  /* Free the buffers allocated in MatStashScatterBegin_Ref(); rvalues/rindices
     are arrays of pointers whose element 0 owns the contiguous data block */
  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
178 
179 /*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash
181 
182    Input Parameters:
183    stash    - the stash
184    nstash   - the size of the stash. Indicates the number of values stored.
185    reallocs - the number of additional mallocs incurred.
186 
187 */
188 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
189 {
190   PetscInt bs2 = stash->bs*stash->bs;
191 
192   PetscFunctionBegin;
193   if (nstash) *nstash = stash->n*bs2;
194   if (reallocs) {
195     if (stash->reallocs < 0) *reallocs = 0;
196     else                     *reallocs = stash->reallocs;
197   }
198   PetscFunctionReturn(0);
199 }
200 
201 /*
202    MatStashSetInitialSize_Private - Sets the initial size of the stash
203 
204    Input Parameters:
205    stash  - the stash
206    max    - the value that is used as the max size of the stash.
207             this value is used while allocating memory.
208 */
209 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
210 {
211   PetscFunctionBegin;
212   stash->umax = max;
213   PetscFunctionReturn(0);
214 }
215 
216 /* MatStashExpand_Private - Expand the stash. This function is called
217    when the space in the stash is not sufficient to add the new values
218    being inserted into the stash.
219 
220    Input Parameters:
221    stash - the stash
222    incr  - the minimum increase requested
223 
224    Notes:
225    This routine doubles the currently used memory.
226  */
227 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
228 {
229   PetscErrorCode ierr;
230   PetscInt       newnmax,bs2= stash->bs*stash->bs;
231 
232   PetscFunctionBegin;
233   /* allocate a larger stash */
234   if (!stash->oldnmax && !stash->nmax) { /* new stash */
235     if (stash->umax)                  newnmax = stash->umax/bs2;
236     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
237   } else if (!stash->nmax) { /* resuing stash */
238     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
239     else                              newnmax = stash->oldnmax/bs2;
240   } else                              newnmax = stash->nmax*2;
241   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
242 
243   /* Get a MatStashSpace and attach it to stash */
244   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
245   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
246     stash->space_head = stash->space;
247   }
248 
249   stash->reallocs++;
250   stash->nmax = newnmax;
251   PetscFunctionReturn(0);
252 }
253 /*
254   MatStashValuesRow_Private - inserts values into the stash. This function
255   expects the values to be roworiented. Multiple columns belong to the same row
256   can be inserted with a single call to this function.
257 
258   Input Parameters:
259   stash  - the stash
  row    - the global row corresponding to the values
261   n      - the number of elements inserted. All elements belong to the above row.
262   idxn   - the global column indices corresponding to each of the values.
263   values - the values inserted
264 */
265 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
266 {
267   PetscErrorCode     ierr;
268   PetscInt           i,k,cnt = 0;
269   PetscMatStashSpace space=stash->space;
270 
271   PetscFunctionBegin;
272   /* Check and see if we have sufficient memory */
273   if (!space || space->local_remaining < n) {
274     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
275   }
276   space = stash->space;
277   k     = space->local_used;
278   for (i=0; i<n; i++) {
279     if (ignorezeroentries && (values[i] == 0.0)) continue;
280     space->idx[k] = row;
281     space->idy[k] = idxn[i];
282     space->val[k] = values[i];
283     k++;
284     cnt++;
285   }
286   stash->n               += cnt;
287   space->local_used      += cnt;
288   space->local_remaining -= cnt;
289   PetscFunctionReturn(0);
290 }
291 
292 /*
293   MatStashValuesCol_Private - inserts values into the stash. This function
294   expects the values to be columnoriented. Multiple columns belong to the same row
295   can be inserted with a single call to this function.
296 
297   Input Parameters:
298   stash   - the stash
  row     - the global row corresponding to the values
300   n       - the number of elements inserted. All elements belong to the above row.
301   idxn    - the global column indices corresponding to each of the values.
302   values  - the values inserted
  stepval - the consecutive values are separated by a distance of stepval.
304             this happens because the input is columnoriented.
305 */
306 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
307 {
308   PetscErrorCode     ierr;
309   PetscInt           i,k,cnt = 0;
310   PetscMatStashSpace space=stash->space;
311 
312   PetscFunctionBegin;
313   /* Check and see if we have sufficient memory */
314   if (!space || space->local_remaining < n) {
315     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
316   }
317   space = stash->space;
318   k     = space->local_used;
319   for (i=0; i<n; i++) {
320     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
321     space->idx[k] = row;
322     space->idy[k] = idxn[i];
323     space->val[k] = values[i*stepval];
324     k++;
325     cnt++;
326   }
327   stash->n               += cnt;
328   space->local_used      += cnt;
329   space->local_remaining -= cnt;
330   PetscFunctionReturn(0);
331 }
332 
333 /*
334   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
335   This function expects the values to be roworiented. Multiple columns belong
336   to the same block-row can be inserted with a single call to this function.
337   This function extracts the sub-block of values based on the dimensions of
338   the original input block, and the row,col values corresponding to the blocks.
339 
340   Input Parameters:
341   stash  - the stash
  row    - the global block-row corresponding to the values
343   n      - the number of elements inserted. All elements belong to the above row.
344   idxn   - the global block-column indices corresponding to each of the blocks of
345            values. Each block is of size bs*bs.
346   values - the values inserted
347   rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns of the original block.
349   idx    - the index of the current block-row in the original block.
350 */
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Expand the stash if the current space cannot hold n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    /* start of the i-th bs x bs sub-block within the row-oriented input */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* input row j becomes column j of the stored block (stride bs) */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs; /* advance one full row of the original block */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
386 
387 /*
388   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
389   This function expects the values to be roworiented. Multiple columns belong
390   to the same block-row can be inserted with a single call to this function.
391   This function extracts the sub-block of values based on the dimensions of
392   the original input block, and the row,col values corresponding to the blocks.
393 
394   Input Parameters:
395   stash  - the stash
  row    - the global block-row corresponding to the values
397   n      - the number of elements inserted. All elements belong to the above row.
398   idxn   - the global block-column indices corresponding to each of the blocks of
399            values. Each block is of size bs*bs.
400   values - the values inserted
401   rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns of the original block.
403   idx    - the index of the current block-row in the original block.
404 */
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Expand the stash if the current space cannot hold n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    /* start of the i-th bs x bs sub-block within the input */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* copy one column of the sub-block contiguously into the stored block */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs; /* advance one full column of the original block */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
440 /*
441   MatStashScatterBegin_Private - Initiates the transfer of values to the
442   correct owners. This function goes through the stash, and check the
443   owners of each stashed value, and sends the values off to the owner
444   processors.
445 
446   Input Parameters:
447   stash  - the stash
448   owners - an array of size 'no-of-procs' which gives the ownership range
449            for each node.
450 
451   Notes:
    The 'owners' array in the case of the blocked-stash has the
453   ranges specified blocked global indices, and for the regular stash in
454   the proper global indices.
455 */
456 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
457 {
458   PetscErrorCode ierr;
459 
460   PetscFunctionBegin;
461   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
466 {
467   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
468   PetscInt           size=stash->size,nsends;
469   PetscErrorCode     ierr;
470   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
471   PetscScalar        **rvalues,*svalues;
472   MPI_Comm           comm = stash->comm;
473   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
474   PetscMPIInt        *sizes,*nlengths,nreceives;
475   PetscInt           *sp_idx,*sp_idy;
476   PetscScalar        *sp_val;
477   PetscMatStashSpace space,space_next;
478 
479   PetscFunctionBegin;
480   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
481     InsertMode addv;
482     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
483     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
484     mat->insertmode = addv; /* in case this processor had no cache */
485   }
486 
487   bs2 = stash->bs*stash->bs;
488 
489   /*  first count number of contributors to each processor */
490   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
491   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
492 
493   i       = j    = 0;
494   lastidx = -1;
495   space   = stash->space_head;
496   while (space) {
497     space_next = space->next;
498     sp_idx     = space->idx;
499     for (l=0; l<space->local_used; l++) {
500       /* if indices are NOT locally sorted, need to start search at the beginning */
501       if (lastidx > (idx = sp_idx[l])) j = 0;
502       lastidx = idx;
503       for (; j<size; j++) {
504         if (idx >= owners[j] && idx < owners[j+1]) {
505           nlengths[j]++; owner[i] = j; break;
506         }
507       }
508       i++;
509     }
510     space = space_next;
511   }
512 
513   /* Now check what procs get messages - and compute nsends. */
514   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
515   for (i=0, nsends=0; i<size; i++) {
516     if (nlengths[i]) {
517       sizes[i] = 1; nsends++;
518     }
519   }
520 
521   {PetscMPIInt *onodes,*olengths;
522    /* Determine the number of messages to expect, their lengths, from from-ids */
523    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
524    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
525    /* since clubbing row,col - lengths are multiplied by 2 */
526    for (i=0; i<nreceives; i++) olengths[i] *=2;
527    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
528    /* values are size 'bs2' lengths (and remove earlier factor 2 */
529    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
530    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
531    ierr = PetscFree(onodes);CHKERRQ(ierr);
532    ierr = PetscFree(olengths);CHKERRQ(ierr);}
533 
534   /* do sends:
535       1) starts[i] gives the starting index in svalues for stuff going to
536          the ith processor
537   */
538   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
539   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
540   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
541   /* use 2 sends the first with all_a, the next with all_i and all_j */
542   startv[0] = 0; starti[0] = 0;
543   for (i=1; i<size; i++) {
544     startv[i] = startv[i-1] + nlengths[i-1];
545     starti[i] = starti[i-1] + 2*nlengths[i-1];
546   }
547 
548   i     = 0;
549   space = stash->space_head;
550   while (space) {
551     space_next = space->next;
552     sp_idx     = space->idx;
553     sp_idy     = space->idy;
554     sp_val     = space->val;
555     for (l=0; l<space->local_used; l++) {
556       j = owner[i];
557       if (bs2 == 1) {
558         svalues[startv[j]] = sp_val[l];
559       } else {
560         PetscInt    k;
561         PetscScalar *buf1,*buf2;
562         buf1 = svalues+bs2*startv[j];
563         buf2 = space->val + bs2*l;
564         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
565       }
566       sindices[starti[j]]             = sp_idx[l];
567       sindices[starti[j]+nlengths[j]] = sp_idy[l];
568       startv[j]++;
569       starti[j]++;
570       i++;
571     }
572     space = space_next;
573   }
574   startv[0] = 0;
575   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
576 
577   for (i=0,count=0; i<size; i++) {
578     if (sizes[i]) {
579       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
580       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
581     }
582   }
583 #if defined(PETSC_USE_INFO)
584   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
585   for (i=0; i<size; i++) {
586     if (sizes[i]) {
587       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
588     }
589   }
590 #endif
591   ierr = PetscFree(nlengths);CHKERRQ(ierr);
592   ierr = PetscFree(owner);CHKERRQ(ierr);
593   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
594   ierr = PetscFree(sizes);CHKERRQ(ierr);
595 
596   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
597   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
598 
599   for (i=0; i<nreceives; i++) {
600     recv_waits[2*i]   = recv_waits1[i];
601     recv_waits[2*i+1] = recv_waits2[i];
602   }
603   stash->recv_waits = recv_waits;
604 
605   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
606   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
607 
608   stash->svalues         = svalues;
609   stash->sindices        = sindices;
610   stash->rvalues         = rvalues;
611   stash->rindices        = rindices;
612   stash->send_waits      = send_waits;
613   stash->nsends          = nsends;
614   stash->nrecvs          = nreceives;
615   stash->reproduce_count = 0;
616   PetscFunctionReturn(0);
617 }
618 
619 /*
620    MatStashScatterGetMesg_Private - This function waits on the receives posted
621    in the function MatStashScatterBegin_Private() and returns one message at
622    a time to the calling function. If no messages are left, it indicates this
623    by setting flg = 0, else it sets flg = 1.
624 
625    Input Parameters:
626    stash - the stash
627 
628    Output Parameters:
629    nvals - the number of entries in the current message.
630    rows  - an array of row indices (or blocked indices) corresponding to the values
631    cols  - an array of columnindices (or blocked indices) corresponding to the values
632    vals  - the values
633    flg   - 0 indicates no more message left, and the current call has no values associated.
634            1 indicates that the current call successfully received a message, and the
635              other output parameters nvals,rows,cols,vals are set appropriately.
636 */
637 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
638 {
639   PetscErrorCode ierr;
640 
641   PetscFunctionBegin;
642   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
643   PetscFunctionReturn(0);
644 }
645 
/*
   MatStashScatterGetMesg_Ref - Reference implementation: completes receives
   until both the index message and the value message from some source rank
   have arrived, then returns that pair as one logical message.
*/
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* Reproducible mode: complete receives in posted order, not arrival order */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request slot: value message (see the interleaving in ScatterBegin) */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      /* remember which receive buffer holds this source's values */
      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2; /* scalar count -> entry count */
    } else {
      /* even request slot: index message */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* column indices are stored right after the rows */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
700 
701 #if !defined(PETSC_HAVE_MPIUNI)
/* One stashed entry as sent over the wire by the BTS scatter */
typedef struct {
  PetscInt row;                 /* global (block-)row index */
  PetscInt col;                 /* global (block-)column index */
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
707 
/*
  MatStashSortCompress_Private - Sorts all stashed entries by (row,col),
  combines duplicates according to insertmode, and packs the result as
  MatStashBlock records into stash->segsendblocks.
*/
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  /* Flatten the chain of stash spaces into parallel row/col/value-pointer arrays */
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  /* Sort by row, keeping (col,perm) paired with their entries */
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the last stashed value wins */
            ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
755 
/*
  MatStashBlockTypeSetUp - Lazily builds the MPI datatype describing one
  MatStashBlock (row, col, bs^2 scalars) and creates the segmented buffers
  used by the BTS scatter. Does nothing once stash->blocktype is set.
*/
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    /* Segmented buffers sized in whole blocks: send data, received data, and per-message frames */
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    /* Two struct members: {row,col} as integers, then bs2 scalars */
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* Resize so the type's extent matches the padded block size in the buffers */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
793 
794 /* Callback invoked after target rank has initiatied receive of rendezvous message.
795  * Here we post the main sends.
796  */
797 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
798 {
799   MatStash *stash = (MatStash*)ctx;
800   MatStashHeader *hdr = (MatStashHeader*)sdata;
801   PetscErrorCode ierr;
802 
803   PetscFunctionBegin;
804   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
805   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
806   stash->sendframes[rankid].count = hdr->count;
807   stash->sendframes[rankid].pending = 1;
808   PetscFunctionReturn(0);
809 }
810 
811 /* Callback invoked by target after receiving rendezvous message.
812  * Here we post the main recvs.
813  */
814 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
815 {
816   MatStash *stash = (MatStash*)ctx;
817   MatStashHeader *hdr = (MatStashHeader*)rdata;
818   MatStashFrame *frame;
819   PetscErrorCode ierr;
820 
821   PetscFunctionBegin;
822   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
823   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
824   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
825   frame->count = hdr->count;
826   frame->pending = 1;
827   PetscFunctionReturn(0);
828 }
829 
830 /*
831  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
832  */
/* MatStashScatterBegin_BTS - Initiate the build-two-sided (BTS) scatter of stashed off-process entries.

   Sorts/compresses the locally stashed blocks, determines which ranks they must be sent to,
   encodes the insert mode in the sign of the row indices, and posts the sends/receives
   (either by reusing the communication pattern from the first assembly, or by discovering
   it with PetscCommBuildTwoSidedFReq the first time).

   owners[] contains the ownership ranges; may be indexed by either blocks or scalars.
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  /* After sorting, sendblocks holds all outgoing blocks contiguously, ordered by (row,col),
     hence grouped into runs with the same owning rank */
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        /* With MAT_SUBSET_OFF_PROC_ENTRIES the communication pattern is frozen after the first
           assembly; a row destined for a rank we did not previously talk to is a user error */
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      /* Binary search the ownership ranges; a negative result encodes the insertion point */
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes (second pass over the same runs found above) */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      /* row r is sent as -(r+1); the receiver decodes the mode from the sign (see MatStashScatterGetMesg_BTS) */
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->first_assembly_done) {
    /* Communication pattern is known from the first assembly; post matching recvs/sends directly */
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* First assembly: discover the receive ranks via a two-sided rendezvous; the callbacks post the actual sends/recvs */
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the incremental receive state consumed by MatStashScatterGetMesg_BTS */
  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active     = NULL;
  stash->recvframe_i          = 0;
  stash->some_i               = 0;
  stash->some_count           = 0;
  stash->recvcount            = 0;
  stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode           = &mat->insertmode;
  PetscFunctionReturn(0);
}
945 
/* MatStashScatterGetMesg_BTS - Hand back the next received stash block, one (row,col,values) entry at a time.

   Output: *flg is 1 with *n=1 and row/col/val pointing into the current receive frame, or 0 when
   all messages from all ranks have been consumed.  Iterates lazily: when the current frame is
   exhausted it waits (MPI_Waitsome) for more receives to complete, validating InsertMode
   consistency (encoded in the sign of the first row index) on each newly arrived frame. */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame that still has unread blocks, completing receives as needed */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* The sender encodes INSERT_VALUES by negating rows (row -> -(row+1)), so the sign of the
         first row tells us the sender's mode; it must agree with ours once set */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  /* Return the next block from the active frame, decoding the sign-encoded row if necessary */
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
984 
985 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
986 {
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
991   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
992     void *dummy;
993     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
994   } else {                      /* No reuse, so collect everything. */
995     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
996   }
997 
998   /* Now update nmaxold to be app 10% more than max n used, this way the
999      wastage of space is reduced the next time this stash is used.
1000      Also update the oldmax, only if it increases */
1001   if (stash->n) {
1002     PetscInt bs2     = stash->bs*stash->bs;
1003     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1004     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1005   }
1006 
1007   stash->nmax       = 0;
1008   stash->n          = 0;
1009   stash->reallocs   = -1;
1010   stash->nprocessed = 0;
1011 
1012   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1013 
1014   stash->space = 0;
1015 
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1020 {
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1025   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1026   stash->recvframes = NULL;
1027   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1028   if (stash->blocktype != MPI_DATATYPE_NULL) {
1029     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1030   }
1031   stash->nsendranks = 0;
1032   stash->nrecvranks = 0;
1033   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1034   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1035   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1036   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1037   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1038   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 #endif
1042