
#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE   10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
#if !defined(PETSC_HAVE_MPIUNI)
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
#endif

/*
  MatStashCreate_Private - Creates a stash, currently used by all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash that simply appends entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get them using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}
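
/* A hypothetical usage sketch (illustrative, not code from PETSc): a parallel
   matrix implementation typically embeds a MatStash and drives it through the
   routines in this file. All names except the MatStash routines themselves are
   made up for the example.

     MatStash stash;
     PetscInt bs = 1;                          // scalar (non-blocked) stash
     ierr = MatStashCreate_Private(PETSC_COMM_WORLD,bs,&stash);CHKERRQ(ierr);
     ierr = MatStashSetInitialSize_Private(&stash,10000);CHKERRQ(ierr);
     // ... MatSetValues()-like code stashes entries destined for other ranks ...
     ierr = MatStashDestroy_Private(&stash);CHKERRQ(ierr);

   The initial size can also be set at run time with -matstash_initial_size,
   giving either one value for all ranks or one value per rank. */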

/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = 0;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored.
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            this value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
 */
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2= stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs2;
    else                              newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else                              newnmax = stash->nmax*2;
  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
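
/* Growth-policy illustration (just the arithmetic above, with made-up sizes):
   for bs = 1 (bs2 = 1), no user-set size (umax = 0), and a fresh stash
   (oldnmax = nmax = 0), the first expansion gives

     newnmax = DEFAULT_STASH_SIZE/bs2 = 10000

   entries. Once in use (nmax > 0) each expansion doubles the capacity,
   e.g. 10000 -> 20000, and if even the doubled size cannot accommodate the
   requested increase, another 2*incr entries are added on top. */
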
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}
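
/* For example (an illustrative call, not taken from PETSc itself), stashing the
   three entries (7,1) = a, (7,4) = b, (7,9) = c of global row 7 in one call:

     PetscInt    cols[3] = {1,4,9};
     PetscScalar vals[3] = {a,b,c};
     ierr = MatStashValuesRow_Private(&stash,7,3,cols,vals,PETSC_FALSE);CHKERRQ(ierr);

   appends the (idx,idy,val) triples (7,1,a), (7,4,b), (7,9,c) to the current
   MatStashSpace chunk and advances its local_used/local_remaining counters. */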

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval.
            this happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
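
/* Index-arithmetic illustration (numbers chosen only for this example): with
   bs = 2 and one block-row of two blocks (rmax = 1, cmax = 2, idx = 0, n = 2),
   the row-oriented input with leading dimension cmax*bs = 4,

     values = [a00 a01 b00 b01,
               a10 a11 b10 b11],

   is unpacked so that block a is stored at space->val + 0 and block b at
   space->val + bs2, each transposed to column-major order [a00 a10 a01 a11],
   which is the layout used throughout the stash. */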

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns
  belonging to the same block-row can be inserted with a single call to this
  function. This function extracts the sub-block of values based on the
  dimensions of the original input block, and the row,col values corresponding
  to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs'+1 containing the ownership ranges;
           rank j owns indices owners[j] up to (but not including) owners[j+1].

  Notes:
    In the case of the blocked stash the 'owners' array gives the ownership
  ranges in blocked global indices, while for the regular stash it is in
  regular global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
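
/* Shape of 'owners' (illustrative numbers): on 4 ranks with 100 rows split
   evenly, owners = {0,25,50,75,100}; a stashed entry with global row 60 belongs
   to rank 2 because owners[2] <= 60 < owners[3]. */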

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check which procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and the from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row and col indices are sent together in one message, lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are of length 'bs2' per entry (and remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) startv[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends per proc: one with the row and column indices, the other with the values */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
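
/* Pairing-logic illustration: every sending rank contributes two messages, one
   with indices (tag1, even slots of recv_waits) and one with values (tag2, odd
   slots). MPI_Waitany() may complete them in any order, so flg_v[2*source]
   records the slot of the completed values message from 'source' and
   flg_v[2*source+1] that of the indices message; only when both are != -1 is
   the pair handed back to the caller as (rows,cols,vals). */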

#if !defined(PETSC_HAVE_MPIUNI)
typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort and process the row that just ended */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
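
/* Behavior illustration (made-up entries): if the stash holds, in insertion
   order, (5,2,v1), (3,1,v2), (5,2,v3) and assembly uses ADD_VALUES, the entries
   are sorted to (3,1), (5,2), (5,2) and the duplicated location (5,2) is
   compressed into a single block holding v1+v3, so only two blocks are packed
   into segsendblocks. With INSERT_VALUES the last value v3 wins instead. */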

static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
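
/* Layout illustration (assuming, for this example only, 8-byte PetscInt and
   8-byte real PetscScalar) with bs = 2, so bs2 = 4: offsetof(vals) is 16 and

     blocktype_size = 16 + 4*8 = 48 bytes,  block i starts at byte 48*i
     [ row | col | v0 v1 v2 v3 ]

   MPI_Type_create_resized() sets the extent of the struct type to
   blocktype_size so that sending 'count' blocks strides the buffer exactly
   this way, padding included. */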

/* Callback invoked after the target rank has initiated the receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)rdata;
  MatStashFrame *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}
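
/* BTS flow sketch: on the first assembly the communication pattern is unknown,
   so MatStashScatterBegin_BTS() calls PetscCommBuildTwoSidedFReq(), which
   exchanges small MatStashHeader rendezvous messages (just the block count) to
   discover it, invoking MatStashBTSRecv_Private() on each discovered receiver
   and MatStashBTSSend_Private() on each sender to post the main blocktype
   transfers. On later assemblies with the same pattern (first_assembly_done
   set) the rendezvous is skipped and the two callbacks are called directly. */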

/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }
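
  /* Encoding example: with INSERT_VALUES, row 7 is sent as -(7+1) = -8; the
     receiver in MatStashScatterGetMesg_BTS() sees the negative row, infers
     INSERT_VALUES, and decodes it back as row = -(-8+1) = 7. A non-negative
     leading row therefore signals ADD_VALUES. */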

  if (stash->first_assembly_done) {
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active     = NULL;
  stash->recvframe_i          = 0;
  stash->some_i               = 0;
  stash->some_count           = 0;
  stash->recvcount            = 0;
  stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode           = &mat->insertmode;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
  } else {                      /* No reuse, so collect everything. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  PetscFunctionReturn(0);
}

PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL;
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif