xref: /petsc/src/mat/utils/matstash.c (revision feff33ee0b5b037fa8f9f294dede656a2f85cc47)
1 
2 #include <petsc/private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 #if !defined(PETSC_HAVE_MPIUNI)
10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
13 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
14 #endif
15 
/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple minded stash. Simply adds entries to end of stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size. used when stashing blocks of values
         (values <= 0 are treated as 1, i.e. a point stash)

  Output Parameters:
  stash    - the newly created stash

  Options Database Keys:
  -matstash_initial_size <n|n0,n1,...> - initial stash size (one value, or one per rank)
  -matstash_reproduce                  - process messages in rank order for reproducibility
  -matstash_legacy                     - use the reference (two-message) scatter implementation
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags,get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  /* flg_v records, per source rank, which of the two expected messages
     (indices/values) have arrived; -1 means "not received yet" */
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;


  /* -matstash_initial_size may supply a single value for all ranks, one value
     per rank, or a partial per-rank list; umax = 0 means "use the default" */
  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  /* Select the scatter implementation: BTS by default, reference with -matstash_legacy.
     Note the braces below are balanced across the #if/#endif blocks. */
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}
102 
103 /*
104    MatStashDestroy_Private - Destroy the stash
105 */
106 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
112   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
113 
114   stash->space = 0;
115 
116   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 /*
121    MatStashScatterEnd_Private - This is called as the final stage of
122    scatter. The final stages of message passing is done here, and
123    all the memory used for message passing is cleaned up. This
124    routine also resets the stash, and deallocates the memory used
125    for the stash. It also keeps track of the current memory usage
126    so that the same value can be used the next time through.
127 */
128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129 {
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
138 {
139   PetscErrorCode ierr;
140   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
141   MPI_Status     *send_status;
142 
143   PetscFunctionBegin;
144   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
145   /* wait on sends */
146   if (nsends) {
147     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
148     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
149     ierr = PetscFree(send_status);CHKERRQ(ierr);
150   }
151 
152   /* Now update nmaxold to be app 10% more than max n used, this way the
153      wastage of space is reduced the next time this stash is used.
154      Also update the oldmax, only if it increases */
155   if (stash->n) {
156     bs2     = stash->bs*stash->bs;
157     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
158     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
159   }
160 
161   stash->nmax       = 0;
162   stash->n          = 0;
163   stash->reallocs   = -1;
164   stash->nprocessed = 0;
165 
166   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
167 
168   stash->space = 0;
169 
170   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
171   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
172   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
173   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
174   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
175   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
176   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 /*
181    MatStashGetInfo_Private - Gets the relavant statistics of the stash
182 
183    Input Parameters:
184    stash    - the stash
185    nstash   - the size of the stash. Indicates the number of values stored.
186    reallocs - the number of additional mallocs incurred.
187 
188 */
189 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
190 {
191   PetscInt bs2 = stash->bs*stash->bs;
192 
193   PetscFunctionBegin;
194   if (nstash) *nstash = stash->n*bs2;
195   if (reallocs) {
196     if (stash->reallocs < 0) *reallocs = 0;
197     else                     *reallocs = stash->reallocs;
198   }
199   PetscFunctionReturn(0);
200 }
201 
202 /*
203    MatStashSetInitialSize_Private - Sets the initial size of the stash
204 
205    Input Parameters:
206    stash  - the stash
207    max    - the value that is used as the max size of the stash.
208             this value is used while allocating memory.
209 */
210 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
211 {
212   PetscFunctionBegin;
213   stash->umax = max;
214   PetscFunctionReturn(0);
215 }
216 
217 /* MatStashExpand_Private - Expand the stash. This function is called
218    when the space in the stash is not sufficient to add the new values
219    being inserted into the stash.
220 
221    Input Parameters:
222    stash - the stash
223    incr  - the minimum increase requested
224 
225    Notes:
226    This routine doubles the currently used memory.
227  */
228 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
229 {
230   PetscErrorCode ierr;
231   PetscInt       newnmax,bs2= stash->bs*stash->bs;
232 
233   PetscFunctionBegin;
234   /* allocate a larger stash */
235   if (!stash->oldnmax && !stash->nmax) { /* new stash */
236     if (stash->umax)                  newnmax = stash->umax/bs2;
237     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
238   } else if (!stash->nmax) { /* resuing stash */
239     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
240     else                              newnmax = stash->oldnmax/bs2;
241   } else                              newnmax = stash->nmax*2;
242   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
243 
244   /* Get a MatStashSpace and attach it to stash */
245   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
246   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
247     stash->space_head = stash->space;
248   }
249 
250   stash->reallocs++;
251   stash->nmax = newnmax;
252   PetscFunctionReturn(0);
253 }
254 /*
255   MatStashValuesRow_Private - inserts values into the stash. This function
256   expects the values to be roworiented. Multiple columns belong to the same row
257   can be inserted with a single call to this function.
258 
259   Input Parameters:
260   stash  - the stash
261   row    - the global row correspoiding to the values
262   n      - the number of elements inserted. All elements belong to the above row.
263   idxn   - the global column indices corresponding to each of the values.
264   values - the values inserted
265 */
266 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
267 {
268   PetscErrorCode     ierr;
269   PetscInt           i,k,cnt = 0;
270   PetscMatStashSpace space=stash->space;
271 
272   PetscFunctionBegin;
273   /* Check and see if we have sufficient memory */
274   if (!space || space->local_remaining < n) {
275     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
276   }
277   space = stash->space;
278   k     = space->local_used;
279   for (i=0; i<n; i++) {
280     if (ignorezeroentries && (values[i] == 0.0)) continue;
281     space->idx[k] = row;
282     space->idy[k] = idxn[i];
283     space->val[k] = values[i];
284     k++;
285     cnt++;
286   }
287   stash->n               += cnt;
288   space->local_used      += cnt;
289   space->local_remaining -= cnt;
290   PetscFunctionReturn(0);
291 }
292 
293 /*
294   MatStashValuesCol_Private - inserts values into the stash. This function
295   expects the values to be columnoriented. Multiple columns belong to the same row
296   can be inserted with a single call to this function.
297 
298   Input Parameters:
299   stash   - the stash
300   row     - the global row correspoiding to the values
301   n       - the number of elements inserted. All elements belong to the above row.
302   idxn    - the global column indices corresponding to each of the values.
303   values  - the values inserted
304   stepval - the consecutive values are sepated by a distance of stepval.
305             this happens because the input is columnoriented.
306 */
307 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
308 {
309   PetscErrorCode     ierr;
310   PetscInt           i,k,cnt = 0;
311   PetscMatStashSpace space=stash->space;
312 
313   PetscFunctionBegin;
314   /* Check and see if we have sufficient memory */
315   if (!space || space->local_remaining < n) {
316     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
317   }
318   space = stash->space;
319   k     = space->local_used;
320   for (i=0; i<n; i++) {
321     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
322     space->idx[k] = row;
323     space->idy[k] = idxn[i];
324     space->val[k] = values[i*stepval];
325     k++;
326     cnt++;
327   }
328   stash->n               += cnt;
329   space->local_used      += cnt;
330   space->local_remaining -= cnt;
331   PetscFunctionReturn(0);
332 }
333 
334 /*
335   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
336   This function expects the values to be roworiented. Multiple columns belong
337   to the same block-row can be inserted with a single call to this function.
338   This function extracts the sub-block of values based on the dimensions of
339   the original input block, and the row,col values corresponding to the blocks.
340 
341   Input Parameters:
342   stash  - the stash
343   row    - the global block-row correspoiding to the values
344   n      - the number of elements inserted. All elements belong to the above row.
345   idxn   - the global block-column indices corresponding to each of the blocks of
346            values. Each block is of size bs*bs.
347   values - the values inserted
348   rmax   - the number of block-rows in the original block.
349   cmax   - the number of block-columsn on the original block.
350   idx    - the index of the current block-row in the original block.
351 */
352 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
353 {
354   PetscErrorCode     ierr;
355   PetscInt           i,j,k,bs2,bs=stash->bs,l;
356   const PetscScalar  *vals;
357   PetscScalar        *array;
358   PetscMatStashSpace space=stash->space;
359 
360   PetscFunctionBegin;
361   if (!space || space->local_remaining < n) {
362     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
363   }
364   space = stash->space;
365   l     = space->local_used;
366   bs2   = bs*bs;
367   for (i=0; i<n; i++) {
368     space->idx[l] = row;
369     space->idy[l] = idxn[i];
370     /* Now copy over the block of values. Store the values column oriented.
371        This enables inserting multiple blocks belonging to a row with a single
372        funtion call */
373     array = space->val + bs2*l;
374     vals  = values + idx*bs2*n + bs*i;
375     for (j=0; j<bs; j++) {
376       for (k=0; k<bs; k++) array[k*bs] = vals[k];
377       array++;
378       vals += cmax*bs;
379     }
380     l++;
381   }
382   stash->n               += n;
383   space->local_used      += n;
384   space->local_remaining -= n;
385   PetscFunctionReturn(0);
386 }
387 
388 /*
389   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
390   This function expects the values to be roworiented. Multiple columns belong
391   to the same block-row can be inserted with a single call to this function.
392   This function extracts the sub-block of values based on the dimensions of
393   the original input block, and the row,col values corresponding to the blocks.
394 
395   Input Parameters:
396   stash  - the stash
397   row    - the global block-row correspoiding to the values
398   n      - the number of elements inserted. All elements belong to the above row.
399   idxn   - the global block-column indices corresponding to each of the blocks of
400            values. Each block is of size bs*bs.
401   values - the values inserted
402   rmax   - the number of block-rows in the original block.
403   cmax   - the number of block-columsn on the original block.
404   idx    - the index of the current block-row in the original block.
405 */
406 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
407 {
408   PetscErrorCode     ierr;
409   PetscInt           i,j,k,bs2,bs=stash->bs,l;
410   const PetscScalar  *vals;
411   PetscScalar        *array;
412   PetscMatStashSpace space=stash->space;
413 
414   PetscFunctionBegin;
415   if (!space || space->local_remaining < n) {
416     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
417   }
418   space = stash->space;
419   l     = space->local_used;
420   bs2   = bs*bs;
421   for (i=0; i<n; i++) {
422     space->idx[l] = row;
423     space->idy[l] = idxn[i];
424     /* Now copy over the block of values. Store the values column oriented.
425      This enables inserting multiple blocks belonging to a row with a single
426      funtion call */
427     array = space->val + bs2*l;
428     vals  = values + idx*bs2*n + bs*i;
429     for (j=0; j<bs; j++) {
430       for (k=0; k<bs; k++) array[k] = vals[k];
431       array += bs;
432       vals  += rmax*bs;
433     }
434     l++;
435   }
436   stash->n               += n;
437   space->local_used      += n;
438   space->local_remaining -= n;
439   PetscFunctionReturn(0);
440 }
441 /*
442   MatStashScatterBegin_Private - Initiates the transfer of values to the
443   correct owners. This function goes through the stash, and check the
444   owners of each stashed value, and sends the values off to the owner
445   processors.
446 
447   Input Parameters:
448   stash  - the stash
449   owners - an array of size 'no-of-procs' which gives the ownership range
450            for each node.
451 
452   Notes: The 'owners' array in the cased of the blocked-stash has the
453   ranges specified blocked global indices, and for the regular stash in
454   the proper global indices.
455 */
456 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
457 {
458   PetscErrorCode ierr;
459 
460   PetscFunctionBegin;
461   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
/*
  MatStashScatterBegin_Ref - Reference scatter implementation.  Sends each
  destination rank two messages: one with the (row,col) index pairs (tag1)
  and one with the values (tag2).  The receives are posted here; they are
  completed one rank at a time by MatStashScatterGetMesg_Ref().
*/
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    /* bitwise OR of the insert modes across ranks: both bits set iff ranks disagree */
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      /* linear search resumes from the previous owner, so sorted input is O(n) */
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* second pass over the stash: pack values and (row,col) index pairs into
     the per-destination regions of the send buffers */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices are stored first, column indices nlengths[j] later */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv[] was consumed as a cursor above; rebuild it for the Isends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* interleave index/value requests: even slots = indices, odd slots = values */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
617 
618 /*
619    MatStashScatterGetMesg_Private - This function waits on the receives posted
620    in the function MatStashScatterBegin_Private() and returns one message at
621    a time to the calling function. If no messages are left, it indicates this
622    by setting flg = 0, else it sets flg = 1.
623 
624    Input Parameters:
625    stash - the stash
626 
627    Output Parameters:
628    nvals - the number of entries in the current message.
629    rows  - an array of row indices (or blocked indices) corresponding to the values
630    cols  - an array of columnindices (or blocked indices) corresponding to the values
631    vals  - the values
632    flg   - 0 indicates no more message left, and the current call has no values associated.
633            1 indicates that the current call successfully received a message, and the
634              other output parameters nvals,rows,cols,vals are set appropriately.
635 */
636 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
637 {
638   PetscErrorCode ierr;
639 
640   PetscFunctionBegin;
641   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
/*
  MatStashScatterGetMesg_Ref - Reference implementation of GetMesg.  Each
  source rank sends two messages (indices and values, interleaved as
  even/odd slots in recv_waits); this routine keeps completing receives
  until it has BOTH messages from some rank, then returns that rank's data.
*/
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* reproducible mode: complete receives in posted order, not arrival order */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request slot: this is a values message (tag2) */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      /* remember which receive (i/2) holds this rank's values */
      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      /* even request slot: this is an indices message (tag1) */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      /* remember which receive (i/2) holds this rank's indices */
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;           /* col indices follow the rows in the same buffer */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
699 
700 #if !defined(PETSC_HAVE_MPIUNI)
/* One stashed entry in the BTS scheme: a (row,col) location plus its bs2
   values.  Uses the pre-C99 "struct hack": vals is declared with length 1
   but the blocks are allocated with stash->blocktype_size bytes each, so
   vals actually holds bs2 scalars (possibly followed by alignment padding). */
typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
706 
/*
  MatStashSortCompress_Private - Flattens the stash space list into parallel
  row/col/value-pointer arrays, sorts them by (row, col), merges duplicate
  (row,col) entries (summing for ADD_VALUES; for other modes the later
  stashed value wins), and packs the result as MatStashBlocks into
  stash->segsendblocks for the BTS sends.
*/
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  /* gather every stashed entry from the chain of stash spaces */
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  /* sort primarily by row; col and perm follow the same permutation */
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT semantics: each later duplicate overwrites, so the last one wins */
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
754 
/*
  MatStashBlockTypeSetUp - Lazily builds the MPI datatype describing one
  MatStashBlock (row, col, bs2 values) and creates the segmented buffers
  used by the BTS scatter.  Idempotent: does nothing once stash->blocktype
  has been created.
*/
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    /* segmented buffers sized in whole (padded) blocks */
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    /* struct datatype: two PetscInts (row,col) followed by bs2 scalars */
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* resize the extent to blocktype_size so consecutive blocks include the padding */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
792 
/* Callback invoked after the target rank has initiated its receive of the rendezvous message.
 * Here we post the main sends.
 */
796 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
797 {
798   MatStash *stash = (MatStash*)ctx;
799   MatStashHeader *hdr = (MatStashHeader*)sdata;
800   PetscErrorCode ierr;
801 
802   PetscFunctionBegin;
803   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
804   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
805   stash->sendframes[rankid].count = hdr->count;
806   stash->sendframes[rankid].pending = 1;
807   PetscFunctionReturn(0);
808 }
809 
810 /* Callback invoked by target after receiving rendezvous message.
811  * Here we post the main recvs.
812  */
813 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
814 {
815   MatStash *stash = (MatStash*)ctx;
816   MatStashHeader *hdr = (MatStashHeader*)rdata;
817   MatStashFrame *frame;
818   PetscErrorCode ierr;
819 
820   PetscFunctionBegin;
821   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
822   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
823   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
824   frame->count = hdr->count;
825   frame->pending = 1;
826   PetscFunctionReturn(0);
827 }
828 
829 /*
830  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
831  */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Lazily create the block datatype, then sort/compress the stashed entries into segsendblocks
   * ordered by row so each destination rank's blocks form one contiguous run. */
  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2); /* Not an exact range start: decode insertion point to the owning rank */
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes (second pass over the same runs found above) */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1); /* Negative row flags INSERT_VALUES; decoded in MatStashScatterGetMesg_BTS */
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    /* Communication pattern is being reused: skip the rendezvous and post recvs/sends directly */
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* First time (or pattern not reusable): discover recv ranks via rendezvous; the callbacks post the actual sends/recvs */
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the per-assembly receive cursors consumed by MatStashScatterGetMesg_BTS */
  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  PetscFunctionReturn(0);
}
948 
/* Hand back one received block (row, col, bs^2 values) per call, draining completed messages and
 * waiting on further receives with MPI_Waitsome only as needed.  Sets *flg=1 when a block is
 * returned and *flg=0 when every message from every rank has been consumed. */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to the next nonempty frame; the loop also skips zero-length messages */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) { /* All previously completed receives consumed; wait for more */
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* A negative first row encodes INSERT_VALUES (set by MatStashScatterBegin_BTS) */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1); /* Undo the INSERT_VALUES row encoding before returning */
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
987 
988 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
994   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
995     void *dummy;
996     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
997   } else {                      /* No reuse, so collect everything. */
998     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
999   }
1000 
1001   /* Now update nmaxold to be app 10% more than max n used, this way the
1002      wastage of space is reduced the next time this stash is used.
1003      Also update the oldmax, only if it increases */
1004   if (stash->n) {
1005     PetscInt bs2     = stash->bs*stash->bs;
1006     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1007     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1008   }
1009 
1010   stash->nmax       = 0;
1011   stash->n          = 0;
1012   stash->reallocs   = -1;
1013   stash->nprocessed = 0;
1014 
1015   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1016 
1017   stash->space = 0;
1018 
1019   PetscFunctionReturn(0);
1020 }
1021 
/* Release every resource owned by the BTS scatter: segmented buffers, the committed MPI block
 * datatype, and the per-rank send/recv tables, requests, and headers. */
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL; /* Pointed into segrecvframe, which was just destroyed */
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) { /* Only free the datatype if MatStashBlockTypeSetUp ever built it */
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1044 #endif
1045