/* /petsc/src/mat/utils/matstash.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925) */

#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE   10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash that simply appends entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash    - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
  }
  PetscFunctionReturn(0);
}
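
/* A hedged usage sketch of the -matstash_initial_size handling above
   (illustration only; the exact option values are hypothetical). The option
   accepts either one value applied to every rank or a comma-separated
   per-rank array. For example, on 4 ranks:

     mpiexec -n 4 ./app -matstash_initial_size 20000           (every rank: umax = 20000)
     mpiexec -n 4 ./app -matstash_initial_size 1000,0,0,9000   (rank r: umax = opt[r])

   A zero entry (or a rank index beyond the array length) leaves umax = 0, which
   falls back to the default sizing policy in MatStashExpand_Private(). */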

/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = 0;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this reduces the wasted space the next time this stash is used.
     Update oldnmax only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
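
/* A quick arithmetic example of the resizing heuristic above (illustration
   only): with bs = 1 (so bs2 = 1) and stash->n = 100 entries used this
   assembly, oldnmax becomes ((int)(100*1.1) + 5)*1 = 115. The next assembly
   then starts with room for 115 entries instead of the default, avoiding most
   reallocations while capping the over-allocation near 10-15%. */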

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored.
   reallocs - the number of additional mallocs incurred.

*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
 */
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2= stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs2;
    else                              newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else                              newnmax = stash->nmax*2;
  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
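
/* A worked sizing example for the policy above (illustration only; numbers
   assume bs = 2, so bs2 = 4):
   - brand-new stash, no -matstash_initial_size: newnmax = DEFAULT_STASH_SIZE/4 = 2500 blocks;
   - reused stash whose previous assembly left oldnmax = 6000 (umax = 0): newnmax = 6000/4 = 1500;
   - full stash with nmax = 2500 requesting incr = 10 more: newnmax = 5000 (doubling).
   Whenever the computed newnmax still cannot hold nmax + incr, another 2*incr
   blocks are added on top. */
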
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}
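
/* A hedged usage sketch (hypothetical caller; MatStash is a private type and
   this routine is normally reached through MatSetValues() on a parallel Mat):
   stash one off-process row fragment, skipping explicit zeros:

     PetscInt    cols[3] = {10,11,12};
     PetscScalar v[3]    = {1.0,0.0,3.0};
     ierr = MatStashValuesRow_Private(&mat->stash,5,3,cols,v,PETSC_TRUE);CHKERRQ(ierr);

   This records (5,10,1.0) and (5,12,3.0); the zero at column 11 is dropped, so
   stash->n grows by 2, not 3. */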

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval.
            this happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of blocks inserted. All blocks belong to the above block-row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
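
/* A small worked example of the copy loop above (illustration only): with
   bs = 2, a row-oriented input block

       [a b]
       [c d]

   arrives with its rows laid out in 'values' at a stride of cmax*bs scalars.
   The loop stores entry (j,k) of the block at array[k*bs + j], i.e. the block
   lands in space->val column-oriented as the sequence a,c,b,d. */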

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of blocks inserted. All blocks belong to the above block-row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash, the 'owners' array gives the
  ranges in blocked global indices; for the regular stash, in regular
  global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row and col indices are sent together, lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values have length 'bs2' per entry (and remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends: the first with all_a (values), the next with all_i and all_j (indices) */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
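
/* A sketch of the per-destination message layout built above (illustration
   only). For a destination rank j with m = nlengths[j] stashed entries:

     index message (tag1, 2*m PetscInts):
       [ row_0, row_1, ..., row_{m-1}, col_0, col_1, ..., col_{m-1} ]
     value message (tag2, bs2*m PetscScalars):
       [ val_0, val_1, ..., val_{m-1} ]       (each val is a bs2-scalar block)

   The receiver in MatStashScatterGetMesg_Ref() relies on this layout: it
   splits the index message in half to recover the rows and the columns. */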

/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
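
/* How the pairing bookkeeping above works (illustration only): flg_v holds two
   slots per source rank, both initialized to -1. When the value message (odd
   request index i) from rank s completes, flg_v[2*s] records its receive slot
   i/2; when the index message (even i) completes, flg_v[2*s+1] does. Only once
   both slots for s are set is the (rows,cols,vals) triple handed back, so a
   message pair is never returned half-arrived. */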

typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
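
/* A worked example of the sort/compress pass (illustration only), with bs = 1
   and ADD_VALUES. Suppose the stash holds, in insertion order:

     (row,col,val): (7,3,1.0) (2,5,2.0) (7,3,4.0) (2,1,8.0)

   After PetscSortIntWithArrayPair() the entries are grouped by row, each row
   is then sorted by column, and duplicates are combined, so the send buffer
   receives:

     (2,1,8.0) (2,5,2.0) (7,3,5.0)

   With INSERT_VALUES a duplicate overwrites instead of adding, so (7,3) keeps
   the value of whichever duplicate survives the sort (4.0 here). */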

static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
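
/* A worked size example for the datatype above (illustration only; assumes a
   build with 64-bit PetscInt, double-precision real PetscScalar, and bs = 1):
   DummyBlock is {row (8 bytes), col (8 bytes), vals (8 bytes)}, so
   offsetof(vals) = 16 and blocktype_size = 16 + 1*8 = 24, already a multiple
   of sizeof(PetscInt). MPI_Type_create_resized() then fixes the extent at 24
   bytes so consecutive blocks in a buffer are reached by simple pointer
   arithmetic on blocktype_size. */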

/* Callback invoked after the target rank has initiated its receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash *stash = (MatStash*)ctx;
  MatStashHeader *hdr = (MatStashHeader*)rdata;
  MatStashFrame *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}

/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  PetscFunctionReturn(0);
}
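
/* A sketch of the BTS exchange started above (illustration only). On the first
   assembly the receiver counts are not known globally, so
   PetscCommBuildTwoSidedFReq() runs a rendezvous: each rank sends its
   one-integer MatStashHeader (the block count) to each destination, and the
   callbacks MatStashBTSRecv_Private()/MatStashBTSSend_Private() post the
   matching payload Irecv/Isend of 'count' blocktype elements. On later
   assemblies with MAT_SUBSET_OFF_PROC_ENTRIES the rank lists are reused, the
   rendezvous is skipped, and the receiver instead trusts MPI_Get_count() on
   the message status. */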

static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
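
/* The sign convention decoded above (illustration only): a sender assembling
   with INSERT_VALUES encodes each row r as -(r+1), so row 0 maps to -1 and
   stays distinguishable from an unencoded row. The receiver infers the
   InsertMode from the sign of the first row it sees and decodes with
   r = -(row+1); ADD_VALUES rows travel unmodified, which is why mixing the two
   modes across ranks is detectable and rejected. */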

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
  } else {                      /* No reuse, so collect everything. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this reduces the wasted space the next time this stash is used.
     Update oldnmax only if it increases */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL;
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1037