xref: /petsc/src/mat/utils/matstash.c (revision 7a3b9f033cf22d0cabcfecf34925be7a11b3c45b)
1 
2 #include <petsc/private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 #if !defined(PETSC_HAVE_MPIUNI)
10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
13 #endif
14 
15 /*
16   MatStashCreate_Private - Creates a stash,currently used for all the parallel
17   matrix implementations. The stash is where elements of a matrix destined
18   to be stored on other processors are kept until matrix assembly is done.
19 
20   This is a simple minded stash. Simply adds entries to end of stash.
21 
22   Input Parameters:
23   comm - communicator, required for scatters.
24   bs   - stash block size. used when stashing blocks of values
25 
26   Output Parameters:
27   stash    - the newly created stash
28 */
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags,get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  /* flg_v holds two slots per rank; -1 marks "no message received yet"
     (see the pairing logic in MatStashScatterGetMesg_Ref()) */
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;


  /* -matstash_initial_size may supply a single value used by every rank,
     or one value per rank; extra ranks beyond the array length fall back
     to the default (umax = 0) */
  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1; /* non-positive block size means a scalar (non-blocked) stash */

  /* Start empty; buffers are allocated lazily by MatStashExpand_Private() */
  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  /* Communication state; filled in by the ScatterBegin implementations */
  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  /* Default to the BTS implementation; -matstash_legacy selects the Ref variant.
     With MPIUNI only the Ref variant is available. */
  flg  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}
102 
103 /*
104    MatStashDestroy_Private - Destroy the stash
105 */
106 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
112   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
113 
114   stash->space = 0;
115 
116   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 /*
121    MatStashScatterEnd_Private - This is called as the final stage of
122    scatter. The final stages of message passing is done here, and
123    all the memory used for message passing is cleaned up. This
124    routine also resets the stash, and deallocates the memory used
125    for the stash. It also keeps track of the current memory usage
126    so that the same value can be used the next time through.
127 */
128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129 {
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  /* Reset the per-rank message-arrival flags used by MatStashScatterGetMesg_Ref() */
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends (two per destination rank: one indices message, one values message) */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update nmaxold to be app 10% more than max n used, this way the
     wastage of space is reduced the next time this stash is used.
     Also update the oldmax, only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  /* Reset the counters so the stash can be reused for the next assembly */
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  /* Free the communication buffers allocated by MatStashScatterBegin_Ref();
     rvalues/rindices are arrays of pointers whose first entry owns the
     contiguous receive storage */
  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
179 
180 /*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash
182 
183    Input Parameters:
184    stash    - the stash
185    nstash   - the size of the stash. Indicates the number of values stored.
186    reallocs - the number of additional mallocs incurred.
187 
188 */
189 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
190 {
191   PetscInt bs2 = stash->bs*stash->bs;
192 
193   PetscFunctionBegin;
194   if (nstash) *nstash = stash->n*bs2;
195   if (reallocs) {
196     if (stash->reallocs < 0) *reallocs = 0;
197     else                     *reallocs = stash->reallocs;
198   }
199   PetscFunctionReturn(0);
200 }
201 
202 /*
203    MatStashSetInitialSize_Private - Sets the initial size of the stash
204 
205    Input Parameters:
206    stash  - the stash
207    max    - the value that is used as the max size of the stash.
208             this value is used while allocating memory.
209 */
210 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
211 {
212   PetscFunctionBegin;
213   stash->umax = max;
214   PetscFunctionReturn(0);
215 }
216 
217 /* MatStashExpand_Private - Expand the stash. This function is called
218    when the space in the stash is not sufficient to add the new values
219    being inserted into the stash.
220 
221    Input Parameters:
222    stash - the stash
223    incr  - the minimum increase requested
224 
225    Notes:
226    This routine doubles the currently used memory.
227  */
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2= stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash: pick the starting size from the user hint (umax),
     the built-in default, or the high-water mark of the previous assembly */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs2;
    else                              newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else                              newnmax = stash->nmax*2; /* double the currently used memory */
  /* make sure the requested increment actually fits */
  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
254 /*
255   MatStashValuesRow_Private - inserts values into the stash. This function
256   expects the values to be roworiented. Multiple columns belong to the same row
257   can be inserted with a single call to this function.
258 
259   Input Parameters:
260   stash  - the stash
  row    - the global row corresponding to the values
262   n      - the number of elements inserted. All elements belong to the above row.
263   idxn   - the global column indices corresponding to each of the values.
264   values - the values inserted
265 */
266 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
267 {
268   PetscErrorCode     ierr;
269   PetscInt           i,k,cnt = 0;
270   PetscMatStashSpace space=stash->space;
271 
272   PetscFunctionBegin;
273   /* Check and see if we have sufficient memory */
274   if (!space || space->local_remaining < n) {
275     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
276   }
277   space = stash->space;
278   k     = space->local_used;
279   for (i=0; i<n; i++) {
280     if (ignorezeroentries && (values[i] == 0.0)) continue;
281     space->idx[k] = row;
282     space->idy[k] = idxn[i];
283     space->val[k] = values[i];
284     k++;
285     cnt++;
286   }
287   stash->n               += cnt;
288   space->local_used      += cnt;
289   space->local_remaining -= cnt;
290   PetscFunctionReturn(0);
291 }
292 
293 /*
294   MatStashValuesCol_Private - inserts values into the stash. This function
295   expects the values to be columnoriented. Multiple columns belong to the same row
296   can be inserted with a single call to this function.
297 
298   Input Parameters:
299   stash   - the stash
  row     - the global row corresponding to the values
301   n       - the number of elements inserted. All elements belong to the above row.
302   idxn    - the global column indices corresponding to each of the values.
303   values  - the values inserted
  stepval - the consecutive values are separated by a distance of stepval.
305             this happens because the input is columnoriented.
306 */
307 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
308 {
309   PetscErrorCode     ierr;
310   PetscInt           i,k,cnt = 0;
311   PetscMatStashSpace space=stash->space;
312 
313   PetscFunctionBegin;
314   /* Check and see if we have sufficient memory */
315   if (!space || space->local_remaining < n) {
316     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
317   }
318   space = stash->space;
319   k     = space->local_used;
320   for (i=0; i<n; i++) {
321     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
322     space->idx[k] = row;
323     space->idy[k] = idxn[i];
324     space->val[k] = values[i*stepval];
325     k++;
326     cnt++;
327   }
328   stash->n               += cnt;
329   space->local_used      += cnt;
330   space->local_remaining -= cnt;
331   PetscFunctionReturn(0);
332 }
333 
334 /*
335   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
336   This function expects the values to be roworiented. Multiple columns belong
337   to the same block-row can be inserted with a single call to this function.
338   This function extracts the sub-block of values based on the dimensions of
339   the original input block, and the row,col values corresponding to the blocks.
340 
341   Input Parameters:
342   stash  - the stash
  row    - the global block-row corresponding to the values
344   n      - the number of elements inserted. All elements belong to the above row.
345   idxn   - the global block-column indices corresponding to each of the blocks of
346            values. Each block is of size bs*bs.
347   values - the values inserted
348   rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns of the original block.
350   idx    - the index of the current block-row in the original block.
351 */
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Make sure there is room for n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    /* start of the i-th bs x bs sub-block of the idx-th block-row in the input */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* transpose: one row of the input block is scattered down one
         column of the stashed block (stride bs) */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs; /* advance one row within the input block; note rmax is not used here */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
387 
388 /*
389   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
390   This function expects the values to be roworiented. Multiple columns belong
391   to the same block-row can be inserted with a single call to this function.
392   This function extracts the sub-block of values based on the dimensions of
393   the original input block, and the row,col values corresponding to the blocks.
394 
395   Input Parameters:
396   stash  - the stash
  row    - the global block-row corresponding to the values
398   n      - the number of elements inserted. All elements belong to the above row.
399   idxn   - the global block-column indices corresponding to each of the blocks of
400            values. Each block is of size bs*bs.
401   values - the values inserted
402   rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns of the original block.
404   idx    - the index of the current block-row in the original block.
405 */
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Make sure there is room for n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    /* start of the i-th bs x bs sub-block of the idx-th block-row in the input */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* input is already column-oriented: copy bs contiguous entries per column */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs; /* advance one column within the input block; note cmax is not used here */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
441 /*
442   MatStashScatterBegin_Private - Initiates the transfer of values to the
443   correct owners. This function goes through the stash, and check the
444   owners of each stashed value, and sends the values off to the owner
445   processors.
446 
447   Input Parameters:
448   stash  - the stash
449   owners - an array of size 'no-of-procs' which gives the ownership range
450            for each node.
451 
452   Notes:
    The 'owners' array in the case of the blocked-stash has the
454   ranges specified blocked global indices, and for the regular stash in
455   the proper global indices.
456 */
457 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
458 {
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor:
      walk every stashed entry, find its owning rank via the owners[]
      ownership ranges, and record that rank in owner[i] */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* Pack the send buffers: values into svalues, and for each destination
     all row indices followed by all col indices into sindices */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv[] was consumed while packing; rebuild it for the sends below */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* interleave: even slots hold index-message requests, odd slots value-message
     requests (MatStashScatterGetMesg_Ref() relies on this layout) */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
619 
620 /*
621    MatStashScatterGetMesg_Private - This function waits on the receives posted
622    in the function MatStashScatterBegin_Private() and returns one message at
623    a time to the calling function. If no messages are left, it indicates this
624    by setting flg = 0, else it sets flg = 1.
625 
626    Input Parameters:
627    stash - the stash
628 
629    Output Parameters:
630    nvals - the number of entries in the current message.
631    rows  - an array of row indices (or blocked indices) corresponding to the values
632    cols  - an array of columnindices (or blocked indices) corresponding to the values
633    vals  - the values
634    flg   - 0 indicates no more message left, and the current call has no values associated.
635            1 indicates that the current call successfully received a message, and the
636              other output parameters nvals,rows,cols,vals are set appropriately.
637 */
638 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
639 {
640   PetscErrorCode ierr;
641 
642   PetscFunctionBegin;
643   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* reproduce mode: complete receives in posting order rather than
         whichever arrives first */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    /* recv_waits interleaves index messages (even slots) and value messages
       (odd slots); i/2 is the corresponding receive-buffer index */
    if (i % 2) {
      /* odd slot: values message arrived from this source */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2; /* count blocks rather than scalars */
    } else {
      /* even slot: (row,col) indices message arrived from this source */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      /* the index buffer holds all rows first, then all cols */
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
701 
702 #if !defined(PETSC_HAVE_MPIUNI)
/* One stashed entry as sent by the BTS implementation: a (row,col) pair
   followed by its bs2 values */
typedef struct {
  PetscInt row;                 /* global (block-)row index */
  PetscInt col;                 /* global (block-)column index */
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
708 
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  /* Flatten the chain of stash spaces into parallel row/col/value-pointer arrays */
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  /* Sort all entries by row; perm keeps track of where each entry's values live */
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: later insertions overwrite earlier ones */
            ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
          }
        }
        colstart = j; /* resume after the run of duplicates */
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
756 
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Lazily build (once) the MPI datatype describing one MatStashBlock:
     two PetscInt indices followed by bs2 PetscScalar values */
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
       std::complex itself has standard layout, so does DummyBlock, recursively.
       To be compatiable with C++ std::complex, complex implementations on GPUs must also have standard layout,
       though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
       offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

       We can test if std::complex has standard layout with the following code:
       #include <iostream>
       #include <type_traits>
       #include <complex>
       int main() {
         std::cout << std::boolalpha;
         std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
       }
       Output: true
     */
    struct DummyBlock {PetscInt row,col; PetscScalar vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    /* Segmented buffers for outgoing blocks, incoming blocks, and per-message frames */
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;   /* row and col */
    blocklens[1] = bs2; /* the values */
    displs[0] = offsetof(struct DummyBlock,row);
    displs[1] = offsetof(struct DummyBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* Resize so consecutive blocks in a buffer are blocktype_size apart */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
806 
807 /* Callback invoked after target rank has initiatied receive of rendezvous message.
808  * Here we post the main sends.
809  */
810 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
811 {
812   MatStash *stash = (MatStash*)ctx;
813   MatStashHeader *hdr = (MatStashHeader*)sdata;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
818   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
819   stash->sendframes[rankid].count = hdr->count;
820   stash->sendframes[rankid].pending = 1;
821   PetscFunctionReturn(0);
822 }
823 
824 /* Callback invoked by target after receiving rendezvous message.
825  * Here we post the main recvs.
826  */
827 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
828 {
829   MatStash *stash = (MatStash*)ctx;
830   MatStashHeader *hdr = (MatStashHeader*)rdata;
831   MatStashFrame *frame;
832   PetscErrorCode ierr;
833 
834   PetscFunctionBegin;
835   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
836   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
837   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
838   frame->count = hdr->count;
839   frame->pending = 1;
840   PetscFunctionReturn(0);
841 }
842 
/*
 * MatStashScatterBegin_BTS - Begins the build-two-sided scatter of stashed
 * off-process entries: packs blocks by owner, determines destination ranks,
 * and posts the sends/receives.
 *
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;
  char *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    /* BOR of the two distinct mode values only equals their union if both were present */
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  /* Sort blocks by (row,col) and merge duplicates so each destination gets a contiguous run */
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i;
    size_t b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      /* PetscFindInt returns -(insertion point + 1) when not found; convert to the owning range */
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes (second pass over the same runs found above) */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    /* Negative row encodes INSERT_VALUES; decoded on the receive side in MatStashScatterGetMesg_BTS */
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->first_assembly_done) {
    /* Communication pattern already known from the first assembly: skip the
       rendezvous and post the sends/recvs directly with a fresh tag */
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* First assembly: discover receive ranks via the two-sided rendezvous, which
       invokes the BTSSend/BTSRecv callbacks above to post the payload messages */
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the receive-side cursor state consumed by MatStashScatterGetMesg_BTS */
  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active     = NULL;
  stash->recvframe_i          = 0;
  stash->some_i               = 0;
  stash->some_count           = 0;
  stash->recvcount            = 0;
  stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode           = &mat->insertmode;
  PetscFunctionReturn(0);
}
956 
/*
  MatStashScatterGetMesg_BTS - Hands back one received stashed block at a time.

  Output Parameters:
    n   - number of blocks returned (always 1 here)
    row - pointer to the block's (global) row index
    col - pointer to the block's column index
    val - pointer to the block's bs*bs values
    flg - 1 if a block was returned, 0 when all messages have been consumed

  Iterates block-by-block through the active receive frame; when a frame is
  exhausted it waits (MPI_Waitsome) for more messages to complete and validates
  InsertMode consistency on each newly arrived message.
*/
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame that still has unread blocks, waiting for arrivals as needed */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* A negative row in the first block encodes INSERT_VALUES (see MatStashScatterBegin_BTS) */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  /* Return the next block from the active frame */
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1); /* decode the INSERT_VALUES encoding */
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
995 
996 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
997 {
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1002   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
1003     void *dummy;
1004     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
1005   } else {                      /* No reuse, so collect everything. */
1006     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
1007   }
1008 
1009   /* Now update nmaxold to be app 10% more than max n used, this way the
1010      wastage of space is reduced the next time this stash is used.
1011      Also update the oldmax, only if it increases */
1012   if (stash->n) {
1013     PetscInt bs2     = stash->bs*stash->bs;
1014     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1015     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1016   }
1017 
1018   stash->nmax       = 0;
1019   stash->n          = 0;
1020   stash->reallocs   = -1;
1021   stash->nprocessed = 0;
1022 
1023   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1024 
1025   stash->space = 0;
1026 
1027   PetscFunctionReturn(0);
1028 }
1029 
/*
  MatStashScatterDestroy_BTS - Releases all BTS communication state held by the
  stash: segmented buffers, the committed MPI block datatype, the per-rank
  send/recv bookkeeping arrays, and the request/status scratch arrays.

  Safe to call repeatedly: PetscFree-family calls accept already-freed (NULL)
  pointers and the datatype is freed only when it was created.
*/
PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL; /* pointed into segrecvframe, now dangling */
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1052 #endif
1053