xref: /petsc/src/mat/utils/matstash.c (revision 83d0d507e8eaf8d844b49c5dbd0d8d7c8cefa37b)
1 #include <petsc/private/matimpl.h>
2 
3 #define DEFAULT_STASH_SIZE 10000
4 
5 static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
8 #if !defined(PETSC_HAVE_MPIUNI)
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
12 #endif
13 
14 /*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple minded stash. Simply adds entries to end of stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size. used when stashing blocks of values
24 
25   Output Parameter:
26   stash    - the newly created stash
27 */
PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
{
  PetscInt  max, *opt, nopt, i;
  PetscBool flg;

  PetscFunctionBegin;
  /* Two distinct tags are required: one for the index messages and one for the
     value messages exchanged during a stash scatter */
  stash->comm = comm;

  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
  PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
  PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
  /* flg_v holds, per source rank, the receive-buffer indices of the (value,index)
     message pair; -1 means "not yet received" (consumed in MatStashScatterGetMesg_Ref()) */
  PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
  for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;

  /* The user may preset the stash size: either a single value for all ranks,
     or an array giving a value per rank */
  nopt = stash->size;
  PetscCall(PetscMalloc1(nopt, &opt));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
  if (flg) {
    if (nopt == 1) max = opt[0];                          /* one value applies to every rank */
    else if (nopt == stash->size) max = opt[stash->rank]; /* one value per rank */
    else if (stash->rank < nopt) max = opt[stash->rank];  /* fewer values than ranks; use ours if given */
    else max = 0;                                         /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0; /* no user hint; MatStashExpand_Private() falls back to its default */
  }
  PetscCall(PetscFree(opt));
  if (bs <= 0) bs = 1; /* default to scalar (non-blocked) stashing */

  stash->bs         = bs;
  stash->nmax       = 0;  /* current capacity (in blocks); grown lazily on first insertion */
  stash->oldnmax    = 0;  /* high-water mark from the previous assembly, used to presize reuse */
  stash->n          = 0;  /* number of stashed entries */
  stash->reallocs   = -1; /* -1 encodes "never allocated"; reported as 0 by MatStashGetInfo_Private() */
  stash->space_head = NULL;
  stash->space      = NULL;

  stash->send_waits  = NULL;
  stash->recv_waits  = NULL;
  stash->send_status = NULL;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = NULL;
  stash->rvalues     = NULL;
  stash->rindices    = NULL;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
#if !defined(PETSC_HAVE_MPIUNI)
  /* Select the scatter implementation: BTS (default) or the legacy reference
     implementation when -matstash_legacy is given (or when MPI is MPIUNI) */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL; /* the legacy implementation has no persistent scatter state */
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
100 /*
101    MatStashDestroy_Private - Destroy the stash
102 */
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscFunctionBegin;
  /* Release the whole chain of stash spaces; stash->space is just a cursor into it */
  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
  /* ScatterDestroy is set only by the BTS implementation (NULL for the legacy one) */
  if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
  stash->space = NULL;
  PetscCall(PetscFree(stash->flg_v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
112 
113 /*
114    MatStashScatterEnd_Private - This is called as the final stage of
115    scatter. The final stages of message passing is done here, and
116    all the memory used for message passing is cleaned up. This
117    routine also resets the stash, and deallocates the memory used
118    for the stash. It also keeps track of the current memory usage
119    so that the same value can be used the next time through.
120 */
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation (BTS or legacy) selected in MatStashCreate_Private() */
  PetscCall((*stash->ScatterEnd)(stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
127 
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscMPIInt nsends = stash->nsends;
  PetscInt    bs2, oldnmax;
  MPI_Status *send_status;

  PetscFunctionBegin;
  /* Reset the message-pairing table for the next scatter (see MatStashScatterGetMesg_Ref()) */
  for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends: each destination was sent two messages (indices and values) */
  if (nsends) {
    PetscCall(PetscMalloc1(2 * nsends, &send_status));
    PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
    PetscCall(PetscFree(send_status));
  }

  /* Now update nmaxold to be app 10% more than max n used, this way the
     wastage of space is reduced the next time this stash is used.
     Also update the oldmax, only if it increases */
  if (stash->n) {
    bs2     = stash->bs * stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5) * bs2; /* 10% headroom plus a small constant */
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  /* Reset counters so the stash can be reused for the next assembly */
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

  stash->space = NULL;

  /* Free the communication buffers set up by MatStashScatterBegin_Ref().
     rvalues[0]/rindices[0] are the contiguous data arrays behind the per-message
     pointers (presumably allocated that way by PetscPostIrecvScalar/Int — the
     allocation is not visible here). */
  PetscCall(PetscFree(stash->send_waits));
  PetscCall(PetscFree(stash->recv_waits));
  PetscCall(PetscFree2(stash->svalues, stash->sindices));
  PetscCall(PetscFree(stash->rvalues[0]));
  PetscCall(PetscFree(stash->rvalues));
  PetscCall(PetscFree(stash->rindices[0]));
  PetscCall(PetscFree(stash->rindices));
  PetscFunctionReturn(PETSC_SUCCESS);
}
170 
171 /*
172    MatStashGetInfo_Private - Gets the relevant statistics of the stash
173 
174    Input Parameters:
175    stash    - the stash
176    nstash   - the size of the stash. Indicates the number of values stored.
177    reallocs - the number of additional mallocs incurred.
178 
179 */
180 PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181 {
182   PetscInt bs2 = stash->bs * stash->bs;
183 
184   PetscFunctionBegin;
185   if (nstash) *nstash = stash->n * bs2;
186   if (reallocs) {
187     if (stash->reallocs < 0) *reallocs = 0;
188     else *reallocs = stash->reallocs;
189   }
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 /*
194    MatStashSetInitialSize_Private - Sets the initial size of the stash
195 
196    Input Parameters:
197    stash  - the stash
198    max    - the value that is used as the max size of the stash.
199             this value is used while allocating memory.
200 */
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  /* umax is consumed by MatStashExpand_Private() when sizing a new/reused stash */
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
207 
208 /* MatStashExpand_Private - Expand the stash. This function is called
209    when the space in the stash is not sufficient to add the new values
210    being inserted into the stash.
211 
212    Input Parameters:
213    stash - the stash
214    incr  - the minimum increase requested
215 
216    Note:
217    This routine doubles the currently used memory.
218  */
219 static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220 {
221   PetscInt newnmax, bs2 = stash->bs * stash->bs;
222 
223   PetscFunctionBegin;
224   /* allocate a larger stash */
225   if (!stash->oldnmax && !stash->nmax) { /* new stash */
226     if (stash->umax) newnmax = stash->umax / bs2;
227     else newnmax = DEFAULT_STASH_SIZE / bs2;
228   } else if (!stash->nmax) { /* reusing stash */
229     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
230     else newnmax = stash->oldnmax / bs2;
231   } else newnmax = stash->nmax * 2;
232   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
233 
234   /* Get a MatStashSpace and attach it to stash */
235   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
236   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
237     stash->space_head = stash->space;
238   }
239 
240   stash->reallocs++;
241   stash->nmax = newnmax;
242   PetscFunctionReturn(PETSC_SUCCESS);
243 }
244 /*
245   MatStashValuesRow_Private - inserts values into the stash. This function
246   expects the values to be row-oriented. Multiple columns belong to the same row
247   can be inserted with a single call to this function.
248 
249   Input Parameters:
250   stash  - the stash
251   row    - the global row corresponding to the values
252   n      - the number of elements inserted. All elements belong to the above row.
253   idxn   - the global column indices corresponding to each of the values.
254   values - the values inserted
255 */
256 PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
257 {
258   PetscInt           i, k, cnt = 0;
259   PetscMatStashSpace space = stash->space;
260 
261   PetscFunctionBegin;
262   /* Check and see if we have sufficient memory */
263   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
264   space = stash->space;
265   k     = space->local_used;
266   for (i = 0; i < n; i++) {
267     if (ignorezeroentries && values && values[i] == 0.0) continue;
268     space->idx[k] = row;
269     space->idy[k] = idxn[i];
270     space->val[k] = values ? values[i] : 0.0;
271     k++;
272     cnt++;
273   }
274   stash->n += cnt;
275   space->local_used += cnt;
276   space->local_remaining -= cnt;
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
280 /*
281   MatStashValuesCol_Private - inserts values into the stash. This function
282   expects the values to be column-oriented. Multiple columns belong to the same row
283   can be inserted with a single call to this function.
284 
285   Input Parameters:
286   stash   - the stash
287   row     - the global row corresponding to the values
288   n       - the number of elements inserted. All elements belong to the above row.
289   idxn    - the global column indices corresponding to each of the values.
290   values  - the values inserted
  stepval - the consecutive values are separated by a distance of stepval.
292             this happens because the input is column-oriented.
293 */
294 PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
295 {
296   PetscInt           i, k, cnt = 0;
297   PetscMatStashSpace space = stash->space;
298 
299   PetscFunctionBegin;
300   /* Check and see if we have sufficient memory */
301   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
302   space = stash->space;
303   k     = space->local_used;
304   for (i = 0; i < n; i++) {
305     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
306     space->idx[k] = row;
307     space->idy[k] = idxn[i];
308     space->val[k] = values ? values[i * stepval] : 0.0;
309     k++;
310     cnt++;
311   }
312   stash->n += cnt;
313   space->local_used += cnt;
314   space->local_remaining -= cnt;
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 /*
319   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
320   This function expects the values to be row-oriented. Multiple columns belong
321   to the same block-row can be inserted with a single call to this function.
322   This function extracts the sub-block of values based on the dimensions of
323   the original input block, and the row,col values corresponding to the blocks.
324 
325   Input Parameters:
326   stash  - the stash
327   row    - the global block-row corresponding to the values
328   n      - the number of elements inserted. All elements belong to the above row.
329   idxn   - the global block-column indices corresponding to each of the blocks of
330            values. Each block is of size bs*bs.
331   values - the values inserted
332   rmax   - the number of block-rows in the original block.
333   cmax   - the number of block-columns on the original block.
334   idx    - the index of the current block-row in the original block.
335 */
336 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
337 {
338   PetscInt           i, j, k, bs2, bs = stash->bs, l;
339   const PetscScalar *vals;
340   PetscScalar       *array;
341   PetscMatStashSpace space = stash->space;
342 
343   PetscFunctionBegin;
344   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
345   space = stash->space;
346   l     = space->local_used;
347   bs2   = bs * bs;
348   for (i = 0; i < n; i++) {
349     space->idx[l] = row;
350     space->idy[l] = idxn[i];
351     /* Now copy over the block of values. Store the values column-oriented.
352        This enables inserting multiple blocks belonging to a row with a single
353        function call */
354     array = space->val + bs2 * l;
355     vals  = values + idx * bs2 * n + bs * i;
356     for (j = 0; j < bs; j++) {
357       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
358       array++;
359       vals += cmax * bs;
360     }
361     l++;
362   }
363   stash->n += n;
364   space->local_used += n;
365   space->local_remaining -= n;
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 /*
370   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
371   This function expects the values to be column-oriented. Multiple columns belong
372   to the same block-row can be inserted with a single call to this function.
373   This function extracts the sub-block of values based on the dimensions of
374   the original input block, and the row,col values corresponding to the blocks.
375 
376   Input Parameters:
377   stash  - the stash
378   row    - the global block-row corresponding to the values
379   n      - the number of elements inserted. All elements belong to the above row.
380   idxn   - the global block-column indices corresponding to each of the blocks of
381            values. Each block is of size bs*bs.
382   values - the values inserted
383   rmax   - the number of block-rows in the original block.
384   cmax   - the number of block-columns on the original block.
385   idx    - the index of the current block-row in the original block.
386 */
387 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
388 {
389   PetscInt           i, j, k, bs2, bs = stash->bs, l;
390   const PetscScalar *vals;
391   PetscScalar       *array;
392   PetscMatStashSpace space = stash->space;
393 
394   PetscFunctionBegin;
395   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
396   space = stash->space;
397   l     = space->local_used;
398   bs2   = bs * bs;
399   for (i = 0; i < n; i++) {
400     space->idx[l] = row;
401     space->idy[l] = idxn[i];
402     /* Now copy over the block of values. Store the values column-oriented.
403      This enables inserting multiple blocks belonging to a row with a single
404      function call */
405     array = space->val + bs2 * l;
406     vals  = values + idx * bs2 * n + bs * i;
407     for (j = 0; j < bs; j++) {
408       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
409       array += bs;
410       vals += rmax * bs;
411     }
412     l++;
413   }
414   stash->n += n;
415   space->local_used += n;
416   space->local_remaining -= n;
417   PetscFunctionReturn(PETSC_SUCCESS);
418 }
419 /*
420   MatStashScatterBegin_Private - Initiates the transfer of values to the
421   correct owners. This function goes through the stash, and check the
422   owners of each stashed value, and sends the values off to the owner
423   processors.
424 
425   Input Parameters:
426   stash  - the stash
427   owners - an array of size 'no-of-procs' which gives the ownership range
428            for each node.
429 
430   Note:
    The 'owners' array in the case of the blocked-stash has the
432   ranges specified blocked global indices, and for the regular stash in
433   the proper global indices.
434 */
PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation (BTS or legacy) selected in MatStashCreate_Private() */
  PetscCall((*stash->ScatterBegin)(mat, stash, owners));
  PetscFunctionReturn(PETSC_SUCCESS);
}
441 
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, bs2;
  PetscInt           size = stash->size;
  PetscInt          *sindices, **rindices, j, ii, idx, lastidx, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor: walk every stashed
     entry, find its owning rank from the owners[] ranges, and record it */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  ii = j  = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l = 0; l < space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j < size; j++) {
        if (idx >= owners[j] && idx < owners[j + 1]) {
          nlengths[j]++;  /* one more entry destined for rank j */
          owner[ii] = j;  /* remember the destination for the packing pass below */
          break;
        }
      }
      ii++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; /* 0/1 flag: does rank i receive anything from us */
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];     /* per-destination offset into svalues (in blocks) */
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; /* per-destination offset into sindices (row+col pairs) */
  }

  /* Second pass over the stash: pack values and indices into the send buffers,
     using owner[ii] recorded in the first pass */
  ii    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[ii];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows first, then cols: the cols for destination j live nlengths[j] slots later */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      ii++;
    }
    space = space_next;
  }
  /* startv[] was consumed as a cursor above; rebuild the offsets for the sends */
  startv[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  /* Post both sends per destination: indices under tag1, values under tag2 */
  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
  for (PetscMPIInt i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private():
     even slots hold the index requests, odd slots the value requests */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (PetscMPIInt i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* Stash the buffers and counts; they are consumed by GetMesg and freed by ScatterEnd */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
599 
600 /*
601    MatStashScatterGetMesg_Private - This function waits on the receives posted
602    in the function MatStashScatterBegin_Private() and returns one message at
603    a time to the calling function. If no messages are left, it indicates this
604    by setting flg = 0, else it sets flg = 1.
605 
606    Input Parameter:
607    stash - the stash
608 
609    Output Parameters:
610    nvals - the number of entries in the current message.
611    rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
613    vals  - the values
614    flg   - 0 indicates no more message left, and the current call has no values associated.
615            1 indicates that the current call successfully received a message, and the
616              other output parameters nvals,rows,cols,vals are set appropriately.
617 */
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation (BTS or legacy) selected in MatStashCreate_Private() */
  PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
624 
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  bs2 = stash->bs * stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* -matstash_reproduce: complete the receives in posted order for determinism */
      i = stash->reproduce_count++;
      PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
    } else {
      /* complete whichever of the 2*nrecvs pending receives finishes first */
      PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    }
    PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others.
       recv_waits was laid out by MatStashScatterBegin_Ref() with index messages
       in even slots and value messages in odd slots, so i/2 is the message
       (buffer) number for source rank recv_status.MPI_SOURCE */
    if (i % 2) {
      /* odd slot: value message; count is in scalars, bs2 scalars per entry */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals                            = *nvals / bs2;
    } else {
      /* even slot: index message */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
      *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];     /* value-buffer index, or -1 */
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1]; /* index-buffer index, or -1 */
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* cols are stored immediately after the rows in the same buffer */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
674 
675 #if !defined(PETSC_HAVE_MPIUNI)
/* One (row,col) entry as sent by the BTS scatter; the true element size is
   stash->blocktype_size (see MatStashBlockTypeSetUp()), not sizeof(MatStashBlock) */
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;
681 
/* Gather all stashed entries, sort them by (row,col), merge duplicate locations
   (summing for ADD_VALUES, keeping the last insertion for INSERT_VALUES), and
   pack the result into stash->segsendblocks for the BTS scatter. */
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
  PetscScalar      **valptr;

  PetscFunctionBegin;
  /* Flatten the chain of stash spaces into parallel row/col/value-pointer arrays */
  PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
  for (space = stash->space_head, cnt = 0; space; space = space->next) {
    for (i = 0; i < space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i * bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
  PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
      for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
        PetscInt       j, l;
        MatStashBlock *block;
        PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
        for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: each later duplicate overwrites, so the last one wins */
            PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
          }
        }
        colstart = j; /* continue with the next distinct column in this row */
      }
      rowstart = i;
    }
  }
  PetscCall(PetscFree4(row, col, valptr, perm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
728 
/* Lazily build the MPI datatype describing one MatStashBlock (row, col, bs2 values)
   and create the segmented buffers used by the BTS scatter.  Idempotent: does
   nothing once stash->blocktype has been created. */
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2], stype;
    /*
        DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
       std::complex itself has standard layout, so does DummyBlock, recursively.
       To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
       though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
       offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

       We can test if std::complex has standard layout with the following code:
       #include <iostream>
       #include <type_traits>
       #include <complex>
       int main() {
         std::cout << std::boolalpha;
         std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
       }
       Output: true
     */
    struct DummyBlock {
      PetscInt    row, col;
      PetscScalar vals;
    };

    /* True element size: header plus bs2 scalars ... */
    stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
    PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
    /* Two struct members: {row,col} as MPIU_INT, then bs2 MPIU_SCALAR values */
    blocklens[0] = 2;
    blocklens[1] = (PetscMPIInt)bs2;
    displs[0]    = offsetof(struct DummyBlock, row);
    displs[1]    = offsetof(struct DummyBlock, vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
    PetscCallMPI(MPI_Type_commit(&stype));
    /* Resize so the extent matches blocktype_size, making arrays of blocks contiguous */
    PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
    PetscCallMPI(MPI_Type_commit(&stash->blocktype));
    PetscCallMPI(MPI_Type_free(&stype));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
780 
781 /* Callback invoked after target rank has initiated receive of rendezvous message.
782  * Here we post the main sends.
783  */
784 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
785 {
786   MatStash       *stash = (MatStash *)ctx;
787   MatStashHeader *hdr   = (MatStashHeader *)sdata;
788 
789   PetscFunctionBegin;
790   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
791   PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
792   stash->sendframes[rankid].count   = hdr->count;
793   stash->sendframes[rankid].pending = 1;
794   PetscFunctionReturn(PETSC_SUCCESS);
795 }
796 
797 /*
798     Callback invoked by target after receiving rendezvous message.
799     Here we post the main recvs.
800  */
801 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
802 {
803   MatStash       *stash = (MatStash *)ctx;
804   MatStashHeader *hdr   = (MatStashHeader *)rdata;
805   MatStashFrame  *frame;
806 
807   PetscFunctionBegin;
808   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
809   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
810   PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
811   frame->count   = hdr->count;
812   frame->pending = 1;
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 /*
817  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
818  */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
{
  PetscCount nblocks;
  char      *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
  }

  /* Sort/compress the stashed entries; after this the packed blocks in segsendblocks are ordered by row,
     so all blocks destined for a given owning rank form one contiguous run (the scans below rely on this). */
  PetscCall(MatStashBlockTypeSetUp(stash));
  PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
  PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
  PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt   i;
    PetscCount b;
    /* Reuse the sendranks established in the first assembly: walk the sorted blocks once,
       attributing each contiguous run to the next previously-known destination rank. */
    for (i = 0, b = 0; i < stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for (; b < nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
        PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break; /* row belongs to a later rank: close this run */
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt   sendno;
    PetscCount i, rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
      if (owner < 0) owner = -(owner + 2); /* negative result encodes the insertion point; recover the index of the owning range */
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       iowner;
      PetscMPIInt    owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
      PetscCall(PetscMPIIntCast(iowner, &owner));
      if (owner < 0) owner = -(owner + 2); /* same insertion-point decoding as in the counting pass above */
      stash->sendranks[sendno] = owner;
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      /* Frame points directly into the packed send buffer; no extra copy is made. */
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
      sendno++;
      rowstart = i;
    }
    PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    /* In-band encoding: a negated row (-(row+1)) tells the receiver INSERT_VALUES was used;
       MatStashScatterGetMesg_BTS decodes it back. */
    for (PetscCount i = 0; i < nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
      sendblock_i->row           = -(sendblock_i->row + 1);
    }
  }

  if (stash->first_assembly_done) {
    PetscMPIInt i, tag;

    /* Communication pattern is already known: skip the rendezvous and post recvs/sends directly. */
    PetscCall(PetscCommGetNewTag(stash->comm, &tag));
    for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
    for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* First assembly: discover recv ranks via the two-sided rendezvous; the callbacks post the actual sends/recvs. */
    PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
    PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the receive-side iteration state consumed by MatStashScatterGetMesg_BTS. */
  PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
  stash->recvframe_active    = NULL;
  stash->recvframe_i         = 0;
  stash->some_i              = 0;
  stash->some_count          = 0;
  stash->recvcount           = 0;
  stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode          = &mat->insertmode;
  PetscFunctionReturn(PETSC_SUCCESS);
}
928 
/* Hands back one received stash entry per call (*n = 1), advancing through completed receive
   frames; sets *flg = 1 while an entry is returned and *flg = 0 once all receives are drained. */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
{
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame with unread entries: either no frame is active yet, or the active one is exhausted. */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
      /* Block until at least one more receive completes; indices of completed requests land in some_indices. */
      PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) {                                  /* Count what was actually sent */
      PetscMPIInt ic;

      PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
      stash->recvframe_count = ic;
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* Inspect the first block: a negative row encodes INSERT_VALUES (see MatStashScatterBegin_BTS). */
      block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
      PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  /* Return pointers into the current block of the active frame; decode the in-band INSERT_VALUES row encoding. */
  *n    = 1;
  block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
969 
970 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
971 {
972   PetscFunctionBegin;
973   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
974   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
975     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
976   } else { /* No reuse, so collect everything. */
977     PetscCall(MatStashScatterDestroy_BTS(stash));
978   }
979 
980   /* Now update nmaxold to be app 10% more than max n used, this way the
981      wastage of space is reduced the next time this stash is used.
982      Also update the oldmax, only if it increases */
983   if (stash->n) {
984     PetscInt bs2     = stash->bs * stash->bs;
985     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
986     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
987   }
988 
989   stash->nmax       = 0;
990   stash->n          = 0;
991   stash->reallocs   = -1;
992   stash->nprocessed = 0;
993 
994   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
995 
996   stash->space = NULL;
997   PetscFunctionReturn(PETSC_SUCCESS);
998 }
999 
/* Releases all BTS communication state: segmented buffers, the committed MPI datatype,
   and the per-rank send/receive bookkeeping arrays. Safe to call when already destroyed
   (PetscFree and PetscSegBufferDestroy tolerate NULL; the datatype is guarded). */
PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
  PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
  /* recvframes was extracted in place from segrecvframe (see MatStashScatterBegin_BTS), so it dangles now */
  stash->recvframes = NULL;
  PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
  /* Only free the datatype if MatStashBlockTypeSetUp actually created it */
  if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
  PetscCall(PetscFree(stash->sendreqs));
  PetscCall(PetscFree(stash->recvreqs));
  PetscCall(PetscFree(stash->recvranks));
  PetscCall(PetscFree(stash->recvhdr));
  PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1018 #endif
1019