xref: /petsc/src/mat/utils/matstash.c (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
1 #include <petsc/private/matimpl.h>
2 
3 #define DEFAULT_STASH_SIZE 10000
4 
5 static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
8 #if !defined(PETSC_HAVE_MPIUNI)
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
12 #endif
13 
14 /*
15   MatStashCreate_Private - Creates a stash, currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple-minded stash; it simply appends entries to the end of the stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size, used when stashing blocks of values
24 
25   Output Parameter:
26   stash    - the newly created stash
27 */
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
29 {
30   PetscInt  max, *opt, nopt, i;
31   PetscBool flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags, both obtained with PetscCommGetNewTag() */
35   stash->comm = comm;
36 
37   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
38   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
39   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
40   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
41   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
43 
44   nopt = stash->size;
45   PetscCall(PetscMalloc1(nopt, &opt));
46   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47   if (flg) {
48     if (nopt == 1) max = opt[0];
49     else if (nopt == stash->size) max = opt[stash->rank];
50     else if (stash->rank < nopt) max = opt[stash->rank];
51     else max = 0; /* Use default */
52     stash->umax = max;
53   } else {
54     stash->umax = 0;
55   }
56   PetscCall(PetscFree(opt));
57   if (bs <= 0) bs = 1;
58 
59   stash->bs         = bs;
60   stash->nmax       = 0;
61   stash->oldnmax    = 0;
62   stash->n          = 0;
63   stash->reallocs   = -1;
64   stash->space_head = NULL;
65   stash->space      = NULL;
66 
67   stash->send_waits  = NULL;
68   stash->recv_waits  = NULL;
69   stash->send_status = NULL;
70   stash->nsends      = 0;
71   stash->nrecvs      = 0;
72   stash->svalues     = NULL;
73   stash->rvalues     = NULL;
74   stash->rindices    = NULL;
75   stash->nprocessed  = 0;
76   stash->reproduce   = PETSC_FALSE;
77   stash->blocktype   = MPI_DATATYPE_NULL;
78 
79   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
80 #if !defined(PETSC_HAVE_MPIUNI)
81   flg = PETSC_FALSE;
82   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83   if (!flg) {
84     stash->ScatterBegin   = MatStashScatterBegin_BTS;
85     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86     stash->ScatterEnd     = MatStashScatterEnd_BTS;
87     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88   } else {
89 #endif
90     stash->ScatterBegin   = MatStashScatterBegin_Ref;
91     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92     stash->ScatterEnd     = MatStashScatterEnd_Ref;
93     stash->ScatterDestroy = NULL;
94 #if !defined(PETSC_HAVE_MPIUNI)
95   }
96 #endif
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
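
/*
   Usage sketch for the option handling above (illustrative, assuming 4 MPI ranks):

       -matstash_initial_size 10000               same initial size on every rank
       -matstash_initial_size 500,2000,2000,500   one value per rank; rank i uses entry i

   A single value applies to every rank, a per-rank list lets each rank choose its own entry,
   and with no option the built-in DEFAULT_STASH_SIZE takes effect at the first expansion
   (see MatStashExpand_Private()).
*/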
99 
100 /*
101    MatStashDestroy_Private - Destroy the stash
102 */
103 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104 {
105   PetscFunctionBegin;
106   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
107   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
108   stash->space = NULL;
109   PetscCall(PetscFree(stash->flg_v));
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 /*
114    MatStashScatterEnd_Private - This is called as the final stage of
115    scatter. The final stages of message passing are completed here, and
116    all the memory used for message passing is cleaned up. This
117    routine also resets the stash and deallocates the memory used
118    for the stash. It also keeps track of the current memory usage
119    so that the same value can be used the next time through.
120 */
121 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
122 {
123   PetscFunctionBegin;
124   PetscCall((*stash->ScatterEnd)(stash));
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129 {
130   PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
131   MPI_Status *send_status;
132 
133   PetscFunctionBegin;
134   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
135   /* wait on sends */
136   if (nsends) {
137     PetscCall(PetscMalloc1(2 * nsends, &send_status));
138     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
139     PetscCall(PetscFree(send_status));
140   }
141 
142   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
143      less space is wasted the next time this stash is used.
144      Only update oldnmax if it increases */
145   if (stash->n) {
146     bs2     = stash->bs * stash->bs;
147     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
148     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
149   }
150 
151   stash->nmax       = 0;
152   stash->n          = 0;
153   stash->reallocs   = -1;
154   stash->nprocessed = 0;
155 
156   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
157 
158   stash->space = NULL;
159 
160   PetscCall(PetscFree(stash->send_waits));
161   PetscCall(PetscFree(stash->recv_waits));
162   PetscCall(PetscFree2(stash->svalues, stash->sindices));
163   PetscCall(PetscFree(stash->rvalues[0]));
164   PetscCall(PetscFree(stash->rvalues));
165   PetscCall(PetscFree(stash->rindices[0]));
166   PetscCall(PetscFree(stash->rindices));
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 /*
171    MatStashGetInfo_Private - Gets the relevant statistics of the stash
172 
173    Input Parameter:  stash - the stash
174    Output Parameters:
175    nstash   - the size of the stash, indicating the number of values stored.
176    reallocs - the number of additional mallocs incurred.
177 
178 */
179 PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
180 {
181   PetscInt bs2 = stash->bs * stash->bs;
182 
183   PetscFunctionBegin;
184   if (nstash) *nstash = stash->n * bs2;
185   if (reallocs) {
186     if (stash->reallocs < 0) *reallocs = 0;
187     else *reallocs = stash->reallocs;
188   }
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 /*
193    MatStashSetInitialSize_Private - Sets the initial size of the stash
194 
195    Input Parameters:
196    stash  - the stash
197    max    - the value that is used as the maximum size of the stash;
198             this value is used when allocating memory.
199 */
200 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
201 {
202   PetscFunctionBegin;
203   stash->umax = max;
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
207 /* MatStashExpand_Private - Expand the stash. This function is called
208    when the space in the stash is not sufficient to add the new values
209    being inserted into the stash.
210 
211    Input Parameters:
212    stash - the stash
213    incr  - the minimum increase requested
214 
215    Note:
216    This routine doubles the currently used memory.
217  */
218 static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
219 {
220   PetscInt newnmax, bs2 = stash->bs * stash->bs;
221 
222   PetscFunctionBegin;
223   /* allocate a larger stash */
224   if (!stash->oldnmax && !stash->nmax) { /* new stash */
225     if (stash->umax) newnmax = stash->umax / bs2;
226     else newnmax = DEFAULT_STASH_SIZE / bs2;
227   } else if (!stash->nmax) { /* reusing stash */
228     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
229     else newnmax = stash->oldnmax / bs2;
230   } else newnmax = stash->nmax * 2;
231   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
232 
233   /* Get a MatStashSpace and attach it to stash */
234   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
235   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
236     stash->space_head = stash->space;
237   }
238 
239   stash->reallocs++;
240   stash->nmax = newnmax;
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
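
/*
   Worked example of the growth rule above, assuming bs = 1 and no -matstash_initial_size
   option: the first expansion allocates DEFAULT_STASH_SIZE = 10000 entries, each later
   expansion doubles nmax (10000 -> 20000 -> ...), and if even the doubled size cannot hold
   the requested incr entries an extra 2*incr is added. After assembly MatStashScatterEnd_*()
   records oldnmax at roughly 1.1*n, so a reused stash starts near its previous high-water
   mark rather than at the default.
*/
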
243 /*
244   MatStashValuesRow_Private - inserts values into the stash. This function
245   expects the values to be row-oriented. Multiple columns belonging to the same row
246   can be inserted with a single call to this function.
247 
248   Input Parameters:
249   stash  - the stash
250   row    - the global row corresponding to the values
251   n      - the number of elements inserted. All elements belong to the above row.
252   idxn   - the global column indices corresponding to each of the values.
253   values - the values inserted
254 */
255 PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
256 {
257   PetscInt           i, k, cnt = 0;
258   PetscMatStashSpace space = stash->space;
259 
260   PetscFunctionBegin;
261   /* Check and see if we have sufficient memory */
262   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
263   space = stash->space;
264   k     = space->local_used;
265   for (i = 0; i < n; i++) {
266     if (ignorezeroentries && values && values[i] == 0.0) continue;
267     space->idx[k] = row;
268     space->idy[k] = idxn[i];
269     space->val[k] = values ? values[i] : 0.0;
270     k++;
271     cnt++;
272   }
273   stash->n += cnt;
274   space->local_used += cnt;
275   space->local_remaining -= cnt;
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 /*
280   MatStashValuesCol_Private - inserts values into the stash. This function
281   expects the values to be column-oriented. Multiple columns belonging to the same row
282   can be inserted with a single call to this function.
283 
284   Input Parameters:
285   stash   - the stash
286   row     - the global row corresponding to the values
287   n       - the number of elements inserted. All elements belong to the above row.
288   idxn    - the global column indices corresponding to each of the values.
289   values  - the values inserted
290   stepval - the consecutive values are separated by a distance of stepval;
291             this happens because the input is column-oriented.
292 */
293 PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
294 {
295   PetscInt           i, k, cnt = 0;
296   PetscMatStashSpace space = stash->space;
297 
298   PetscFunctionBegin;
299   /* Check and see if we have sufficient memory */
300   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
301   space = stash->space;
302   k     = space->local_used;
303   for (i = 0; i < n; i++) {
304     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
305     space->idx[k] = row;
306     space->idy[k] = idxn[i];
307     space->val[k] = values ? values[i * stepval] : 0.0;
308     k++;
309     cnt++;
310   }
311   stash->n += cnt;
312   space->local_used += cnt;
313   space->local_remaining -= cnt;
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
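
/*
   Illustration of stepval (a hedged example): when the caller supplies a logically dense
   m-by-n block stored column-oriented, the entries of one row are not contiguous; consecutive
   entries of a row are stepval scalars apart, typically stepval = m. For a 2x3 block stored as
   {a00,a10,a01,a11,a02,a12}, passing stepval = 2 with values pointing at a00 stashes row 0
   as a00, a01, a02.
*/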
316 
317 /*
318   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
319   This function expects the values to be row-oriented. Multiple block-columns belonging
320   to the same block-row can be inserted with a single call to this function.
321   This function extracts the sub-block of values based on the dimensions of
322   the original input block, and the row,col values corresponding to the blocks.
323 
324   Input Parameters:
325   stash  - the stash
326   row    - the global block-row corresponding to the values
327   n      - the number of elements inserted. All elements belong to the above row.
328   idxn   - the global block-column indices corresponding to each of the blocks of
329            values. Each block is of size bs*bs.
330   values - the values inserted
331   rmax   - the number of block-rows in the original block.
332   cmax   - the number of block-columns in the original block.
333   idx    - the index of the current block-row in the original block.
334 */
335 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
336 {
337   PetscInt           i, j, k, bs2, bs = stash->bs, l;
338   const PetscScalar *vals;
339   PetscScalar       *array;
340   PetscMatStashSpace space = stash->space;
341 
342   PetscFunctionBegin;
343   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
344   space = stash->space;
345   l     = space->local_used;
346   bs2   = bs * bs;
347   for (i = 0; i < n; i++) {
348     space->idx[l] = row;
349     space->idy[l] = idxn[i];
350     /* Now copy over the block of values. Store the values column oriented.
351        This enables inserting multiple blocks belonging to a row with a single
352        function call */
353     array = space->val + bs2 * l;
354     vals  = values + idx * bs2 * n + bs * i;
355     for (j = 0; j < bs; j++) {
356       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
357       array++;
358       vals += cmax * bs;
359     }
360     l++;
361   }
362   stash->n += n;
363   space->local_used += n;
364   space->local_remaining -= n;
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 /*
369   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
370   This function expects the values to be column-oriented. Multiple block-columns belonging
371   to the same block-row can be inserted with a single call to this function.
372   This function extracts the sub-block of values based on the dimensions of
373   the original input block, and the row,col values corresponding to the blocks.
374 
375   Input Parameters:
376   stash  - the stash
377   row    - the global block-row corresponding to the values
378   n      - the number of elements inserted. All elements belong to the above row.
379   idxn   - the global block-column indices corresponding to each of the blocks of
380            values. Each block is of size bs*bs.
381   values - the values inserted
382   rmax   - the number of block-rows in the original block.
383   cmax   - the number of block-columns in the original block.
384   idx    - the index of the current block-row in the original block.
385 */
386 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
387 {
388   PetscInt           i, j, k, bs2, bs = stash->bs, l;
389   const PetscScalar *vals;
390   PetscScalar       *array;
391   PetscMatStashSpace space = stash->space;
392 
393   PetscFunctionBegin;
394   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
395   space = stash->space;
396   l     = space->local_used;
397   bs2   = bs * bs;
398   for (i = 0; i < n; i++) {
399     space->idx[l] = row;
400     space->idy[l] = idxn[i];
401     /* Now copy over the block of values. Store the values column oriented.
402      This enables inserting multiple blocks belonging to a row with a single
403      function call */
404     array = space->val + bs2 * l;
405     vals  = values + idx * bs2 * n + bs * i;
406     for (j = 0; j < bs; j++) {
407       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
408       array += bs;
409       vals += rmax * bs;
410     }
411     l++;
412   }
413   stash->n += n;
414   space->local_used += n;
415   space->local_remaining -= n;
416   PetscFunctionReturn(PETSC_SUCCESS);
417 }
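
/*
   Note on the two blocked variants above: both copy one bs-by-bs sub-block per stashed
   block-column and store it column-oriented in the stash. They differ only in how the source
   block is traversed: the Row variant advances vals by cmax*bs between the scalar rows of the
   sub-block (row-oriented input), while the Col variant advances vals by rmax*bs between its
   scalar columns (column-oriented input).
*/
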
418 /*
419   MatStashScatterBegin_Private - Initiates the transfer of values to the
420   correct owners. This function goes through the stash, checks the
421   owners of each stashed value, and sends the values off to the owner
422   processors.
423 
424   Input Parameters:
425   stash  - the stash
426   owners - an array of size 'number of processes + 1' which gives the ownership range
427            for each process.
428 
429   Note:
430     In the case of the blocked stash the 'owners' array has the
431   ranges specified in blocked global indices, while for the regular stash the
432   ranges are in the usual global indices.
433 */
434 PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
435 {
436   PetscFunctionBegin;
437   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
438   PetscFunctionReturn(PETSC_SUCCESS);
439 }
440 
441 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
442 {
443   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
444   PetscInt           size = stash->size, nsends;
445   PetscInt           count, *sindices, **rindices, i, j, idx, lastidx, l;
446   PetscScalar      **rvalues, *svalues;
447   MPI_Comm           comm = stash->comm;
448   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
449   PetscMPIInt       *sizes, *nlengths, nreceives;
450   PetscInt          *sp_idx, *sp_idy;
451   PetscScalar       *sp_val;
452   PetscMatStashSpace space, space_next;
453 
454   PetscFunctionBegin;
455   { /* make sure all processes use either INSERT_VALUES or ADD_VALUES */
456     InsertMode addv;
457     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
458     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
459     mat->insertmode = addv; /* in case this processor had no cache */
460   }
461 
462   bs2 = stash->bs * stash->bs;
463 
464   /*  first count number of contributors to each processor */
465   PetscCall(PetscCalloc1(size, &nlengths));
466   PetscCall(PetscMalloc1(stash->n + 1, &owner));
467 
468   i = j   = 0;
469   lastidx = -1;
470   space   = stash->space_head;
471   while (space) {
472     space_next = space->next;
473     sp_idx     = space->idx;
474     for (l = 0; l < space->local_used; l++) {
475       /* if indices are NOT locally sorted, need to start search at the beginning */
476       if (lastidx > (idx = sp_idx[l])) j = 0;
477       lastidx = idx;
478       for (; j < size; j++) {
479         if (idx >= owners[j] && idx < owners[j + 1]) {
480           nlengths[j]++;
481           owner[i] = j;
482           break;
483         }
484       }
485       i++;
486     }
487     space = space_next;
488   }
489 
490   /* Now check what procs get messages - and compute nsends. */
491   PetscCall(PetscCalloc1(size, &sizes));
492   for (i = 0, nsends = 0; i < size; i++) {
493     if (nlengths[i]) {
494       sizes[i] = 1;
495       nsends++;
496     }
497   }
498 
499   {
500     PetscMPIInt *onodes, *olengths;
501     /* Determine the number of messages to expect, their lengths, and the from-ids */
502     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
503     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
504     /* since row and column indices are clubbed together, lengths are multiplied by 2 */
505     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
506     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
507     /* values have length bs2 per entry (and remove the earlier factor of 2) */
508     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
509     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
510     PetscCall(PetscFree(onodes));
511     PetscCall(PetscFree(olengths));
512   }
513 
514   /* do sends:
515       1) startv[i] gives the starting index in svalues for the data going to
516          the ith processor
517   */
518   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
519   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
520   PetscCall(PetscMalloc2(size, &startv, size, &starti));
521   /* use 2 sends per processor: one carrying the values, the other the row and column indices */
522   startv[0] = 0;
523   starti[0] = 0;
524   for (i = 1; i < size; i++) {
525     startv[i] = startv[i - 1] + nlengths[i - 1];
526     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
527   }
528 
529   i     = 0;
530   space = stash->space_head;
531   while (space) {
532     space_next = space->next;
533     sp_idx     = space->idx;
534     sp_idy     = space->idy;
535     sp_val     = space->val;
536     for (l = 0; l < space->local_used; l++) {
537       j = owner[i];
538       if (bs2 == 1) {
539         svalues[startv[j]] = sp_val[l];
540       } else {
541         PetscInt     k;
542         PetscScalar *buf1, *buf2;
543         buf1 = svalues + bs2 * startv[j];
544         buf2 = space->val + bs2 * l;
545         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
546       }
547       sindices[starti[j]]               = sp_idx[l];
548       sindices[starti[j] + nlengths[j]] = sp_idy[l];
549       startv[j]++;
550       starti[j]++;
551       i++;
552     }
553     space = space_next;
554   }
555   startv[0] = 0;
556   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
557 
558   for (i = 0, count = 0; i < size; i++) {
559     if (sizes[i]) {
560       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
561       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
562     }
563   }
564 #if defined(PETSC_USE_INFO)
565   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
566   for (i = 0; i < size; i++) {
567     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
568   }
569 #endif
570   PetscCall(PetscFree(nlengths));
571   PetscCall(PetscFree(owner));
572   PetscCall(PetscFree2(startv, starti));
573   PetscCall(PetscFree(sizes));
574 
575   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
576   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
577 
578   for (i = 0; i < nreceives; i++) {
579     recv_waits[2 * i]     = recv_waits1[i];
580     recv_waits[2 * i + 1] = recv_waits2[i];
581   }
582   stash->recv_waits = recv_waits;
583 
584   PetscCall(PetscFree(recv_waits1));
585   PetscCall(PetscFree(recv_waits2));
586 
587   stash->svalues         = svalues;
588   stash->sindices        = sindices;
589   stash->rvalues         = rvalues;
590   stash->rindices        = rindices;
591   stash->send_waits      = send_waits;
592   stash->nsends          = nsends;
593   stash->nrecvs          = nreceives;
594   stash->reproduce_count = 0;
595   PetscFunctionReturn(PETSC_SUCCESS);
596 }
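
/*
   Summary of the reference protocol above: every destination rank is sent two messages, one
   with 2*nlengths[j] integers (the row indices followed by the column indices, hence the
   factor of 2) under tag1, and one with bs2*nlengths[j] scalars (the values) under tag2. The
   matching receives were preposted with PetscPostIrecvInt()/PetscPostIrecvScalar() and their
   requests interleaved in recv_waits so MatStashScatterGetMesg_Ref() can wait on any of them.
*/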
597 
598 /*
599    MatStashScatterGetMesg_Private - This function waits on the receives posted
600    in the function MatStashScatterBegin_Private() and returns one message at
601    a time to the calling function. If no messages are left, it indicates this
602    by setting flg = 0, else it sets flg = 1.
603 
604    Input Parameter:
605    stash - the stash
606 
607    Output Parameters:
608    nvals - the number of entries in the current message.
609    rows  - an array of row indices (or blocked indices) corresponding to the values
610    cols  - an array of column indices (or blocked indices) corresponding to the values
611    vals  - the values
612    flg   - 0 indicates no more messages are left, and the current call has no values associated.
613            1 indicates that the current call successfully received a message, and the
614              other output parameters nvals,rows,cols,vals are set appropriately.
615 */
616 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
617 {
618   PetscFunctionBegin;
619   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
620   PetscFunctionReturn(PETSC_SUCCESS);
621 }
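
/*
   A sketch of how callers typically drive these routines during assembly (the local variable
   names here are illustrative only):

       PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, owners));
       while (1) {
         PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &rows, &cols, &vals, &flg));
         if (!flg) break;
         ... insert the n received (row, column, value) entries into the local matrix ...
       }
       PetscCall(MatStashScatterEnd_Private(&mat->stash));
*/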
622 
623 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
624 {
625   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
626   PetscInt    bs2;
627   MPI_Status  recv_status;
628   PetscBool   match_found = PETSC_FALSE;
629 
630   PetscFunctionBegin;
631   *flg = 0; /* When a message is discovered this is reset to 1 */
632   /* Return if no more messages to process */
633   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
634 
635   bs2 = stash->bs * stash->bs;
636   /* If a matching pair of receives is found, process them, and return the data to
637      the calling function. Until then keep receiving messages */
638   while (!match_found) {
639     if (stash->reproduce) {
640       i = stash->reproduce_count++;
641       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
642     } else {
643       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
644     }
645     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
646 
647     /* Now pack the received message into a structure which is usable by others */
648     if (i % 2) {
649       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
650       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
651       *nvals                            = *nvals / bs2;
652     } else {
653       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
654       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
655       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
656     }
657 
658     /* Check if we have both messages from this proc */
659     i1 = flg_v[2 * recv_status.MPI_SOURCE];
660     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
661     if (i1 != -1 && i2 != -1) {
662       *rows = stash->rindices[i2];
663       *cols = *rows + *nvals;
664       *vals = stash->rvalues[i1];
665       *flg  = 1;
666       stash->nprocessed++;
667       match_found = PETSC_TRUE;
668     }
669   }
670   PetscFunctionReturn(PETSC_SUCCESS);
671 }
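
/*
   How the pairing above works: each remote rank contributes two messages, the indices sitting
   in the even request slots of recv_waits and the values in the odd slots. flg_v[2*source]
   records which buffer holds that rank's values and flg_v[2*source + 1] which holds its
   indices; only when both are set is the (rows, cols, vals) triple handed back to the caller,
   and the slots are reset to -1 in MatStashScatterEnd_Ref().
*/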
672 
673 #if !defined(PETSC_HAVE_MPIUNI)
674 typedef struct {
675   PetscInt    row;
676   PetscInt    col;
677   PetscScalar vals[1]; /* Actually an array of length bs2 */
678 } MatStashBlock;
679 
680 static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
681 {
682   PetscMatStashSpace space;
683   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
684   PetscScalar      **valptr;
685 
686   PetscFunctionBegin;
687   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
688   for (space = stash->space_head, cnt = 0; space; space = space->next) {
689     for (i = 0; i < space->local_used; i++) {
690       row[cnt]    = space->idx[i];
691       col[cnt]    = space->idy[i];
692       valptr[cnt] = &space->val[i * bs2];
693       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
694       cnt++;
695     }
696   }
697   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
698   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
699   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
700   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
701     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
702       PetscInt colstart;
703       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
704       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
705         PetscInt       j, l;
706         MatStashBlock *block;
707         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
708         block->row = row[rowstart];
709         block->col = col[colstart];
710         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
711         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
712           if (insertmode == ADD_VALUES) {
713             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
714           } else {
715             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
716           }
717         }
718         colstart = j;
719       }
720       rowstart = i;
721     }
722   }
723   PetscCall(PetscFree4(row, col, valptr, perm));
724   PetscFunctionReturn(PETSC_SUCCESS);
725 }
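
/*
   Worked example of the sort-and-compress step above: suppose three insertions were stashed
   in the order (5,2)->a, (3,1)->b, (5,2)->c. After the row sort and the per-row column sort
   the order is (3,1), (5,2), (5,2); the duplicate entries at (5,2) are merged into a single
   send block whose value is a+c under ADD_VALUES, while under INSERT_VALUES one of the
   duplicates simply overwrites the other, so each (row, col) location is sent at most once.
*/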
726 
727 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
728 {
729   PetscFunctionBegin;
730   if (stash->blocktype == MPI_DATATYPE_NULL) {
731     PetscInt     bs2 = PetscSqr(stash->bs);
732     PetscMPIInt  blocklens[2];
733     MPI_Aint     displs[2];
734     MPI_Datatype types[2], stype;
735     /*
736         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
737        std::complex itself has standard layout, and so, recursively, does DummyBlock.
738        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
739        though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC libstdc++.
740        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
741 
742        We can test if std::complex has standard layout with the following code:
743        #include <iostream>
744        #include <type_traits>
745        #include <complex>
746        int main() {
747          std::cout << std::boolalpha;
748          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
749        }
750        Output: true
751      */
752     struct DummyBlock {
753       PetscInt    row, col;
754       PetscScalar vals;
755     };
756 
757     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
758     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
759       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
760     }
761     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
762     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
763     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
764     blocklens[0] = 2;
765     blocklens[1] = bs2;
766     displs[0]    = offsetof(struct DummyBlock, row);
767     displs[1]    = offsetof(struct DummyBlock, vals);
768     types[0]     = MPIU_INT;
769     types[1]     = MPIU_SCALAR;
770     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
771     PetscCallMPI(MPI_Type_commit(&stype));
772     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
773     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
774     PetscCallMPI(MPI_Type_free(&stype));
775   }
776   PetscFunctionReturn(PETSC_SUCCESS);
777 }
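
/*
   Example of the sizing above, assuming 64-bit PetscInt, double-precision PetscScalar, and
   bs = 2: offsetof(DummyBlock, vals) is 16 bytes (two 8-byte integers), the bs2 = 4 values add
   32 bytes, giving blocktype_size = 48, already a multiple of sizeof(PetscInt), so no padding
   is added. Because the datatype is resized to an extent of blocktype_size, an array of
   consecutive blocks can be sent with a single (count, blocktype) pair.
*/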
778 
779 /* Callback invoked after target rank has initiated receive of rendezvous message.
780  * Here we post the main sends.
781  */
782 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
783 {
784   MatStash       *stash = (MatStash *)ctx;
785   MatStashHeader *hdr   = (MatStashHeader *)sdata;
786 
787   PetscFunctionBegin;
788   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
789   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
790   stash->sendframes[rankid].count   = hdr->count;
791   stash->sendframes[rankid].pending = 1;
792   PetscFunctionReturn(PETSC_SUCCESS);
793 }
794 
795 /*
796     Callback invoked by target after receiving rendezvous message.
797     Here we post the main recvs.
798  */
799 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
800 {
801   MatStash       *stash = (MatStash *)ctx;
802   MatStashHeader *hdr   = (MatStashHeader *)rdata;
803   MatStashFrame  *frame;
804 
805   PetscFunctionBegin;
806   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
807   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
808   PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
809   frame->count   = hdr->count;
810   frame->pending = 1;
811   PetscFunctionReturn(PETSC_SUCCESS);
812 }
813 
814 /*
815  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
816  */
817 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
818 {
819   size_t nblocks;
820   char  *sendblocks;
821 
822   PetscFunctionBegin;
823   if (PetscDefined(USE_DEBUG)) { /* make sure all processes use either INSERT_VALUES or ADD_VALUES */
824     InsertMode addv;
825     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
826     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
827   }
828 
829   PetscCall(MatStashBlockTypeSetUp(stash));
830   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
831   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
832   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
833   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
834     PetscInt i;
835     size_t   b;
836     for (i = 0, b = 0; i < stash->nsendranks; i++) {
837       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
838       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
839       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
840       for (; b < nblocks; b++) {
841         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
842         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
843         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
844         stash->sendhdr[i].count++;
845       }
846     }
847   } else { /* Dynamically count and pack (first time) */
848     PetscInt sendno;
849     size_t   i, rowstart;
850 
851     /* Count number of send ranks and allocate for sends */
852     stash->nsendranks = 0;
853     for (rowstart = 0; rowstart < nblocks;) {
854       PetscInt       owner;
855       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
856       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
857       if (owner < 0) owner = -(owner + 2);
858       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
859         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
860         if (sendblock_i->row >= owners[owner + 1]) break;
861       }
862       stash->nsendranks++;
863       rowstart = i;
864     }
865     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
866 
867     /* Set up sendhdrs and sendframes */
868     sendno = 0;
869     for (rowstart = 0; rowstart < nblocks;) {
870       PetscInt       owner;
871       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
872       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
873       if (owner < 0) owner = -(owner + 2);
874       stash->sendranks[sendno] = owner;
875       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
876         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
877         if (sendblock_i->row >= owners[owner + 1]) break;
878       }
879       stash->sendframes[sendno].buffer  = sendblock_rowstart;
880       stash->sendframes[sendno].pending = 0;
881       stash->sendhdr[sendno].count      = i - rowstart;
882       sendno++;
883       rowstart = i;
884     }
885     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
886   }
887 
888   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
889    * message or a dummy entry of some sort. */
890   if (mat->insertmode == INSERT_VALUES) {
891     size_t i;
892     for (i = 0; i < nblocks; i++) {
893       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
894       sendblock_i->row           = -(sendblock_i->row + 1);
895     }
896   }
897 
898   if (stash->first_assembly_done) {
899     PetscMPIInt i, tag;
900     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
901     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
902     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
903     stash->use_status = PETSC_TRUE; /* Use count from message status. */
904   } else {
905     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
906     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
907     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
908   }
909 
910   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
911   stash->recvframe_active    = NULL;
912   stash->recvframe_i         = 0;
913   stash->some_i              = 0;
914   stash->some_count          = 0;
915   stash->recvcount           = 0;
916   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
917   stash->insertmode          = &mat->insertmode;
918   PetscFunctionReturn(PETSC_SUCCESS);
919 }
920 
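/*
   Note on the row encoding used by the BTS path: MatStashScatterBegin_BTS() flags
   INSERT_VALUES by sending each row as -(row + 1), so row 7 travels as -8 and a receiver that
   sees a negative row knows the sender used INSERT_VALUES. The decoder below restores the row
   with row = -(row + 1) before returning it to the caller.
*/
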
921 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
922 {
923   MatStashBlock *block;
924 
925   PetscFunctionBegin;
926   *flg = 0;
927   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
928     if (stash->some_i == stash->some_count) {
929       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
930       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
931       stash->some_i = 0;
932     }
933     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
934     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
935     if (stash->use_status) {                                  /* Count what was actually sent */
936       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
937     }
938     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
939       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
940       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
941       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
942       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
943     }
944     stash->some_i++;
945     stash->recvcount++;
946     stash->recvframe_i = 0;
947   }
948   *n    = 1;
949   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
950   if (block->row < 0) block->row = -(block->row + 1);
951   *row = &block->row;
952   *col = &block->col;
953   *val = block->vals;
954   stash->recvframe_i++;
955   *flg = 1;
956   PetscFunctionReturn(PETSC_SUCCESS);
957 }
958 
959 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
960 {
961   PetscFunctionBegin;
962   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
963   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
964     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
965   } else { /* No reuse, so collect everything. */
966     PetscCall(MatStashScatterDestroy_BTS(stash));
967   }
968 
969   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
970      less space is wasted the next time this stash is used.
971      Only update oldnmax if it increases */
972   if (stash->n) {
973     PetscInt bs2     = stash->bs * stash->bs;
974     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
975     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
976   }
977 
978   stash->nmax       = 0;
979   stash->n          = 0;
980   stash->reallocs   = -1;
981   stash->nprocessed = 0;
982 
983   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
984 
985   stash->space = NULL;
986 
987   PetscFunctionReturn(PETSC_SUCCESS);
988 }
989 
990 PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
991 {
992   PetscFunctionBegin;
993   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
994   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
995   stash->recvframes = NULL;
996   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
997   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
998   stash->nsendranks = 0;
999   stash->nrecvranks = 0;
1000   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
1001   PetscCall(PetscFree(stash->sendreqs));
1002   PetscCall(PetscFree(stash->recvreqs));
1003   PetscCall(PetscFree(stash->recvranks));
1004   PetscCall(PetscFree(stash->recvhdr));
1005   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008 #endif
1009