xref: /petsc/src/mat/utils/matstash.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1 #include <petsc/private/matimpl.h>
2 
3 #define DEFAULT_STASH_SIZE 10000
4 
5 static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
8 #if !defined(PETSC_HAVE_MPIUNI)
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
12 #endif
13 
14 /*
15   MatStashCreate_Private - Creates a stash,currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple minded stash. Simply adds entries to end of stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size. used when stashing blocks of values
24 
25   Output Parameter:
26   stash    - the newly created stash
27 */
MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash * stash)28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
29 {
30   PetscInt  max, *opt, nopt, i;
31   PetscBool flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35   stash->comm = comm;
36 
37   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
38   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
39   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
40   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
41   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
43 
44   nopt = stash->size;
45   PetscCall(PetscMalloc1(nopt, &opt));
46   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47   if (flg) {
48     if (nopt == 1) max = opt[0];
49     else if (nopt == stash->size) max = opt[stash->rank];
50     else if (stash->rank < nopt) max = opt[stash->rank];
51     else max = 0; /* Use default */
52     stash->umax = max;
53   } else {
54     stash->umax = 0;
55   }
56   PetscCall(PetscFree(opt));
57   if (bs <= 0) bs = 1;
58 
59   stash->bs         = bs;
60   stash->nmax       = 0;
61   stash->oldnmax    = 0;
62   stash->n          = 0;
63   stash->reallocs   = -1;
64   stash->space_head = NULL;
65   stash->space      = NULL;
66 
67   stash->send_waits  = NULL;
68   stash->recv_waits  = NULL;
69   stash->send_status = NULL;
70   stash->nsends      = 0;
71   stash->nrecvs      = 0;
72   stash->svalues     = NULL;
73   stash->rvalues     = NULL;
74   stash->rindices    = NULL;
75   stash->nprocessed  = 0;
76   stash->reproduce   = PETSC_FALSE;
77   stash->blocktype   = MPI_DATATYPE_NULL;
78 
79   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
80 #if !defined(PETSC_HAVE_MPIUNI)
81   flg = PETSC_FALSE;
82   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83   if (!flg) {
84     stash->ScatterBegin   = MatStashScatterBegin_BTS;
85     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86     stash->ScatterEnd     = MatStashScatterEnd_BTS;
87     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88   } else {
89 #endif
90     stash->ScatterBegin   = MatStashScatterBegin_Ref;
91     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92     stash->ScatterEnd     = MatStashScatterEnd_Ref;
93     stash->ScatterDestroy = NULL;
94 #if !defined(PETSC_HAVE_MPIUNI)
95   }
96 #endif
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
/*
   MatStashDestroy_Private - Destroy the stash: frees the chunked entry
   storage, any implementation-specific scatter state, and the per-rank
   message-pairing array.
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
  /* ScatterDestroy is only set by the BTS implementation (see MatStashCreate_Private()) */
  if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
  stash->space = NULL;
  PetscCall(PetscFree(stash->flg_v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
112 
/*
   MatStashScatterEnd_Private - This is called as the final stage of
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscFunctionBegin;
  /* dispatch to the active implementation (BTS by default, Ref with -matstash_legacy) */
  PetscCall((*stash->ScatterEnd)(stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
127 
MatStashScatterEnd_Ref(MatStash * stash)128 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129 {
130   PetscMPIInt nsends = stash->nsends;
131   PetscInt    bs2, oldnmax;
132   MPI_Status *send_status;
133 
134   PetscFunctionBegin;
135   for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
136   /* wait on sends */
137   if (nsends) {
138     PetscCall(PetscMalloc1(2 * nsends, &send_status));
139     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
140     PetscCall(PetscFree(send_status));
141   }
142 
143   /* Now update nmaxold to be app 10% more than max n used, this way the
144      wastage of space is reduced the next time this stash is used.
145      Also update the oldmax, only if it increases */
146   if (stash->n) {
147     bs2     = stash->bs * stash->bs;
148     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
149     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
150   }
151 
152   stash->nmax       = 0;
153   stash->n          = 0;
154   stash->reallocs   = -1;
155   stash->nprocessed = 0;
156 
157   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
158 
159   stash->space = NULL;
160 
161   PetscCall(PetscFree(stash->send_waits));
162   PetscCall(PetscFree(stash->recv_waits));
163   PetscCall(PetscFree2(stash->svalues, stash->sindices));
164   PetscCall(PetscFree(stash->rvalues[0]));
165   PetscCall(PetscFree(stash->rvalues));
166   PetscCall(PetscFree(stash->rindices[0]));
167   PetscCall(PetscFree(stash->rindices));
168   PetscFunctionReturn(PETSC_SUCCESS);
169 }
170 
171 /*
172    MatStashGetInfo_Private - Gets the relevant statistics of the stash
173 
174    Input Parameters:
175    stash    - the stash
176    nstash   - the size of the stash. Indicates the number of values stored.
177    reallocs - the number of additional mallocs incurred.
178 
179 */
MatStashGetInfo_Private(MatStash * stash,PetscInt * nstash,PetscInt * reallocs)180 PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181 {
182   PetscInt bs2 = stash->bs * stash->bs;
183 
184   PetscFunctionBegin;
185   if (nstash) *nstash = stash->n * bs2;
186   if (reallocs) {
187     if (stash->reallocs < 0) *reallocs = 0;
188     else *reallocs = stash->reallocs;
189   }
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            this value is used while allocating memory (in scalar entries;
            MatStashExpand_Private() divides it by bs*bs).
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
207 
208 /* MatStashExpand_Private - Expand the stash. This function is called
209    when the space in the stash is not sufficient to add the new values
210    being inserted into the stash.
211 
212    Input Parameters:
213    stash - the stash
214    incr  - the minimum increase requested
215 
216    Note:
217    This routine doubles the currently used memory.
218  */
MatStashExpand_Private(MatStash * stash,PetscInt incr)219 static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220 {
221   PetscInt   newnmax, bs2 = stash->bs * stash->bs;
222   PetscCount cnewnmax;
223 
224   PetscFunctionBegin;
225   /* allocate a larger stash */
226   if (!stash->oldnmax && !stash->nmax) { /* new stash */
227     if (stash->umax) cnewnmax = stash->umax / bs2;
228     else cnewnmax = DEFAULT_STASH_SIZE / bs2;
229   } else if (!stash->nmax) { /* reusing stash */
230     if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2;
231     else cnewnmax = stash->oldnmax / bs2;
232   } else cnewnmax = stash->nmax * 2;
233   if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr;
234   PetscCall(PetscIntCast(cnewnmax, &newnmax));
235 
236   /* Get a MatStashSpace and attach it to stash */
237   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
238   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
239     stash->space_head = stash->space;
240   }
241 
242   stash->reallocs++;
243   stash->nmax = newnmax;
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 /*
247   MatStashValuesRow_Private - inserts values into the stash. This function
248   expects the values to be row-oriented. Multiple columns belong to the same row
249   can be inserted with a single call to this function.
250 
251   Input Parameters:
252   stash  - the stash
253   row    - the global row corresponding to the values
254   n      - the number of elements inserted. All elements belong to the above row.
255   idxn   - the global column indices corresponding to each of the values.
256   values - the values inserted
257 */
MatStashValuesRow_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)258 PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
259 {
260   PetscInt           i, k, cnt = 0;
261   PetscMatStashSpace space = stash->space;
262 
263   PetscFunctionBegin;
264   /* Check and see if we have sufficient memory */
265   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
266   space = stash->space;
267   k     = space->local_used;
268   for (i = 0; i < n; i++) {
269     if (ignorezeroentries && values && values[i] == 0.0) continue;
270     space->idx[k] = row;
271     space->idy[k] = idxn[i];
272     space->val[k] = values ? values[i] : 0.0;
273     k++;
274     cnt++;
275   }
276   stash->n += cnt;
277   space->local_used += cnt;
278   space->local_remaining -= cnt;
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 /*
283   MatStashValuesCol_Private - inserts values into the stash. This function
284   expects the values to be column-oriented. Multiple columns belong to the same row
285   can be inserted with a single call to this function.
286 
287   Input Parameters:
288   stash   - the stash
289   row     - the global row corresponding to the values
290   n       - the number of elements inserted. All elements belong to the above row.
291   idxn    - the global column indices corresponding to each of the values.
292   values  - the values inserted
293   stepval - the consecutive values are sepated by a distance of stepval.
294             this happens because the input is column-oriented.
295 */
MatStashValuesCol_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)296 PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
297 {
298   PetscInt           i, k, cnt = 0;
299   PetscMatStashSpace space = stash->space;
300 
301   PetscFunctionBegin;
302   /* Check and see if we have sufficient memory */
303   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
304   space = stash->space;
305   k     = space->local_used;
306   for (i = 0; i < n; i++) {
307     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
308     space->idx[k] = row;
309     space->idy[k] = idxn[i];
310     space->val[k] = values ? values[i * stepval] : 0.0;
311     k++;
312     cnt++;
313   }
314   stash->n += cnt;
315   space->local_used += cnt;
316   space->local_remaining -= cnt;
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted (may be NULL, in which case zero blocks are stashed)
  rmax   - the number of block-rows in the original block (unused here).
  cmax   - the number of block-columns of the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* make sure the current chunk can hold n more blocks */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2 * l;
    /* NOTE(review): start of block (idx, i) in the row-oriented input; this assumes
       the input holds n block-columns per block-row — confirm against callers */
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      /* transpose: input row j becomes entry j of every stored column */
      for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
      array++;
      vals += cmax * bs; /* advance one full scalar row in the input */
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
370 
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted (may be NULL, in which case zero blocks are stashed)
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns of the original block (unused here).
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* make sure the current chunk can hold n more blocks */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2 * l;
    /* NOTE(review): this start offset is identical to the row-oriented variant
       (rmax only enters via the inner stride below) — verify against callers */
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      /* input is already column-oriented, so columns are copied straight through */
      for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
      array += bs;
      vals += rmax * bs; /* advance one full scalar column in the input */
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  mat    - the matrix being assembled
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Note:
    The 'owners' array in the case of the blocked-stash has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscFunctionBegin;
  /* dispatch to the active implementation (BTS by default, Ref with -matstash_legacy) */
  PetscCall((*stash->ScatterBegin)(mat, stash, owners));
  PetscFunctionReturn(PETSC_SUCCESS);
}
443 
/*
  MatStashScatterBegin_Ref - Reference implementation of the stash scatter.
  Determines the destination rank of every stashed entry, posts the receives,
  and starts two nonblocking sends per destination: one message with the
  (row,col) index pairs (tag1) and one with the values (tag2). Completion is
  handled by MatStashScatterGetMesg_Ref()/MatStashScatterEnd_Ref().
*/
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, bs2;
  PetscInt           size = stash->size;
  PetscInt          *sindices, **rindices, j, ii, idx, lastidx, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count the number of contributions destined for each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  ii = j  = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l = 0; l < space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j < size; j++) {
        if (idx >= owners[j] && idx < owners[j + 1]) {
          nlengths[j]++;
          owner[ii] = j; /* remember the destination of entry ii for the packing pass below */
          break;
        }
      }
      ii++;
    }
    space = space_next;
  }

  /* Now check which procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and their from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since the row,col pairs travel together, the index-message lengths are multiplied by 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* value messages carry bs2 scalars per entry (and the earlier factor of 2 is removed) */
    for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends per destination: the first with the values, the next with all row/col indices */
  startv[0] = 0;
  starti[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; /* index buffer holds row AND col per entry */
  }

  /* pack the send buffers, walking the stash chunks in insertion order */
  ii    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[ii];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows go in the first half of each destination's index segment, cols in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      ii++;
    }
    space = space_next;
  }
  /* rebuild the per-destination starting offsets consumed by the packing pass above */
  startv[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
  for (PetscMPIInt i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private():
     even slots hold the index requests, odd slots the value requests */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (PetscMPIInt i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
601 
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more message left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscFunctionBegin;
  /* dispatch to the active implementation (BTS by default, Ref with -matstash_legacy) */
  PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
626 
/*
  MatStashScatterGetMesg_Ref - Reference implementation of message retrieval.
  Completes receives until both the index message and the value message from
  some rank have arrived, then hands that pair back to the caller. The pairing
  state lives in stash->flg_v (two slots per source rank, -1 meaning
  "not yet received").
*/
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  bs2 = stash->bs * stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* reproducible mode: complete receives in posting order rather than arrival order */
      i = stash->reproduce_count++;
      PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
    } else {
      PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    }
    PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request slots are value messages (see the interleaving in MatStashScatterBegin_Ref()) */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals                            = *nvals / bs2; /* scalar count -> block count */
    } else {
      /* even request slots are index messages holding row AND col per entry */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
      *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* cols are stored right after the rows in the same buffer */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
676 
677 #if !defined(PETSC_HAVE_MPIUNI)
/* One (row, col, values) record as packed into the BTS send/receive buffers;
   records are allocated from PetscSegBuffers with element size
   stash->blocktype_size, so the trailing vals[] acts as a bs2-long array. */
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;
683 
/*
  MatStashSortCompress_Private - Sorts all stashed entries by (row, col),
  combines multiple insertions at the same location (summing for ADD_VALUES,
  overwriting otherwise), and packs the result as MatStashBlock records into
  stash->segsendblocks for the BTS scatter.
*/
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
  PetscScalar      **valptr;

  PetscFunctionBegin;
  /* flatten the chunked stash into parallel row/col/value-pointer arrays */
  PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
  for (space = stash->space_head, cnt = 0; space; space = space->next) {
    for (i = 0; i < space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i * bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
  PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
      for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
        PetscInt       j, l;
        MatStashBlock *block;
        PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
        for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the last duplicate in sorted order wins.
               NOTE(review): presumably equal keys keep insertion order through the sorts — confirm */
            PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  PetscCall(PetscFree4(row, col, valptr, perm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
730 
MatStashBlockTypeSetUp(MatStash * stash)731 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
732 {
733   PetscFunctionBegin;
734   if (stash->blocktype == MPI_DATATYPE_NULL) {
735     PetscInt     bs2 = PetscSqr(stash->bs);
736     PetscMPIInt  blocklens[2];
737     MPI_Aint     displs[2];
738     MPI_Datatype types[2], stype;
739     /*
740         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
741        std::complex itself has standard layout, so does DummyBlock, recursively.
742        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
743        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
744        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
745 
746        We can test if std::complex has standard layout with the following code:
747        #include <iostream>
748        #include <type_traits>
749        #include <complex>
750        int main() {
751          std::cout << std::boolalpha;
752          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
753        }
754        Output: true
755      */
756     struct DummyBlock {
757       PetscInt    row, col;
758       PetscScalar vals;
759     };
760 
761     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
762     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
763       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
764     }
765     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
766     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
767     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
768     blocklens[0] = 2;
769     blocklens[1] = (PetscMPIInt)bs2;
770     displs[0]    = offsetof(struct DummyBlock, row);
771     displs[1]    = offsetof(struct DummyBlock, vals);
772     types[0]     = MPIU_INT;
773     types[1]     = MPIU_SCALAR;
774     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
775     PetscCallMPI(MPI_Type_commit(&stype));
776     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
777     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
778     PetscCallMPI(MPI_Type_free(&stype));
779   }
780   PetscFunctionReturn(PETSC_SUCCESS);
781 }
782 
783 /* Callback invoked after target rank has initiated receive of rendezvous message.
784  * Here we post the main sends.
785  */
MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void * sdata,MPI_Request req[],PetscCtx ctx)786 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], PetscCtx ctx)
787 {
788   MatStash       *stash = (MatStash *)ctx;
789   MatStashHeader *hdr   = (MatStashHeader *)sdata;
790 
791   PetscFunctionBegin;
792   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
793   PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
794   stash->sendframes[rankid].count   = hdr->count;
795   stash->sendframes[rankid].pending = 1;
796   PetscFunctionReturn(PETSC_SUCCESS);
797 }
798 
799 /*
800     Callback invoked by target after receiving rendezvous message.
801     Here we post the main recvs.
802  */
MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void * rdata,MPI_Request req[],PetscCtx ctx)803 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], PetscCtx ctx)
804 {
805   MatStash       *stash = (MatStash *)ctx;
806   MatStashHeader *hdr   = (MatStashHeader *)rdata;
807   MatStashFrame  *frame;
808 
809   PetscFunctionBegin;
810   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
811   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
812   PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
813   frame->count   = hdr->count;
814   frame->pending = 1;
815   PetscFunctionReturn(PETSC_SUCCESS);
816 }
817 
/*
 * MatStashScatterBegin_BTS - Begin the build-two-sided (BTS) scatter of stashed blocks.
 *
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars.
 *
 * Flow: sort/compress the stash into a contiguous array of send blocks, then either
 * reuse the send/recv layout from the first assembly (when first_assembly_done is set)
 * or discover the set of destination ranks and build the layout for the first time,
 * and finally post the sends and receives.
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
{
  PetscCount nblocks;
  char      *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
  }

  PetscCall(MatStashBlockTypeSetUp(stash));
  PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
  /* After compression the blocks live contiguously (stride blocktype_size) and are sorted by row */
  PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
  PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt   i;
    PetscCount b;
    for (i = 0, b = 0; i < stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      /* Walk the run of blocks whose rows fall inside sendranks[i]'s ownership range */
      for (; b < nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
        PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt   sendno;
    PetscCount i, rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
      /* PetscFindInt encodes "not found" as a negative value; decode it to the index of the owning range */
      if (owner < 0) owner = -(owner + 2);
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));

    /* Set up sendhdrs and sendframes (second pass over the same runs as above) */
    sendno = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       iowner;
      PetscMPIInt    owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
      PetscCall(PetscMPIIntCast(iowner, &owner));
      /* Same not-found decoding as the counting pass above */
      if (owner < 0) owner = -(owner + 2);
      stash->sendranks[sendno] = owner;
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
      sendno++;
      rowstart = i;
    }
    PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    /* Rows are flipped to -(row+1); the receiver decodes this in MatStashScatterGetMesg_BTS */
    for (PetscCount i = 0; i < nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
      sendblock_i->row           = -(sendblock_i->row + 1);
    }
  }

  if (stash->first_assembly_done) {
    /* Ranks and counts are already known: skip the rendezvous and invoke the callbacks directly */
    PetscMPIInt i, tag;

    PetscCall(PetscCommGetNewTag(stash->comm, &tag));
    for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
    for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    /* First time: discover receive ranks via the two-sided rendezvous, which calls the Send/Recv callbacks */
    PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
    PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the receive-side cursor state consumed by MatStashScatterGetMesg_BTS */
  PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
  stash->recvframe_active    = NULL;
  stash->recvframe_i         = 0;
  stash->some_i              = 0;
  stash->some_count          = 0;
  stash->recvcount           = 0;
  stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode          = &mat->insertmode;
  PetscFunctionReturn(PETSC_SUCCESS);
}
930 
/* MatStashScatterGetMesg_BTS - Hand back the next received block, one (row,col,vals) at a time.
 *
 * Sets *flg = 1 and points *row/*col/*val into the current receive frame when a block is
 * available; sets *flg = 0 (and returns) once all messages from all nrecvranks have been
 * consumed. State (recvframe_active, recvframe_i, some_i, recvcount) persists across calls
 * so the caller can drain blocks incrementally.
 */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
{
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame that still has unread blocks, waiting for more messages as needed */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
      PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) {                                  /* Count what was actually sent */
      PetscMPIInt ic;

      PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
      stash->recvframe_count = ic;
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      /* A negative row in the first block signals INSERT_VALUES (encoded by the sender in MatStashScatterBegin_BTS) */
      block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
      PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n    = 1;
  block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1); /* Undo the INSERT_VALUES row encoding */
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
971 
MatStashScatterEnd_BTS(MatStash * stash)972 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
973 {
974   PetscFunctionBegin;
975   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
976   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
977     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
978   } else { /* No reuse, so collect everything. */
979     PetscCall(MatStashScatterDestroy_BTS(stash));
980   }
981 
982   /* Now update nmaxold to be app 10% more than max n used, this way the
983      wastage of space is reduced the next time this stash is used.
984      Also update the oldmax, only if it increases */
985   if (stash->n) {
986     PetscInt bs2     = stash->bs * stash->bs;
987     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
988     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
989   }
990 
991   stash->nmax       = 0;
992   stash->n          = 0;
993   stash->reallocs   = -1;
994   stash->nprocessed = 0;
995 
996   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
997 
998   stash->space = NULL;
999   PetscFunctionReturn(PETSC_SUCCESS);
1000 }
1001 
/* MatStashScatterDestroy_BTS - Release all BTS communication state: segmented buffers,
 * the committed MPI block datatype, and the per-rank send/receive arrays. Safe to call
 * when some members are already NULL (the Petsc free/destroy routines tolerate that).
 */
PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
  /* recvframes points into segrecvframe storage, so clear it once that buffer is destroyed */
  PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
  stash->recvframes = NULL;
  PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
  if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  /* sendranks/sendhdr/sendframes were allocated together with PetscMalloc3 in MatStashScatterBegin_BTS */
  PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
  PetscCall(PetscFree(stash->sendreqs));
  PetscCall(PetscFree(stash->recvreqs));
  PetscCall(PetscFree(stash->recvranks));
  PetscCall(PetscFree(stash->recvhdr));
  PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1020 #endif
1021