#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
#if !defined(PETSC_HAVE_MPIUNI)
static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
#endif

/*
  MatStashCreate_Private - Creates a stash, currently used by all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash that simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
{
  PetscInt  max, *opt, nopt, i;
  PetscBool flg;

  PetscFunctionBegin;
  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
  PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
  PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
  PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
  for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  PetscCall(PetscMalloc1(nopt, &opt));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
  if (flg) {
    if (nopt == 1) max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt) max = opt[stash->rank];
    else max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscCall(PetscFree(opt));
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = NULL;
  stash->space      = NULL;

  stash->send_waits  = NULL;
  stash->recv_waits  = NULL;
  stash->send_status = NULL;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = NULL;
  stash->rvalues     = NULL;
  stash->rindices    = NULL;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
#if !defined(PETSC_HAVE_MPIUNI)
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
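
/*
  Usage sketch (an assumption about typical callers, not an excerpt from any
  one implementation): a parallel matrix type creates a scalar stash and a
  block stash when the matrix is built, along the lines of

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), mat->rmap->bs, &mat->bstash));
*/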

/*
  MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
  if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
  stash->space = NULL;
  PetscCall(PetscFree(stash->flg_v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterEnd)(stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscMPIInt nsends = stash->nsends;
  PetscInt    bs2, oldnmax;
  MPI_Status *send_status;

  PetscFunctionBegin;
  for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscCall(PetscMalloc1(2 * nsends, &send_status));
    PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
    PetscCall(PetscFree(send_status));
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Also, only update oldnmax if it increases. */
  if (stash->n) {
    bs2     = stash->bs * stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

  stash->space = NULL;

  PetscCall(PetscFree(stash->send_waits));
  PetscCall(PetscFree(stash->recv_waits));
  PetscCall(PetscFree2(stash->svalues, stash->sindices));
  PetscCall(PetscFree(stash->rvalues[0]));
  PetscCall(PetscFree(stash->rvalues));
  PetscCall(PetscFree(stash->rindices[0]));
  PetscCall(PetscFree(stash->rindices));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash - the stash

  Output Parameters:
  nstash   - the size of the stash, i.e. the number of values stored
  reallocs - the number of additional mallocs incurred
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs * stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n * bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the max size of the stash.
          This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
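
/*
  Sketch (hedged; assuming the public MatStashSetInitialSize() interface
  forwards here): callers set both the scalar and the block stash, e.g.

    PetscCall(MatStashSetInitialSize_Private(&mat->stash, size));
    PetscCall(MatStashSetInitialSize_Private(&mat->bstash, bsize));
*/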

/*
  MatStashExpand_Private - Expand the stash. This function is called
  when the space in the stash is not sufficient to add the new values
  being inserted into the stash.

  Input Parameters:
  stash - the stash
  incr  - the minimum increase requested

  Note:
  This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
{
  PetscInt   newnmax, bs2 = stash->bs * stash->bs;
  PetscCount cnewnmax;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) cnewnmax = stash->umax / bs2;
    else cnewnmax = DEFAULT_STASH_SIZE / bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2;
    else cnewnmax = stash->oldnmax / bs2;
  } else cnewnmax = stash->nmax * 2;
  if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr;
  PetscCall(PetscIntCast(cnewnmax, &newnmax));

  /* Get a MatStashSpace and attach it to stash */
  PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(PETSC_SUCCESS);
}
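
/*
  Worked example of the growth policy above (a sketch, with bs = 1 and no
  -matstash_initial_size given): the first expansion allocates
  DEFAULT_STASH_SIZE = 10000 entries; as long as entries keep arriving, the
  capacity doubles (10000 -> 20000 -> 40000 -> ...); and if even the doubled
  size cannot hold nmax + incr entries, another 2*incr entries are added.
*/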
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
  ignorezeroentries - do not insert values that are exactly zero
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
{
  PetscInt           i, k, cnt = 0;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  k     = space->local_used;
  for (i = 0; i < n; i++) {
    if (ignorezeroentries && values && values[i] == 0.0) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values ? values[i] : 0.0;
    k++;
    cnt++;
  }
  stash->n += cnt;
  space->local_used += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
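
/*
  Caller sketch (hedged; modeled on how the parallel matrix types feed the
  stash during MatSetValues; the variable names are illustrative):

    if (row < rstart || row >= rend) {
      PetscCall(MatStashValuesRow_Private(&mat->stash, row, n, cols, vals, ignorezeroentries));
    }

  Rows in the locally owned range [rstart, rend) are inserted directly; only
  off-process rows pass through the stash.
*/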

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval.
            This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
{
  PetscInt           i, k, cnt = 0;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  k     = space->local_used;
  for (i = 0; i < n; i++) {
    if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values ? values[i * stepval] : 0.0;
    k++;
    cnt++;
  }
  stash->n += cnt;
  space->local_used += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2 * l;
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
      array++;
      vals += cmax * bs;
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2 * l;
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
      array += bs;
      vals += rmax * bs;
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Note:
  In the case of the blocked stash, the 'owners' array has the ranges
  specified in blocked global indices, while for the regular stash they are
  in regular global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterBegin)(mat, stash, owners));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, bs2;
  PetscInt           size = stash->size;
  PetscInt          *sindices, **rindices, j, ii, idx, lastidx, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  ii      = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l = 0; l < space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j < size; j++) {
        if (idx >= owners[j] && idx < owners[j + 1]) {
          nlengths[j]++;
          owner[ii] = j;
          break;
        }
      }
      ii++;
    }
    space = space_next;
  }

  /* Now check which procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since row and col indices are sent together, lengths are multiplied by 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are of size 'bs2' per length (and remove the earlier factor of 2) */
    for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends: the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  ii    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[ii];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      ii++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
  for (PetscMPIInt i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (PetscMPIInt i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashScatterGetMesg_Private - This function waits on the receives posted
  in the function MatStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  cols  - an array of column indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals, rows, cols, vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
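
/*
  Driver sketch (hedged; this is the shape of the loop that consumes the
  stash during assembly, not an exact excerpt from any matrix type):

    PetscMPIInt  n;
    PetscInt    *row, *col, flg;
    PetscScalar *val;

    PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;
      ... insert the n (row, col, val) triples into the local matrix ...
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
*/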

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  bs2 = stash->bs * stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i = stash->reproduce_count++;
      PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
    } else {
      PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    }
    PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals = *nvals / bs2;
    } else {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
      *nvals = *nvals / 2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_HAVE_MPIUNI)
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;
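
/*
  Addressing sketch: blocks are packed back to back with a stride of
  stash->blocktype_size bytes (set up in MatStashBlockTypeSetUp() below), so
  the i-th block in a packed buffer 'buf' is recovered as

    MatStashBlock *b = (MatStashBlock *)&buf[i * stash->blocktype_size];

  with b->vals acting as a trailing array of bs2 scalars.
*/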

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
  PetscScalar      **valptr;

  PetscFunctionBegin;
  PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
  for (space = stash->space_head, cnt = 0; space; space = space->next) {
    for (i = 0; i < space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i * bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
  PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
      for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
        PetscInt       j, l;
        MatStashBlock *block;
        PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
        for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  PetscCall(PetscFree4(row, col, valptr, perm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2], stype;
    /*
      DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
      std::complex itself has standard layout, so DummyBlock does too, recursively.
      To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
      though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC libstdc++.
      offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

      We can test whether std::complex has standard layout with the following code:
      #include <iostream>
      #include <type_traits>
      #include <complex>
      int main() {
        std::cout << std::boolalpha;
        std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
      }
      Output: true
    */
    struct DummyBlock {
      PetscInt    row, col;
      PetscScalar vals;
    };

    stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
    PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
    blocklens[0] = 2;
    blocklens[1] = (PetscMPIInt)bs2;
    displs[0]    = offsetof(struct DummyBlock, row);
    displs[1]    = offsetof(struct DummyBlock, vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
    PetscCallMPI(MPI_Type_commit(&stype));
    PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
    PetscCallMPI(MPI_Type_commit(&stash->blocktype));
    PetscCallMPI(MPI_Type_free(&stype));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
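
/*
  Usage note (a sketch of why the extent is resized): setting the extent to
  blocktype_size means that a send or receive of 'count' blocks, e.g.

    PetscCallMPI(MPIU_Isend(buf, count, stash->blocktype, rank, tag, comm, &req));

  strides by exactly blocktype_size bytes per block, matching the (possibly
  padded) packing used by the segmented buffers created above.
*/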

/* Callback invoked after the target rank has initiated receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], PetscCtx ctx)
{
  MatStash       *stash = (MatStash *)ctx;
  MatStashHeader *hdr   = (MatStashHeader *)sdata;

  PetscFunctionBegin;
  PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
  PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Callback invoked by the target after receiving the rendezvous message.
  Here we post the main recvs.
*/
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], PetscCtx ctx)
{
  MatStash       *stash = (MatStash *)ctx;
  MatStashHeader *hdr   = (MatStashHeader *)rdata;
  MatStashFrame  *frame;

  PetscFunctionBegin;
  PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
  PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
  PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  owners[] contains the ownership ranges; it may be indexed by either blocks or scalars
*/
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
{
  PetscCount nblocks;
  char      *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
  }

  PetscCall(MatStashBlockTypeSetUp(stash));
  PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
  PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
  PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt   i;
    PetscCount b;
    for (i = 0, b = 0; i < stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for (; b < nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
        PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt   sendno;
    PetscCount i, rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
      if (owner < 0) owner = -(owner + 2);
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       iowner;
      PetscMPIInt    owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
      PetscCall(PetscMPIIntCast(iowner, &owner));
      if (owner < 0) owner = -(owner + 2);
      stash->sendranks[sendno] = owner;
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
      sendno++;
      rowstart = i;
    }
    PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    for (PetscCount i = 0; i < nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
      sendblock_i->row           = -(sendblock_i->row + 1);
    }
  }

  if (stash->first_assembly_done) {
    PetscMPIInt i, tag;

    PetscCall(PetscCommGetNewTag(stash->comm, &tag));
    for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
    for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
    PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
  stash->recvframe_active    = NULL;
  stash->recvframe_i         = 0;
  stash->some_i              = 0;
  stash->some_count          = 0;
  stash->recvcount           = 0;
  stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode          = &mat->insertmode;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
{
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
      PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      PetscMPIInt ic;

      PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
      stash->recvframe_count = ic;
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
      PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
  if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
  } else { /* No reuse, so collect everything. */
    PetscCall(MatStashScatterDestroy_BTS(stash));
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Also, only update oldnmax if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs * stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

  stash->space = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
  PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
  stash->recvframes = NULL;
  PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
  if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
  PetscCall(PetscFree(stash->sendreqs));
  PetscCall(PetscFree(stash->recvreqs));
  PetscCall(PetscFree(stash->recvranks));
  PetscCall(PetscFree(stash->recvhdr));
  PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif