1 #include <petsc/private/vecimpl.h>
2
3 #define DEFAULT_STASH_SIZE 100
4
5 /*
6 VecStashCreate_Private - Creates a stash,currently used for all the parallel
7 matrix implementations. The stash is where elements of a matrix destined
8 to be stored on other processors are kept until matrix assembly is done.
9
10 This is a simple minded stash. Simply adds entries to end of stash.
11
12 Input Parameters:
13 comm - communicator, required for scatters.
14 bs - stash block size. used when stashing blocks of values
15
16 Output Parameter:
17 . stash - the newly created stash
18 */
VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash * stash)19 PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
20 {
21 PetscInt max, *opt, nopt;
22 PetscBool flg;
23
24 PetscFunctionBegin;
25 /* Require 2 tags, get the second using PetscCommGetNewTag() */
26 stash->comm = comm;
27 PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
28 PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
29 PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
30 PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
31
32 nopt = stash->size;
33 PetscCall(PetscMalloc1(nopt, &opt));
34 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg));
35 if (flg) {
36 if (nopt == 1) max = opt[0];
37 else if (nopt == stash->size) max = opt[stash->rank];
38 else if (stash->rank < nopt) max = opt[stash->rank];
39 else max = 0; /* use default */
40 stash->umax = max;
41 } else {
42 stash->umax = 0;
43 }
44 PetscCall(PetscFree(opt));
45
46 if (bs <= 0) bs = 1;
47
48 stash->bs = bs;
49 stash->nmax = 0;
50 stash->oldnmax = 0;
51 stash->n = 0;
52 stash->reallocs = -1;
53 stash->idx = NULL;
54 stash->array = NULL;
55
56 stash->send_waits = NULL;
57 stash->recv_waits = NULL;
58 stash->send_status = NULL;
59 stash->nsends = 0;
60 stash->nrecvs = 0;
61 stash->svalues = NULL;
62 stash->rvalues = NULL;
63 stash->rmax = 0;
64 stash->nprocs = NULL;
65 stash->nprocessed = 0;
66 stash->donotstash = PETSC_FALSE;
67 stash->ignorenegidx = PETSC_FALSE;
68 PetscFunctionReturn(PETSC_SUCCESS);
69 }
70
71 /*
72 VecStashDestroy_Private - Destroy the stash
73 */
VecStashDestroy_Private(VecStash * stash)74 PetscErrorCode VecStashDestroy_Private(VecStash *stash)
75 {
76 PetscFunctionBegin;
77 PetscCall(PetscFree2(stash->array, stash->idx));
78 PetscCall(PetscFree(stash->bowners));
79 PetscFunctionReturn(PETSC_SUCCESS);
80 }
81
82 /*
83 VecStashScatterEnd_Private - This is called as the final stage of
84 scatter. The final stages of message passing is done here, and
85 all the memory used for message passing is cleanedu up. This
86 routine also resets the stash, and deallocates the memory used
87 for the stash. It also keeps track of the current memory usage
88 so that the same value can be used the next time through.
89 */
VecStashScatterEnd_Private(VecStash * stash)90 PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
91 {
92 PetscMPIInt nsends = stash->nsends;
93 PetscInt oldnmax;
94 MPI_Status *send_status;
95
96 PetscFunctionBegin;
97 /* wait on sends */
98 if (nsends) {
99 PetscCall(PetscMalloc1(2 * nsends, &send_status));
100 PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
101 PetscCall(PetscFree(send_status));
102 }
103
104 /* Now update nmaxold to be app 10% more than max n, this way the
105 wastage of space is reduced the next time this stash is used.
106 Also update the oldmax, only if it increases */
107 if (stash->n) {
108 oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
109 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
110 }
111
112 stash->nmax = 0;
113 stash->n = 0;
114 stash->reallocs = -1;
115 stash->rmax = 0;
116 stash->nprocessed = 0;
117
118 PetscCall(PetscFree2(stash->array, stash->idx));
119 stash->array = NULL;
120 stash->idx = NULL;
121 PetscCall(PetscFree(stash->send_waits));
122 PetscCall(PetscFree(stash->recv_waits));
123 PetscCall(PetscFree2(stash->svalues, stash->sindices));
124 PetscCall(PetscFree2(stash->rvalues, stash->rindices));
125 PetscCall(PetscFree(stash->nprocs));
126 PetscFunctionReturn(PETSC_SUCCESS);
127 }
128
129 /*
130 VecStashGetInfo_Private - Gets the relevant statistics of the stash
131
132 Input Parameters:
133 stash - the stash
134 nstash - the size of the stash
135 reallocs - the number of additional mallocs incurred.
136
137 */
VecStashGetInfo_Private(VecStash * stash,PetscInt * nstash,PetscInt * reallocs)138 PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
139 {
140 PetscFunctionBegin;
141 if (nstash) *nstash = stash->n * stash->bs;
142 if (reallocs) {
143 if (stash->reallocs < 0) *reallocs = 0;
144 else *reallocs = stash->reallocs;
145 }
146 PetscFunctionReturn(PETSC_SUCCESS);
147 }
148
149 /*
150 VecStashSetInitialSize_Private - Sets the initial size of the stash
151
152 Input Parameters:
153 stash - the stash
154 max - the value that is used as the max size of the stash.
155 this value is used while allocating memory. It specifies
156 the number of vals stored, even with the block-stash
157 */
VecStashSetInitialSize_Private(VecStash * stash,PetscInt max)158 PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
159 {
160 PetscFunctionBegin;
161 stash->umax = max;
162 PetscFunctionReturn(PETSC_SUCCESS);
163 }
164
165 /* VecStashExpand_Private - Expand the stash. This function is called
166 when the space in the stash is not sufficient to add the new values
167 being inserted into the stash.
168
169 Input Parameters:
170 stash - the stash
171 incr - the minimum increase requested
172
173 Notes:
174 This routine doubles the currently used memory.
175 */
VecStashExpand_Private(VecStash * stash,PetscInt incr)176 PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
177 {
178 PetscInt *n_idx, newnmax, bs = stash->bs;
179 PetscScalar *n_array;
180
181 PetscFunctionBegin;
182 /* allocate a larger stash. */
183 if (!stash->oldnmax && !stash->nmax) { /* new stash */
184 if (stash->umax) newnmax = stash->umax / bs;
185 else newnmax = DEFAULT_STASH_SIZE / bs;
186 } else if (!stash->nmax) { /* reusing stash */
187 if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
188 else newnmax = stash->oldnmax / bs;
189 } else newnmax = stash->nmax * 2;
190
191 if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
192
193 PetscCall(PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx));
194 PetscCall(PetscArraycpy(n_array, stash->array, bs * stash->nmax));
195 PetscCall(PetscArraycpy(n_idx, stash->idx, stash->nmax));
196 PetscCall(PetscFree2(stash->array, stash->idx));
197
198 stash->array = n_array;
199 stash->idx = n_idx;
200 stash->nmax = newnmax;
201 stash->reallocs++;
202 PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 /*
205 VecStashScatterBegin_Private - Initiates the transfer of values to the
206 correct owners. This function goes through the stash, and check the
207 owners of each stashed value, and sends the values off to the owner
208 processors.
209
210 Input Parameters:
211 stash - the stash
212 owners - an array of size 'no-of-procs' which gives the ownership range
213 for each node.
214
215 Notes:
216 The 'owners' array in the cased of the blocked-stash has the
217 ranges specified blocked global indices, and for the regular stash in
218 the proper global indices.
219 */
VecStashScatterBegin_Private(VecStash * stash,const PetscInt * owners)220 PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, const PetscInt *owners)
221 {
222 PetscMPIInt size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2, j, *owner, nsends, nreceives;
223 PetscInt *start, *nprocs, inreceives;
224 PetscInt nmax, *sindices, *rindices, idx, bs = stash->bs, lastidx;
225 PetscScalar *rvalues, *svalues;
226 MPI_Comm comm = stash->comm;
227 MPI_Request *send_waits, *recv_waits;
228
229 PetscFunctionBegin;
230 /* first count number of contributors to each processor */
231 PetscCall(PetscCalloc1(2 * size, &nprocs));
232 PetscCall(PetscMalloc1(stash->n, &owner));
233
234 j = 0;
235 lastidx = -1;
236 for (PetscInt i = 0; i < stash->n; i++) {
237 /* if indices are NOT locally sorted, need to start search at the beginning */
238 if (lastidx > (idx = stash->idx[i])) j = 0;
239 lastidx = idx;
240 for (; j < size; j++) {
241 if (idx >= owners[j] && idx < owners[j + 1]) {
242 nprocs[2 * j]++;
243 nprocs[2 * j + 1] = 1;
244 owner[i] = j;
245 break;
246 }
247 }
248 }
249 nsends = 0;
250 for (PetscMPIInt i = 0; i < size; i++) nsends += nprocs[2 * i + 1];
251
252 /* inform other processors of number of messages and max length*/
253 PetscCall(PetscMaxSum(comm, nprocs, &nmax, &inreceives));
254 PetscCall(PetscMPIIntCast(inreceives, &nreceives));
255
256 /* post receives:
257 since we don't know how long each individual message is we
258 allocate the largest needed buffer for each receive. Potentially
259 this is a lot of wasted space.
260 */
261 PetscCall(PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices));
262 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
263 for (PetscMPIInt i = 0, count = 0; i < nreceives; i++) {
264 PetscCallMPI(MPIU_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
265 PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++));
266 }
267
268 /* do sends:
269 1) starts[i] gives the starting index in svalues for stuff going to
270 the ith processor
271 */
272 PetscCall(PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices));
273 PetscCall(PetscMalloc1(2 * nsends, &send_waits));
274 PetscCall(PetscMalloc1(size, &start));
275 /* use 2 sends the first with all_v, the next with all_i */
276 start[0] = 0;
277 for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];
278
279 for (PetscInt i = 0; i < stash->n; i++) {
280 j = owner[i];
281 if (bs == 1) svalues[start[j]] = stash->array[i];
282 else PetscCall(PetscArraycpy(svalues + bs * start[j], stash->array + bs * i, bs));
283 sindices[start[j]] = stash->idx[i];
284 start[j]++;
285 }
286 start[0] = 0;
287 for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];
288
289 for (PetscMPIInt i = 0, count = 0; i < size; i++) {
290 if (nprocs[2 * i + 1]) {
291 PetscCallMPI(MPIU_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++));
292 PetscCallMPI(MPIU_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++));
293 }
294 }
295 PetscCall(PetscFree(owner));
296 PetscCall(PetscFree(start));
297 /* This memory is reused in scatter end for a different purpose*/
298 for (PetscMPIInt i = 0; i < 2 * size; i++) nprocs[i] = -1;
299
300 stash->nprocs = nprocs;
301 stash->svalues = svalues;
302 stash->sindices = sindices;
303 stash->rvalues = rvalues;
304 stash->rindices = rindices;
305 stash->nsends = nsends;
306 stash->nrecvs = nreceives;
307 stash->send_waits = send_waits;
308 stash->recv_waits = recv_waits;
309 stash->rmax = nmax;
310 PetscFunctionReturn(PETSC_SUCCESS);
311 }
312
313 /*
314 VecStashScatterGetMesg_Private - This function waits on the receives posted
315 in the function VecStashScatterBegin_Private() and returns one message at
316 a time to the calling function. If no messages are left, it indicates this
317 by setting flg = 0, else it sets flg = 1.
318
319 Input Parameters:
320 stash - the stash
321
322 Output Parameters:
323 nvals - the number of entries in the current message.
324 rows - an array of row indices (or blocked indices) corresponding to the values
325 cols - an array of columnindices (or blocked indices) corresponding to the values
326 vals - the values
327 flg - 0 indicates no more message left, and the current call has no values associated.
328 1 indicates that the current call successfully received a message, and the
329 other output parameters nvals,rows,cols,vals are set appropriately.
330 */
VecStashScatterGetMesg_Private(VecStash * stash,PetscMPIInt * nvals,PetscInt ** rows,PetscScalar ** vals,PetscInt * flg)331 PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
332 {
333 PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
334 PetscInt *flg_v;
335 PetscInt i1, i2, bs = stash->bs;
336 MPI_Status recv_status;
337 PetscBool match_found = PETSC_FALSE;
338
339 PetscFunctionBegin;
340 *flg = 0; /* When a message is discovered this is reset to 1 */
341 /* Return if no more messages to process */
342 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
343
344 flg_v = stash->nprocs;
345 /* If a matching pair of receives are found, process them, and return the data to
346 the calling function. Until then keep receiving messages */
347 while (!match_found) {
348 PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
349 /* Now pack the received message into a structure which is usable by others */
350 if (i % 2) {
351 PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
352 flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
353 } else {
354 PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
355 flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
356 *nvals = *nvals / bs;
357 }
358
359 /* Check if we have both the messages from this proc */
360 i1 = flg_v[2 * recv_status.MPI_SOURCE];
361 i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
362 if (i1 != -1 && i2 != -1) {
363 *rows = stash->rindices + i2 * stash->rmax;
364 *vals = stash->rvalues + i1 * bs * stash->rmax;
365 *flg = 1;
366 stash->nprocessed++;
367 match_found = PETSC_TRUE;
368 }
369 }
370 PetscFunctionReturn(PETSC_SUCCESS);
371 }
372
373 /*
374 * Sort the stash, removing duplicates (combining as appropriate).
375 */
VecStashSortCompress_Private(VecStash * stash)376 PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
377 {
378 PetscInt i, j, bs = stash->bs;
379
380 PetscFunctionBegin;
381 if (!stash->n) PetscFunctionReturn(PETSC_SUCCESS);
382 if (bs == 1) {
383 PetscCall(PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array));
384 for (i = 1, j = 0; i < stash->n; i++) {
385 if (stash->idx[i] == stash->idx[j]) {
386 switch (stash->insertmode) {
387 case ADD_VALUES:
388 stash->array[j] += stash->array[i];
389 break;
390 case INSERT_VALUES:
391 stash->array[j] = stash->array[i];
392 break;
393 default:
394 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
395 }
396 } else {
397 j++;
398 stash->idx[j] = stash->idx[i];
399 stash->array[j] = stash->array[i];
400 }
401 }
402 stash->n = j + 1;
403 } else { /* block stash */
404 PetscInt *perm = NULL;
405 PetscScalar *arr;
406 PetscCall(PetscMalloc2(stash->n, &perm, stash->n * bs, &arr));
407 for (i = 0; i < stash->n; i++) perm[i] = i;
408 PetscCall(PetscSortIntWithArray(stash->n, stash->idx, perm));
409
410 /* Out-of-place copy of arr */
411 PetscCall(PetscArraycpy(arr, stash->array + perm[0] * bs, bs));
412 for (i = 1, j = 0; i < stash->n; i++) {
413 PetscInt k;
414 if (stash->idx[i] == stash->idx[j]) {
415 switch (stash->insertmode) {
416 case ADD_VALUES:
417 for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
418 break;
419 case INSERT_VALUES:
420 for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
421 break;
422 default:
423 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
424 }
425 } else {
426 j++;
427 stash->idx[j] = stash->idx[i];
428 for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
429 }
430 }
431 stash->n = j + 1;
432 PetscCall(PetscArraycpy(stash->array, arr, stash->n * bs));
433 PetscCall(PetscFree2(perm, arr));
434 }
435 PetscFunctionReturn(PETSC_SUCCESS);
436 }
437
VecStashGetOwnerList_Private(VecStash * stash,PetscLayout map,PetscMPIInt * nowners,PetscMPIInt ** owners)438 PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
439 {
440 PetscInt i, bs = stash->bs;
441 PetscMPIInt r;
442 PetscSegBuffer seg;
443
444 PetscFunctionBegin;
445 PetscCheck(bs == 1 || bs == map->bs, map->comm, PETSC_ERR_PLIB, "Stash block size %" PetscInt_FMT " does not match layout block size %" PetscInt_FMT, bs, map->bs);
446 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg));
447 *nowners = 0;
448 for (i = 0, r = -1; i < stash->n; i++) {
449 if (stash->idx[i] * bs >= map->range[r + 1]) {
450 PetscMPIInt *rank;
451 PetscCall(PetscSegBufferGet(seg, 1, &rank));
452 PetscCall(PetscLayoutFindOwner(map, stash->idx[i] * bs, &r));
453 *rank = r;
454 (*nowners)++;
455 }
456 }
457 PetscCall(PetscSegBufferExtractAlloc(seg, owners));
458 PetscCall(PetscSegBufferDestroy(&seg));
459 PetscFunctionReturn(PETSC_SUCCESS);
460 }
461