xref: /petsc/src/vec/vec/utils/vecstash.c (revision 5d2a865bca654049dd32bfe43807bdf4c92c46f4)
1 #include <petsc/private/vecimpl.h>
2 
3 #define DEFAULT_STASH_SIZE 100
4 
5 /*
6   VecStashCreate_Private - Creates a stash,currently used for all the parallel
7   matrix implementations. The stash is where elements of a matrix destined
8   to be stored on other processors are kept until matrix assembly is done.
9 
10   This is a simple minded stash. Simply adds entries to end of stash.
11 
12   Input Parameters:
13   comm - communicator, required for scatters.
14   bs   - stash block size. used when stashing blocks of values
15 
16   Output Parameter:
17 . stash    - the newly created stash
18 */
VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash * stash)19 PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
20 {
21   PetscInt  max, *opt, nopt;
22   PetscBool flg;
23 
24   PetscFunctionBegin;
25   /* Require 2 tags, get the second using PetscCommGetNewTag() */
26   stash->comm = comm;
27   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
28   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
29   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
30   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
31 
32   nopt = stash->size;
33   PetscCall(PetscMalloc1(nopt, &opt));
34   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg));
35   if (flg) {
36     if (nopt == 1) max = opt[0];
37     else if (nopt == stash->size) max = opt[stash->rank];
38     else if (stash->rank < nopt) max = opt[stash->rank];
39     else max = 0; /* use default */
40     stash->umax = max;
41   } else {
42     stash->umax = 0;
43   }
44   PetscCall(PetscFree(opt));
45 
46   if (bs <= 0) bs = 1;
47 
48   stash->bs       = bs;
49   stash->nmax     = 0;
50   stash->oldnmax  = 0;
51   stash->n        = 0;
52   stash->reallocs = -1;
53   stash->idx      = NULL;
54   stash->array    = NULL;
55 
56   stash->send_waits   = NULL;
57   stash->recv_waits   = NULL;
58   stash->send_status  = NULL;
59   stash->nsends       = 0;
60   stash->nrecvs       = 0;
61   stash->svalues      = NULL;
62   stash->rvalues      = NULL;
63   stash->rmax         = 0;
64   stash->nprocs       = NULL;
65   stash->nprocessed   = 0;
66   stash->donotstash   = PETSC_FALSE;
67   stash->ignorenegidx = PETSC_FALSE;
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
/*
   VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFunctionBegin;
  /* array and idx were allocated together with PetscMalloc2(), so they must be
     freed together with PetscFree2() */
  PetscCall(PetscFree2(stash->array, stash->idx));
  PetscCall(PetscFree(stash->bowners));
  PetscFunctionReturn(PETSC_SUCCESS);
}
81 
82 /*
83    VecStashScatterEnd_Private - This is called as the final stage of
84    scatter. The final stages of message passing is done here, and
85    all the memory used for message passing is cleanedu up. This
86    routine also resets the stash, and deallocates the memory used
87    for the stash. It also keeps track of the current memory usage
88    so that the same value can be used the next time through.
89 */
VecStashScatterEnd_Private(VecStash * stash)90 PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
91 {
92   PetscMPIInt nsends = stash->nsends;
93   PetscInt    oldnmax;
94   MPI_Status *send_status;
95 
96   PetscFunctionBegin;
97   /* wait on sends */
98   if (nsends) {
99     PetscCall(PetscMalloc1(2 * nsends, &send_status));
100     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
101     PetscCall(PetscFree(send_status));
102   }
103 
104   /* Now update nmaxold to be app 10% more than max n, this way the
105      wastage of space is reduced the next time this stash is used.
106      Also update the oldmax, only if it increases */
107   if (stash->n) {
108     oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
109     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
110   }
111 
112   stash->nmax       = 0;
113   stash->n          = 0;
114   stash->reallocs   = -1;
115   stash->rmax       = 0;
116   stash->nprocessed = 0;
117 
118   PetscCall(PetscFree2(stash->array, stash->idx));
119   stash->array = NULL;
120   stash->idx   = NULL;
121   PetscCall(PetscFree(stash->send_waits));
122   PetscCall(PetscFree(stash->recv_waits));
123   PetscCall(PetscFree2(stash->svalues, stash->sindices));
124   PetscCall(PetscFree2(stash->rvalues, stash->rindices));
125   PetscCall(PetscFree(stash->nprocs));
126   PetscFunctionReturn(PETSC_SUCCESS);
127 }
128 
129 /*
130    VecStashGetInfo_Private - Gets the relevant statistics of the stash
131 
132    Input Parameters:
133    stash    - the stash
134    nstash   - the size of the stash
135    reallocs - the number of additional mallocs incurred.
136 
137 */
VecStashGetInfo_Private(VecStash * stash,PetscInt * nstash,PetscInt * reallocs)138 PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
139 {
140   PetscFunctionBegin;
141   if (nstash) *nstash = stash->n * stash->bs;
142   if (reallocs) {
143     if (stash->reallocs < 0) *reallocs = 0;
144     else *reallocs = stash->reallocs;
145   }
146   PetscFunctionReturn(PETSC_SUCCESS);
147 }
148 
/*
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            this value is used while allocating memory. It specifies
            the number of vals stored, even with the block-stash
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  /* umax is consulted by VecStashExpand_Private() when the stash storage is (re)allocated */
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
164 
165 /* VecStashExpand_Private - Expand the stash. This function is called
166    when the space in the stash is not sufficient to add the new values
167    being inserted into the stash.
168 
169    Input Parameters:
170    stash - the stash
171    incr  - the minimum increase requested
172 
173    Notes:
174    This routine doubles the currently used memory.
175 */
VecStashExpand_Private(VecStash * stash,PetscInt incr)176 PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
177 {
178   PetscInt    *n_idx, newnmax, bs = stash->bs;
179   PetscScalar *n_array;
180 
181   PetscFunctionBegin;
182   /* allocate a larger stash. */
183   if (!stash->oldnmax && !stash->nmax) { /* new stash */
184     if (stash->umax) newnmax = stash->umax / bs;
185     else newnmax = DEFAULT_STASH_SIZE / bs;
186   } else if (!stash->nmax) { /* reusing stash */
187     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
188     else newnmax = stash->oldnmax / bs;
189   } else newnmax = stash->nmax * 2;
190 
191   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
192 
193   PetscCall(PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx));
194   PetscCall(PetscArraycpy(n_array, stash->array, bs * stash->nmax));
195   PetscCall(PetscArraycpy(n_idx, stash->idx, stash->nmax));
196   PetscCall(PetscFree2(stash->array, stash->idx));
197 
198   stash->array = n_array;
199   stash->idx   = n_idx;
200   stash->nmax  = newnmax;
201   stash->reallocs++;
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
    The 'owners' array in the case of the blocked-stash has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, const PetscInt *owners)
{
  PetscMPIInt  size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2, j, *owner, nsends, nreceives;
  PetscInt    *start, *nprocs, inreceives;
  PetscInt     nmax, *sindices, *rindices, idx, bs = stash->bs, lastidx;
  PetscScalar *rvalues, *svalues;
  MPI_Comm     comm = stash->comm;
  MPI_Request *send_waits, *recv_waits;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor:
      nprocs[2*j]   = number of stashed entries destined for rank j
      nprocs[2*j+1] = 0/1 flag: does rank j receive a message at all */
  PetscCall(PetscCalloc1(2 * size, &nprocs));
  PetscCall(PetscMalloc1(stash->n, &owner));

  j       = 0;
  lastidx = -1;
  for (PetscInt i = 0; i < stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    /* linear scan of the ownership ranges, resuming where the last lookup stopped */
    for (; j < size; j++) {
      if (idx >= owners[j] && idx < owners[j + 1]) {
        nprocs[2 * j]++;
        nprocs[2 * j + 1] = 1;
        owner[i]          = j;
        break;
      }
    }
  }
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) nsends += nprocs[2 * i + 1];

  /* inform other processors of number of messages and max length*/
  PetscCall(PetscMaxSum(comm, nprocs, &nmax, &inreceives));
  PetscCall(PetscMPIIntCast(inreceives, &nreceives));

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscCall(PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices));
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
  /* each incoming message is a (values, indices) pair distinguished by tag1/tag2 */
  for (PetscMPIInt i = 0, count = 0; i < nreceives; i++) {
    PetscCallMPI(MPIU_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
    PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc1(size, &start));
  /* use 2 sends the first with all_v, the next with all_i */
  start[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

  /* pack the stash into per-destination contiguous segments of svalues/sindices */
  for (PetscInt i = 0; i < stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else PetscCall(PetscArraycpy(svalues + bs * start[j], stash->array + bs * i, bs));
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  /* start[] was consumed by the packing loop above; rebuild it before posting sends */
  start[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (nprocs[2 * i + 1]) {
      PetscCallMPI(MPIU_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++));
    }
  }
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree(start));
  /* This memory is reused in scatter end for a different purpose: -1 marks
     "no message received yet from this rank" for VecStashScatterGetMesg_Private() */
  for (PetscMPIInt i = 0; i < 2 * size; i++) nprocs[i] = -1;

  stash->nprocs     = nprocs;
  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  PetscFunctionReturn(PETSC_SUCCESS);
}
312 
/*
   VecStashScatterGetMesg_Private - This function waits on the receives posted
   in the function VecStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more message left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt   *flg_v;
  PetscInt    i1, i2, bs = stash->bs;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  /* nprocs[] is reused here (all entries set to -1 by VecStashScatterBegin_Private()):
     flg_v[2*rank] / flg_v[2*rank+1] record which receive-buffer slot holds the
     values / indices message from 'rank', or -1 if not yet received */
  flg_v = stash->nprocs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request index: this is the indices message (posted second, tag2) */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
    } else {
      /* even request index: this is the values message (posted first, tag1) */
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals                            = *nvals / bs; /* report the count in blocks, not scalars */
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
    if (i1 != -1 && i2 != -1) {
      /* hand back pointers into the receive buffers; no copy is made */
      *rows = stash->rindices + i2 * stash->rmax;
      *vals = stash->rvalues + i1 * bs * stash->rmax;
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
372 
/*
 * Sort the stash, removing duplicates (combining as appropriate).
 */
PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
{
  PetscInt i, j, bs = stash->bs;

  PetscFunctionBegin;
  /* empty stash: nothing to sort or compress */
  if (!stash->n) PetscFunctionReturn(PETSC_SUCCESS);
  if (bs == 1) {
    /* sort the indices, carrying the scalar values along */
    PetscCall(PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array));
    /* compress in place: j trails i and always points at the last unique entry */
    for (i = 1, j = 0; i < stash->n; i++) {
      if (stash->idx[i] == stash->idx[j]) {
        /* duplicate index: combine according to the insert mode */
        switch (stash->insertmode) {
        case ADD_VALUES:
          stash->array[j] += stash->array[i];
          break;
        case INSERT_VALUES:
          /* later entry wins */
          stash->array[j] = stash->array[i];
          break;
        default:
          SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j]   = stash->idx[i];
        stash->array[j] = stash->array[i];
      }
    }
    stash->n = j + 1;
  } else { /* block stash */
    PetscInt    *perm = NULL;
    PetscScalar *arr;
    /* sort a permutation array instead of moving the bs-sized value blocks during the sort */
    PetscCall(PetscMalloc2(stash->n, &perm, stash->n * bs, &arr));
    for (i = 0; i < stash->n; i++) perm[i] = i;
    PetscCall(PetscSortIntWithArray(stash->n, stash->idx, perm));

    /* Out-of-place copy of arr */
    PetscCall(PetscArraycpy(arr, stash->array + perm[0] * bs, bs));
    for (i = 1, j = 0; i < stash->n; i++) {
      PetscInt k;
      if (stash->idx[i] == stash->idx[j]) {
        /* duplicate block: combine element-wise according to the insert mode */
        switch (stash->insertmode) {
        case ADD_VALUES:
          for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
          break;
        case INSERT_VALUES:
          for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
          break;
        default:
          SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
      }
    }
    stash->n = j + 1;
    /* copy the compressed blocks back into the stash storage */
    PetscCall(PetscArraycpy(stash->array, arr, stash->n * bs));
    PetscCall(PetscFree2(perm, arr));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
437 
/*
   VecStashGetOwnerList_Private - Builds the list of ranks that own at least
   one stashed index.

   NOTE(review): the single forward pass appends a new rank only when an index
   crosses into the next ownership range, so this appears to assume stash->idx[]
   is sorted ascending (see VecStashSortCompress_Private) — confirm with callers.

   Input Parameters:
   stash - the stash
   map   - the layout describing the ownership ranges

   Output Parameters:
   nowners - number of distinct owning ranks found
   owners  - allocated array of those ranks; the caller must PetscFree() it
*/
PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
{
  PetscInt       i, bs = stash->bs;
  PetscMPIInt    r;
  PetscSegBuffer seg;

  PetscFunctionBegin;
  PetscCheck(bs == 1 || bs == map->bs, map->comm, PETSC_ERR_PLIB, "Stash block size %" PetscInt_FMT " does not match layout block size %" PetscInt_FMT, bs, map->bs);
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg));
  *nowners = 0;
  /* r starts at -1 so map->range[r + 1] is range[0]; the first stashed index
     therefore triggers an owner lookup (assumes stash->idx[0] * bs >= map->range[0]
     — negative indices would misbehave; TODO confirm callers filter them) */
  for (i = 0, r = -1; i < stash->n; i++) {
    if (stash->idx[i] * bs >= map->range[r + 1]) {
      PetscMPIInt *rank;
      PetscCall(PetscSegBufferGet(seg, 1, &rank));
      PetscCall(PetscLayoutFindOwner(map, stash->idx[i] * bs, &r));
      *rank = r;
      (*nowners)++;
    }
  }
  PetscCall(PetscSegBufferExtractAlloc(seg, owners));
  PetscCall(PetscSegBufferDestroy(&seg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
461