1af0996ceSBarry Smith #include <petsc/private/matimpl.h>
25bd3b8fbSHong Zhang
3bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000
44c1ff481SSatish Balay
5ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
875d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
1275d39b6aSBarry Smith #endif
13d7d60843SJed Brown
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes
194c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay
214c1ff481SSatish Balay Input Parameters:
224c1ff481SSatish Balay comm - communicator, required for scatters.
234c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay
252fe279fdSBarry Smith Output Parameter:
264c1ff481SSatish Balay stash - the newly created stash
279417f4adSLois Curfman McInnes */
MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash * stash)28d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
29d71ae5a4SJacob Faibussowitsch {
30533163c2SBarry Smith PetscInt max, *opt, nopt, i;
31ace3abfcSBarry Smith PetscBool flg;
32bc5ccf88SSatish Balay
333a40ed3dSBarry Smith PetscFunctionBegin;
34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay stash->comm = comm;
368865f1eaSKarl Rupp
379566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
389566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42533163c2SBarry Smith for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
43533163c2SBarry Smith
44434d7ff9SSatish Balay nopt = stash->size;
459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nopt, &opt));
469566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47434d7ff9SSatish Balay if (flg) {
48434d7ff9SSatish Balay if (nopt == 1) max = opt[0];
49434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank];
50434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank];
51f4ab19daSSatish Balay else max = 0; /* Use default */
52434d7ff9SSatish Balay stash->umax = max;
53434d7ff9SSatish Balay } else {
54434d7ff9SSatish Balay stash->umax = 0;
55434d7ff9SSatish Balay }
569566063dSJacob Faibussowitsch PetscCall(PetscFree(opt));
574c1ff481SSatish Balay if (bs <= 0) bs = 1;
58a2d1c673SSatish Balay
594c1ff481SSatish Balay stash->bs = bs;
609417f4adSLois Curfman McInnes stash->nmax = 0;
61434d7ff9SSatish Balay stash->oldnmax = 0;
629417f4adSLois Curfman McInnes stash->n = 0;
634c1ff481SSatish Balay stash->reallocs = -1;
64f4259b30SLisandro Dalcin stash->space_head = NULL;
65f4259b30SLisandro Dalcin stash->space = NULL;
669417f4adSLois Curfman McInnes
67f4259b30SLisandro Dalcin stash->send_waits = NULL;
68f4259b30SLisandro Dalcin stash->recv_waits = NULL;
69f4259b30SLisandro Dalcin stash->send_status = NULL;
70bc5ccf88SSatish Balay stash->nsends = 0;
71bc5ccf88SSatish Balay stash->nrecvs = 0;
72f4259b30SLisandro Dalcin stash->svalues = NULL;
73f4259b30SLisandro Dalcin stash->rvalues = NULL;
74f4259b30SLisandro Dalcin stash->rindices = NULL;
75a2d1c673SSatish Balay stash->nprocessed = 0;
7667318a8aSJed Brown stash->reproduce = PETSC_FALSE;
77d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL;
788865f1eaSKarl Rupp
799566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
801667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
81ca723aa4SStefano Zampini flg = PETSC_FALSE;
829566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83b30fb036SBarry Smith if (!flg) {
84d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS;
85d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS;
87d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88ac2b2aa0SJed Brown } else {
891667be42SBarry Smith #endif
90ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref;
91ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref;
93ac2b2aa0SJed Brown stash->ScatterDestroy = NULL;
941667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
95ac2b2aa0SJed Brown }
961667be42SBarry Smith #endif
973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
989417f4adSLois Curfman McInnes }
999417f4adSLois Curfman McInnes
1004c1ff481SSatish Balay /*
1018798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash
1024c1ff481SSatish Balay */
MatStashDestroy_Private(MatStash * stash)103d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104d71ae5a4SJacob Faibussowitsch {
105bc5ccf88SSatish Balay PetscFunctionBegin;
1069566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1079566063dSJacob Faibussowitsch if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
108f4259b30SLisandro Dalcin stash->space = NULL;
1099566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->flg_v));
1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
111bc5ccf88SSatish Balay }
112bc5ccf88SSatish Balay
1134c1ff481SSatish Balay /*
11467318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of
1154c1ff481SSatish Balay scatter. The final stages of message passing is done here, and
11667318a8aSJed Brown all the memory used for message passing is cleaned up. This
1174c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used
1184c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage
1194c1ff481SSatish Balay so that the same value can be used the next time through.
1204c1ff481SSatish Balay */
MatStashScatterEnd_Private(MatStash * stash)121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
122d71ae5a4SJacob Faibussowitsch {
123ac2b2aa0SJed Brown PetscFunctionBegin;
1249566063dSJacob Faibussowitsch PetscCall((*stash->ScatterEnd)(stash));
1253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
126ac2b2aa0SJed Brown }
127ac2b2aa0SJed Brown
MatStashScatterEnd_Ref(MatStash * stash)128d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129d71ae5a4SJacob Faibussowitsch {
1306497c311SBarry Smith PetscMPIInt nsends = stash->nsends;
1316497c311SBarry Smith PetscInt bs2, oldnmax;
132a2d1c673SSatish Balay MPI_Status *send_status;
133a2d1c673SSatish Balay
1343a40ed3dSBarry Smith PetscFunctionBegin;
1356497c311SBarry Smith for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
136a2d1c673SSatish Balay /* wait on sends */
137a2d1c673SSatish Balay if (nsends) {
1389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_status));
1399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
1409566063dSJacob Faibussowitsch PetscCall(PetscFree(send_status));
141a2d1c673SSatish Balay }
142a2d1c673SSatish Balay
143c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the
144434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used.
145434d7ff9SSatish Balay Also update the oldmax, only if it increases */
146b9b97703SBarry Smith if (stash->n) {
14794b769a5SSatish Balay bs2 = stash->bs * stash->bs;
1488a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
149434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
150b9b97703SBarry Smith }
151434d7ff9SSatish Balay
152d07ff455SSatish Balay stash->nmax = 0;
153d07ff455SSatish Balay stash->n = 0;
1544c1ff481SSatish Balay stash->reallocs = -1;
155a2d1c673SSatish Balay stash->nprocessed = 0;
1568865f1eaSKarl Rupp
1579566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1588865f1eaSKarl Rupp
159f4259b30SLisandro Dalcin stash->space = NULL;
1608865f1eaSKarl Rupp
1619566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->send_waits));
1629566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recv_waits));
1639566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->svalues, stash->sindices));
1649566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues[0]));
1659566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues));
1669566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices[0]));
1679566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices));
1683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1699417f4adSLois Curfman McInnes }
1709417f4adSLois Curfman McInnes
1714c1ff481SSatish Balay /*
1726aad120cSJose E. Roman MatStashGetInfo_Private - Gets the relevant statistics of the stash
1734c1ff481SSatish Balay
1744c1ff481SSatish Balay Input Parameters:
1754c1ff481SSatish Balay stash - the stash
17694b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored.
1774c1ff481SSatish Balay reallocs - the number of additional mallocs incurred.
1784c1ff481SSatish Balay
1794c1ff481SSatish Balay */
MatStashGetInfo_Private(MatStash * stash,PetscInt * nstash,PetscInt * reallocs)180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181d71ae5a4SJacob Faibussowitsch {
182c1ac3661SBarry Smith PetscInt bs2 = stash->bs * stash->bs;
18394b769a5SSatish Balay
1843a40ed3dSBarry Smith PetscFunctionBegin;
1851ecfd215SBarry Smith if (nstash) *nstash = stash->n * bs2;
1861ecfd215SBarry Smith if (reallocs) {
187434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0;
188434d7ff9SSatish Balay else *reallocs = stash->reallocs;
1891ecfd215SBarry Smith }
1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
191bc5ccf88SSatish Balay }
1924c1ff481SSatish Balay
1934c1ff481SSatish Balay /*
1948798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash
1954c1ff481SSatish Balay
1964c1ff481SSatish Balay Input Parameters:
1974c1ff481SSatish Balay stash - the stash
1984c1ff481SSatish Balay max - the value that is used as the max size of the stash.
1994c1ff481SSatish Balay this value is used while allocating memory.
2004c1ff481SSatish Balay */
MatStashSetInitialSize_Private(MatStash * stash,PetscInt max)201d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
202d71ae5a4SJacob Faibussowitsch {
203bc5ccf88SSatish Balay PetscFunctionBegin;
204434d7ff9SSatish Balay stash->umax = max;
2053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
20697530c3fSBarry Smith }
20797530c3fSBarry Smith
2088798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2094c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values
2104c1ff481SSatish Balay being inserted into the stash.
2114c1ff481SSatish Balay
2124c1ff481SSatish Balay Input Parameters:
2134c1ff481SSatish Balay stash - the stash
2144c1ff481SSatish Balay incr - the minimum increase requested
2154c1ff481SSatish Balay
21611a5261eSBarry Smith Note:
2174c1ff481SSatish Balay This routine doubles the currently used memory.
2184c1ff481SSatish Balay */
MatStashExpand_Private(MatStash * stash,PetscInt incr)219d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220d71ae5a4SJacob Faibussowitsch {
2215bd3b8fbSHong Zhang PetscInt newnmax, bs2 = stash->bs * stash->bs;
22225057d89SBarry Smith PetscCount cnewnmax;
2239417f4adSLois Curfman McInnes
2243a40ed3dSBarry Smith PetscFunctionBegin;
2259417f4adSLois Curfman McInnes /* allocate a larger stash */
226c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */
22725057d89SBarry Smith if (stash->umax) cnewnmax = stash->umax / bs2;
22825057d89SBarry Smith else cnewnmax = DEFAULT_STASH_SIZE / bs2;
229a678f235SPierre Jolivet } else if (!stash->nmax) { /* reusing stash */
23025057d89SBarry Smith if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2;
23125057d89SBarry Smith else cnewnmax = stash->oldnmax / bs2;
23225057d89SBarry Smith } else cnewnmax = stash->nmax * 2;
23325057d89SBarry Smith if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr;
23425057d89SBarry Smith PetscCall(PetscIntCast(cnewnmax, &newnmax));
235d07ff455SSatish Balay
23675cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */
2379566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
238a678f235SPierre Jolivet if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
239b087b6d6SSatish Balay stash->space_head = stash->space;
24075cae7c1SHong Zhang }
241b087b6d6SSatish Balay
242bc5ccf88SSatish Balay stash->reallocs++;
24375cae7c1SHong Zhang stash->nmax = newnmax;
2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
245bc5ccf88SSatish Balay }
246bc5ccf88SSatish Balay /*
2478798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function
2485e116b59SBarry Smith expects the values to be row-oriented. Multiple columns belong to the same row
2494c1ff481SSatish Balay can be inserted with a single call to this function.
2504c1ff481SSatish Balay
2514c1ff481SSatish Balay Input Parameters:
2524c1ff481SSatish Balay stash - the stash
2535e116b59SBarry Smith row - the global row corresponding to the values
2544c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row.
2554c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values.
2564c1ff481SSatish Balay values - the values inserted
257bc5ccf88SSatish Balay */
MatStashValuesRow_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)258d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
259d71ae5a4SJacob Faibussowitsch {
260b400d20cSBarry Smith PetscInt i, k, cnt = 0;
26175cae7c1SHong Zhang PetscMatStashSpace space = stash->space;
262bc5ccf88SSatish Balay
263bc5ccf88SSatish Balay PetscFunctionBegin;
2644c1ff481SSatish Balay /* Check and see if we have sufficient memory */
26548a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
26675cae7c1SHong Zhang space = stash->space;
26775cae7c1SHong Zhang k = space->local_used;
2684c1ff481SSatish Balay for (i = 0; i < n; i++) {
26914069ce9SStefano Zampini if (ignorezeroentries && values && values[i] == 0.0) continue;
27075cae7c1SHong Zhang space->idx[k] = row;
27175cae7c1SHong Zhang space->idy[k] = idxn[i];
27214069ce9SStefano Zampini space->val[k] = values ? values[i] : 0.0;
27375cae7c1SHong Zhang k++;
274b400d20cSBarry Smith cnt++;
2759417f4adSLois Curfman McInnes }
276b400d20cSBarry Smith stash->n += cnt;
277b400d20cSBarry Smith space->local_used += cnt;
278b400d20cSBarry Smith space->local_remaining -= cnt;
2793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
280a2d1c673SSatish Balay }
28175cae7c1SHong Zhang
2824c1ff481SSatish Balay /*
2838798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function
2845e116b59SBarry Smith expects the values to be column-oriented. Multiple columns belong to the same row
2854c1ff481SSatish Balay can be inserted with a single call to this function.
286a2d1c673SSatish Balay
2874c1ff481SSatish Balay Input Parameters:
2884c1ff481SSatish Balay stash - the stash
2895e116b59SBarry Smith row - the global row corresponding to the values
2904c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row.
2914c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values.
2924c1ff481SSatish Balay values - the values inserted
2934c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval.
2945e116b59SBarry Smith this happens because the input is column-oriented.
2954c1ff481SSatish Balay */
MatStashValuesCol_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)296d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
297d71ae5a4SJacob Faibussowitsch {
29850e9ab7cSBarry Smith PetscInt i, k, cnt = 0;
29975cae7c1SHong Zhang PetscMatStashSpace space = stash->space;
300a2d1c673SSatish Balay
3014c1ff481SSatish Balay PetscFunctionBegin;
3024c1ff481SSatish Balay /* Check and see if we have sufficient memory */
30348a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
30475cae7c1SHong Zhang space = stash->space;
30575cae7c1SHong Zhang k = space->local_used;
3064c1ff481SSatish Balay for (i = 0; i < n; i++) {
30714069ce9SStefano Zampini if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
30875cae7c1SHong Zhang space->idx[k] = row;
30975cae7c1SHong Zhang space->idy[k] = idxn[i];
31014069ce9SStefano Zampini space->val[k] = values ? values[i * stepval] : 0.0;
31175cae7c1SHong Zhang k++;
312b400d20cSBarry Smith cnt++;
3134c1ff481SSatish Balay }
314b400d20cSBarry Smith stash->n += cnt;
315b400d20cSBarry Smith space->local_used += cnt;
316b400d20cSBarry Smith space->local_remaining -= cnt;
3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3184c1ff481SSatish Balay }
3194c1ff481SSatish Balay
3204c1ff481SSatish Balay /*
3218798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3225e116b59SBarry Smith This function expects the values to be row-oriented. Multiple columns belong
3234c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function.
3244c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of
3254c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks.
3264c1ff481SSatish Balay
3274c1ff481SSatish Balay Input Parameters:
3284c1ff481SSatish Balay stash - the stash
3295e116b59SBarry Smith row - the global block-row corresponding to the values
3304c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row.
3314c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of
3324c1ff481SSatish Balay values. Each block is of size bs*bs.
3334c1ff481SSatish Balay values - the values inserted
3344c1ff481SSatish Balay rmax - the number of block-rows in the original block.
335a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block.
3364c1ff481SSatish Balay idx - the index of the current block-row in the original block.
3374c1ff481SSatish Balay */
MatStashValuesRowBlocked_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)338d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
339d71ae5a4SJacob Faibussowitsch {
34075cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l;
34154f21887SBarry Smith const PetscScalar *vals;
34254f21887SBarry Smith PetscScalar *array;
34375cae7c1SHong Zhang PetscMatStashSpace space = stash->space;
344a2d1c673SSatish Balay
345a2d1c673SSatish Balay PetscFunctionBegin;
34648a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
34775cae7c1SHong Zhang space = stash->space;
34875cae7c1SHong Zhang l = space->local_used;
34975cae7c1SHong Zhang bs2 = bs * bs;
3504c1ff481SSatish Balay for (i = 0; i < n; i++) {
35175cae7c1SHong Zhang space->idx[l] = row;
35275cae7c1SHong Zhang space->idy[l] = idxn[i];
3535e116b59SBarry Smith /* Now copy over the block of values. Store the values column-oriented.
35475cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single
355da81f932SPierre Jolivet function call */
35675cae7c1SHong Zhang array = space->val + bs2 * l;
35775cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i;
35875cae7c1SHong Zhang for (j = 0; j < bs; j++) {
35914069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
36075cae7c1SHong Zhang array++;
36175cae7c1SHong Zhang vals += cmax * bs;
36275cae7c1SHong Zhang }
36375cae7c1SHong Zhang l++;
364a2d1c673SSatish Balay }
3655bd3b8fbSHong Zhang stash->n += n;
36675cae7c1SHong Zhang space->local_used += n;
36775cae7c1SHong Zhang space->local_remaining -= n;
3683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3694c1ff481SSatish Balay }
3704c1ff481SSatish Balay
3714c1ff481SSatish Balay /*
3728798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3735e116b59SBarry Smith This function expects the values to be column-oriented. Multiple columns belong
3744c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function.
3754c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of
3764c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks.
3774c1ff481SSatish Balay
3784c1ff481SSatish Balay Input Parameters:
3794c1ff481SSatish Balay stash - the stash
3805e116b59SBarry Smith row - the global block-row corresponding to the values
3814c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row.
3824c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of
3834c1ff481SSatish Balay values. Each block is of size bs*bs.
3844c1ff481SSatish Balay values - the values inserted
3854c1ff481SSatish Balay rmax - the number of block-rows in the original block.
386a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block.
3874c1ff481SSatish Balay idx - the index of the current block-row in the original block.
3884c1ff481SSatish Balay */
MatStashValuesColBlocked_Private(MatStash * stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)389d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
390d71ae5a4SJacob Faibussowitsch {
39175cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l;
39254f21887SBarry Smith const PetscScalar *vals;
39354f21887SBarry Smith PetscScalar *array;
39475cae7c1SHong Zhang PetscMatStashSpace space = stash->space;
3954c1ff481SSatish Balay
3964c1ff481SSatish Balay PetscFunctionBegin;
39748a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
39875cae7c1SHong Zhang space = stash->space;
39975cae7c1SHong Zhang l = space->local_used;
40075cae7c1SHong Zhang bs2 = bs * bs;
4014c1ff481SSatish Balay for (i = 0; i < n; i++) {
40275cae7c1SHong Zhang space->idx[l] = row;
40375cae7c1SHong Zhang space->idy[l] = idxn[i];
4045e116b59SBarry Smith /* Now copy over the block of values. Store the values column-oriented.
40575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single
406da81f932SPierre Jolivet function call */
40775cae7c1SHong Zhang array = space->val + bs2 * l;
40875cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i;
40975cae7c1SHong Zhang for (j = 0; j < bs; j++) {
41014069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
41175cae7c1SHong Zhang array += bs;
41275cae7c1SHong Zhang vals += rmax * bs;
41375cae7c1SHong Zhang }
4145bd3b8fbSHong Zhang l++;
415a2d1c673SSatish Balay }
4165bd3b8fbSHong Zhang stash->n += n;
41775cae7c1SHong Zhang space->local_used += n;
41875cae7c1SHong Zhang space->local_remaining -= n;
4193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4209417f4adSLois Curfman McInnes }
4214c1ff481SSatish Balay /*
4228798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the
4234c1ff481SSatish Balay correct owners. This function goes through the stash, and check the
4244c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner
4254c1ff481SSatish Balay processors.
426bc5ccf88SSatish Balay
4274c1ff481SSatish Balay Input Parameters:
4284c1ff481SSatish Balay stash - the stash
4294c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range
4304c1ff481SSatish Balay for each node.
4314c1ff481SSatish Balay
43211a5261eSBarry Smith Note:
43395452b02SPatrick Sanan The 'owners' array in the cased of the blocked-stash has the
4344c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in
4354c1ff481SSatish Balay the proper global indices.
4364c1ff481SSatish Balay */
MatStashScatterBegin_Private(Mat mat,MatStash * stash,PetscInt * owners)437d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
438d71ae5a4SJacob Faibussowitsch {
439ac2b2aa0SJed Brown PetscFunctionBegin;
4409566063dSJacob Faibussowitsch PetscCall((*stash->ScatterBegin)(mat, stash, owners));
4413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
442ac2b2aa0SJed Brown }
443ac2b2aa0SJed Brown
MatStashScatterBegin_Ref(Mat mat,MatStash * stash,PetscInt * owners)444d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
445d71ae5a4SJacob Faibussowitsch {
4466497c311SBarry Smith PetscInt *owner, *startv, *starti, bs2;
4476497c311SBarry Smith PetscInt size = stash->size;
4486497c311SBarry Smith PetscInt *sindices, **rindices, j, ii, idx, lastidx, l;
44954f21887SBarry Smith PetscScalar **rvalues, *svalues;
450bc5ccf88SSatish Balay MPI_Comm comm = stash->comm;
451563fb871SSatish Balay MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
4526497c311SBarry Smith PetscMPIInt *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
4535bd3b8fbSHong Zhang PetscInt *sp_idx, *sp_idy;
45454f21887SBarry Smith PetscScalar *sp_val;
4555bd3b8fbSHong Zhang PetscMatStashSpace space, space_next;
456bc5ccf88SSatish Balay
457bc5ccf88SSatish Balay PetscFunctionBegin;
4584b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */
4594b4eb8d3SJed Brown InsertMode addv;
460462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
46108401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
4624b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */
4634b4eb8d3SJed Brown }
4644b4eb8d3SJed Brown
4654c1ff481SSatish Balay bs2 = stash->bs * stash->bs;
46675cae7c1SHong Zhang
467bc5ccf88SSatish Balay /* first count number of contributors to each processor */
4689566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths));
4699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner));
470a2d1c673SSatish Balay
4716497c311SBarry Smith ii = j = 0;
4727357eb19SBarry Smith lastidx = -1;
4735bd3b8fbSHong Zhang space = stash->space_head;
4746c4ed002SBarry Smith while (space) {
47575cae7c1SHong Zhang space_next = space->next;
4765bd3b8fbSHong Zhang sp_idx = space->idx;
47775cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) {
4787357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */
4795bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0;
4807357eb19SBarry Smith lastidx = idx;
4817357eb19SBarry Smith for (; j < size; j++) {
4824c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j + 1]) {
4839371c9d4SSatish Balay nlengths[j]++;
4846497c311SBarry Smith owner[ii] = j;
4859371c9d4SSatish Balay break;
486bc5ccf88SSatish Balay }
487bc5ccf88SSatish Balay }
4886497c311SBarry Smith ii++;
48975cae7c1SHong Zhang }
49075cae7c1SHong Zhang space = space_next;
491bc5ccf88SSatish Balay }
492071fcb05SBarry Smith
493563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */
4949566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes));
4956497c311SBarry Smith nsends = 0;
4966497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
4978865f1eaSKarl Rupp if (nlengths[i]) {
4989371c9d4SSatish Balay sizes[i] = 1;
4999371c9d4SSatish Balay nsends++;
5008865f1eaSKarl Rupp }
501563fb871SSatish Balay }
502bc5ccf88SSatish Balay
5039371c9d4SSatish Balay {
5049371c9d4SSatish Balay PetscMPIInt *onodes, *olengths;
505563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */
5069566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
5079566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
508563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */
5096497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
5109566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
511563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */
5126497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
5139566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
5149566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes));
5159371c9d4SSatish Balay PetscCall(PetscFree(olengths));
5169371c9d4SSatish Balay }
517bc5ccf88SSatish Balay
518bc5ccf88SSatish Balay /* do sends:
519bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to
520bc5ccf88SSatish Balay the ith processor
521bc5ccf88SSatish Balay */
5229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
5239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits));
5249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti));
525a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */
5269371c9d4SSatish Balay startv[0] = 0;
5279371c9d4SSatish Balay starti[0] = 0;
5286497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) {
529563fb871SSatish Balay startv[i] = startv[i - 1] + nlengths[i - 1];
530533163c2SBarry Smith starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
531bc5ccf88SSatish Balay }
53275cae7c1SHong Zhang
5336497c311SBarry Smith ii = 0;
5345bd3b8fbSHong Zhang space = stash->space_head;
5356c4ed002SBarry Smith while (space) {
53675cae7c1SHong Zhang space_next = space->next;
5375bd3b8fbSHong Zhang sp_idx = space->idx;
5385bd3b8fbSHong Zhang sp_idy = space->idy;
5395bd3b8fbSHong Zhang sp_val = space->val;
54075cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) {
5416497c311SBarry Smith j = owner[ii];
542a2d1c673SSatish Balay if (bs2 == 1) {
5435bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l];
544a2d1c673SSatish Balay } else {
545c1ac3661SBarry Smith PetscInt k;
54654f21887SBarry Smith PetscScalar *buf1, *buf2;
5474c1ff481SSatish Balay buf1 = svalues + bs2 * startv[j];
548b087b6d6SSatish Balay buf2 = space->val + bs2 * l;
5498865f1eaSKarl Rupp for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
550a2d1c673SSatish Balay }
5515bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l];
5525bd3b8fbSHong Zhang sindices[starti[j] + nlengths[j]] = sp_idy[l];
553bc5ccf88SSatish Balay startv[j]++;
554bc5ccf88SSatish Balay starti[j]++;
5556497c311SBarry Smith ii++;
55675cae7c1SHong Zhang }
55775cae7c1SHong Zhang space = space_next;
558bc5ccf88SSatish Balay }
559bc5ccf88SSatish Balay startv[0] = 0;
5606497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
561e5d0e772SSatish Balay
5626497c311SBarry Smith for (PetscMPIInt i = 0, count = 0; i < size; i++) {
56376ec1555SBarry Smith if (sizes[i]) {
5646497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
5656497c311SBarry Smith PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
566bc5ccf88SSatish Balay }
567b85c94c3SSatish Balay }
5686cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5696497c311SBarry Smith PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
5706497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
5716497c311SBarry Smith if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
572e5d0e772SSatish Balay }
573e5d0e772SSatish Balay #endif
5749566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths));
5759566063dSJacob Faibussowitsch PetscCall(PetscFree(owner));
5769566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti));
5779566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes));
578a2d1c673SSatish Balay
579563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
581563fb871SSatish Balay
5826497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) {
583563fb871SSatish Balay recv_waits[2 * i] = recv_waits1[i];
584563fb871SSatish Balay recv_waits[2 * i + 1] = recv_waits2[i];
585563fb871SSatish Balay }
586563fb871SSatish Balay stash->recv_waits = recv_waits;
5878865f1eaSKarl Rupp
5889566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1));
5899566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2));
590563fb871SSatish Balay
591c05d87d6SBarry Smith stash->svalues = svalues;
592c05d87d6SBarry Smith stash->sindices = sindices;
593c05d87d6SBarry Smith stash->rvalues = rvalues;
594c05d87d6SBarry Smith stash->rindices = rindices;
595c05d87d6SBarry Smith stash->send_waits = send_waits;
596c05d87d6SBarry Smith stash->nsends = nsends;
597c05d87d6SBarry Smith stash->nrecvs = nreceives;
59867318a8aSJed Brown stash->reproduce_count = 0;
5993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
600bc5ccf88SSatish Balay }
601bc5ccf88SSatish Balay
602a2d1c673SSatish Balay /*
6038798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted
6048798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at
6054c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this
6064c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1.
6074c1ff481SSatish Balay
6082fe279fdSBarry Smith Input Parameter:
6094c1ff481SSatish Balay stash - the stash
6104c1ff481SSatish Balay
6114c1ff481SSatish Balay Output Parameters:
6124c1ff481SSatish Balay nvals - the number of entries in the current message.
6134c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values
6144c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values
6154c1ff481SSatish Balay vals - the values
6164c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated.
6174c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the
6184c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately.
619a2d1c673SSatish Balay */
MatStashScatterGetMesg_Private(MatStash * stash,PetscMPIInt * nvals,PetscInt ** rows,PetscInt ** cols,PetscScalar ** vals,PetscInt * flg)620d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
621d71ae5a4SJacob Faibussowitsch {
622ac2b2aa0SJed Brown PetscFunctionBegin;
6239566063dSJacob Faibussowitsch PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
6243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
625ac2b2aa0SJed Brown }
626ac2b2aa0SJed Brown
MatStashScatterGetMesg_Ref(MatStash * stash,PetscMPIInt * nvals,PetscInt ** rows,PetscInt ** cols,PetscScalar ** vals,PetscInt * flg)627d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
628d71ae5a4SJacob Faibussowitsch {
629533163c2SBarry Smith PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
630fe09c992SBarry Smith PetscInt bs2;
631a2d1c673SSatish Balay MPI_Status recv_status;
632ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE;
633bc5ccf88SSatish Balay
634bc5ccf88SSatish Balay PetscFunctionBegin;
635a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */
636a2d1c673SSatish Balay /* Return if no more messages to process */
6373ba16761SJacob Faibussowitsch if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
638a2d1c673SSatish Balay
6394c1ff481SSatish Balay bs2 = stash->bs * stash->bs;
64067318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to
641a2d1c673SSatish Balay the calling function. Until then keep receiving messages */
642a2d1c673SSatish Balay while (!match_found) {
64367318a8aSJed Brown if (stash->reproduce) {
64467318a8aSJed Brown i = stash->reproduce_count++;
6459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
64667318a8aSJed Brown } else {
6479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
64867318a8aSJed Brown }
64908401ef6SPierre Jolivet PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
650533163c2SBarry Smith
65167318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */
652a2d1c673SSatish Balay if (i % 2) {
6539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
654c1dc657dSBarry Smith flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
655a2d1c673SSatish Balay *nvals = *nvals / bs2;
656563fb871SSatish Balay } else {
6579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
658563fb871SSatish Balay flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
659563fb871SSatish Balay *nvals = *nvals / 2; /* This message has both row indices and col indices */
660bc5ccf88SSatish Balay }
661a2d1c673SSatish Balay
662cb2b73ccSBarry Smith /* Check if we have both messages from this proc */
663c1dc657dSBarry Smith i1 = flg_v[2 * recv_status.MPI_SOURCE];
664c1dc657dSBarry Smith i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
665a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) {
666563fb871SSatish Balay *rows = stash->rindices[i2];
667a2d1c673SSatish Balay *cols = *rows + *nvals;
668563fb871SSatish Balay *vals = stash->rvalues[i1];
669a2d1c673SSatish Balay *flg = 1;
670a2d1c673SSatish Balay stash->nprocessed++;
67135d8aa7fSBarry Smith match_found = PETSC_TRUE;
672bc5ccf88SSatish Balay }
673bc5ccf88SSatish Balay }
6743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
675bc5ccf88SSatish Balay }
676d7d60843SJed Brown
677e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
678d7d60843SJed Brown typedef struct {
679d7d60843SJed Brown PetscInt row;
680d7d60843SJed Brown PetscInt col;
681d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */
682d7d60843SJed Brown } MatStashBlock;
683d7d60843SJed Brown
MatStashSortCompress_Private(MatStash * stash,InsertMode insertmode)684d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
685d71ae5a4SJacob Faibussowitsch {
686d7d60843SJed Brown PetscMatStashSpace space;
687d7d60843SJed Brown PetscInt n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
688d7d60843SJed Brown PetscScalar **valptr;
689d7d60843SJed Brown
690d7d60843SJed Brown PetscFunctionBegin;
6919566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
692d7d60843SJed Brown for (space = stash->space_head, cnt = 0; space; space = space->next) {
693d7d60843SJed Brown for (i = 0; i < space->local_used; i++) {
694d7d60843SJed Brown row[cnt] = space->idx[i];
695d7d60843SJed Brown col[cnt] = space->idy[i];
696d7d60843SJed Brown valptr[cnt] = &space->val[i * bs2];
697d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
698d7d60843SJed Brown cnt++;
699d7d60843SJed Brown }
700d7d60843SJed Brown }
70108401ef6SPierre Jolivet PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
7029566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
703d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
704d7d60843SJed Brown for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
705d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
706d7d60843SJed Brown PetscInt colstart;
7079566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
708d7d60843SJed Brown for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
709d7d60843SJed Brown PetscInt j, l;
710d7d60843SJed Brown MatStashBlock *block;
7119566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
712d7d60843SJed Brown block->row = row[rowstart];
713d7d60843SJed Brown block->col = col[colstart];
7149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
715d7d60843SJed Brown for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
716d7d60843SJed Brown if (insertmode == ADD_VALUES) {
717d7d60843SJed Brown for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
718d7d60843SJed Brown } else {
7199566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
720d7d60843SJed Brown }
721d7d60843SJed Brown }
722d7d60843SJed Brown colstart = j;
723d7d60843SJed Brown }
724d7d60843SJed Brown rowstart = i;
725d7d60843SJed Brown }
726d7d60843SJed Brown }
7279566063dSJacob Faibussowitsch PetscCall(PetscFree4(row, col, valptr, perm));
7283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
729d7d60843SJed Brown }
730d7d60843SJed Brown
MatStashBlockTypeSetUp(MatStash * stash)731d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
732d71ae5a4SJacob Faibussowitsch {
733d7d60843SJed Brown PetscFunctionBegin;
734d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) {
735d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs);
736d7d60843SJed Brown PetscMPIInt blocklens[2];
737d7d60843SJed Brown MPI_Aint displs[2];
738d7d60843SJed Brown MPI_Datatype types[2], stype;
739223b4c07SBarry Smith /*
740223b4c07SBarry Smith DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
741f41aa5efSJunchao Zhang std::complex itself has standard layout, so does DummyBlock, recursively.
742a5b23f4aSJose E. Roman To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
743f41aa5efSJunchao Zhang though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
744f41aa5efSJunchao Zhang offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
745f41aa5efSJunchao Zhang
746f41aa5efSJunchao Zhang We can test if std::complex has standard layout with the following code:
747f41aa5efSJunchao Zhang #include <iostream>
748f41aa5efSJunchao Zhang #include <type_traits>
749f41aa5efSJunchao Zhang #include <complex>
750f41aa5efSJunchao Zhang int main() {
751f41aa5efSJunchao Zhang std::cout << std::boolalpha;
752f41aa5efSJunchao Zhang std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
753f41aa5efSJunchao Zhang }
754f41aa5efSJunchao Zhang Output: true
7559503c6c6SJed Brown */
7569371c9d4SSatish Balay struct DummyBlock {
7579371c9d4SSatish Balay PetscInt row, col;
7589371c9d4SSatish Balay PetscScalar vals;
7599371c9d4SSatish Balay };
760d7d60843SJed Brown
7619503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
762d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
763d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
764d7d60843SJed Brown }
7659566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
7669566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
7679566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
768d7d60843SJed Brown blocklens[0] = 2;
7696497c311SBarry Smith blocklens[1] = (PetscMPIInt)bs2;
7709503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock, row);
7719503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock, vals);
772d7d60843SJed Brown types[0] = MPIU_INT;
773d7d60843SJed Brown types[1] = MPIU_SCALAR;
7749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
7759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stype));
7769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
7779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&stype));
779d7d60843SJed Brown }
7803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
781d7d60843SJed Brown }
782d7d60843SJed Brown
783da81f932SPierre Jolivet /* Callback invoked after target rank has initiated receive of rendezvous message.
784d7d60843SJed Brown * Here we post the main sends.
785d7d60843SJed Brown */
MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void * sdata,MPI_Request req[],PetscCtx ctx)786*2a8381b2SBarry Smith static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], PetscCtx ctx)
787d71ae5a4SJacob Faibussowitsch {
788d7d60843SJed Brown MatStash *stash = (MatStash *)ctx;
789d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)sdata;
790d7d60843SJed Brown
791d7d60843SJed Brown PetscFunctionBegin;
79208401ef6SPierre Jolivet PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
7936497c311SBarry Smith PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
794d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count;
795d7d60843SJed Brown stash->sendframes[rankid].pending = 1;
7963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
797d7d60843SJed Brown }
798d7d60843SJed Brown
799223b4c07SBarry Smith /*
800223b4c07SBarry Smith Callback invoked by target after receiving rendezvous message.
801223b4c07SBarry Smith Here we post the main recvs.
802d7d60843SJed Brown */
MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void * rdata,MPI_Request req[],PetscCtx ctx)803*2a8381b2SBarry Smith static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], PetscCtx ctx)
804d71ae5a4SJacob Faibussowitsch {
805d7d60843SJed Brown MatStash *stash = (MatStash *)ctx;
806d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)rdata;
807d7d60843SJed Brown MatStashFrame *frame;
808d7d60843SJed Brown
809d7d60843SJed Brown PetscFunctionBegin;
8109566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
8119566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
8126497c311SBarry Smith PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
813d7d60843SJed Brown frame->count = hdr->count;
814d7d60843SJed Brown frame->pending = 1;
8153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
816d7d60843SJed Brown }
817d7d60843SJed Brown
818d7d60843SJed Brown /*
819d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
820d7d60843SJed Brown */
MatStashScatterBegin_BTS(Mat mat,MatStash * stash,PetscInt owners[])821d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
822d71ae5a4SJacob Faibussowitsch {
8236497c311SBarry Smith PetscCount nblocks;
824d7d60843SJed Brown char *sendblocks;
825d7d60843SJed Brown
826d7d60843SJed Brown PetscFunctionBegin;
82776bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8284b4eb8d3SJed Brown InsertMode addv;
829462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
83008401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
8314b4eb8d3SJed Brown }
8324b4eb8d3SJed Brown
8339566063dSJacob Faibussowitsch PetscCall(MatStashBlockTypeSetUp(stash));
8349566063dSJacob Faibussowitsch PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
8359566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
8369566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
83760f34548SJunchao Zhang if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
83823b7d1baSJed Brown PetscInt i;
8396497c311SBarry Smith PetscCount b;
84097da8949SJed Brown for (i = 0, b = 0; i < stash->nsendranks; i++) {
84197da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
84297da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
84397da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
84497da8949SJed Brown for (; b < nblocks; b++) {
84597da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
846aed4548fSBarry Smith PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
84797da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
84897da8949SJed Brown stash->sendhdr[i].count++;
84997da8949SJed Brown }
85097da8949SJed Brown }
85197da8949SJed Brown } else { /* Dynamically count and pack (first time) */
85223b7d1baSJed Brown PetscInt sendno;
8536497c311SBarry Smith PetscCount i, rowstart;
854d7d60843SJed Brown
855d7d60843SJed Brown /* Count number of send ranks and allocate for sends */
856d7d60843SJed Brown stash->nsendranks = 0;
857d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) {
8587e2ea869SJed Brown PetscInt owner;
859d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8606497c311SBarry Smith
8619566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
862d7d60843SJed Brown if (owner < 0) owner = -(owner + 2);
863d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
864d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8656497c311SBarry Smith
8667e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break;
867d7d60843SJed Brown }
868d7d60843SJed Brown stash->nsendranks++;
869d7d60843SJed Brown rowstart = i;
870d7d60843SJed Brown }
8719566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
872d7d60843SJed Brown
873d7d60843SJed Brown /* Set up sendhdrs and sendframes */
874d7d60843SJed Brown sendno = 0;
875d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) {
8766497c311SBarry Smith PetscInt iowner;
8776497c311SBarry Smith PetscMPIInt owner;
878d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8796497c311SBarry Smith
8806497c311SBarry Smith PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
8816497c311SBarry Smith PetscCall(PetscMPIIntCast(iowner, &owner));
882d7d60843SJed Brown if (owner < 0) owner = -(owner + 2);
883d7d60843SJed Brown stash->sendranks[sendno] = owner;
884d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
885d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8866497c311SBarry Smith
8877e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break;
888d7d60843SJed Brown }
889d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart;
890d7d60843SJed Brown stash->sendframes[sendno].pending = 0;
8916497c311SBarry Smith PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
892d7d60843SJed Brown sendno++;
893d7d60843SJed Brown rowstart = i;
894d7d60843SJed Brown }
89508401ef6SPierre Jolivet PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
896d7d60843SJed Brown }
897d7d60843SJed Brown
8984b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8994b4eb8d3SJed Brown * message or a dummy entry of some sort. */
9004b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) {
9016497c311SBarry Smith for (PetscCount i = 0; i < nblocks; i++) {
9024b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
9034b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row + 1);
9044b4eb8d3SJed Brown }
9054b4eb8d3SJed Brown }
9064b4eb8d3SJed Brown
90760f34548SJunchao Zhang if (stash->first_assembly_done) {
90897da8949SJed Brown PetscMPIInt i, tag;
9096497c311SBarry Smith
9109566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &tag));
91148a46eb9SPierre Jolivet for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
91248a46eb9SPierre Jolivet for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
91397da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */
91497da8949SJed Brown } else {
9159371c9d4SSatish Balay PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
9169566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
91797da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
91897da8949SJed Brown }
919d7d60843SJed Brown
9209566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
921d7d60843SJed Brown stash->recvframe_active = NULL;
922d7d60843SJed Brown stash->recvframe_i = 0;
923d7d60843SJed Brown stash->some_i = 0;
924d7d60843SJed Brown stash->some_count = 0;
925d7d60843SJed Brown stash->recvcount = 0;
92660f34548SJunchao Zhang stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9274b4eb8d3SJed Brown stash->insertmode = &mat->insertmode;
9283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
929d7d60843SJed Brown }
930d7d60843SJed Brown
MatStashScatterGetMesg_BTS(MatStash * stash,PetscMPIInt * n,PetscInt ** row,PetscInt ** col,PetscScalar ** val,PetscInt * flg)931d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
932d71ae5a4SJacob Faibussowitsch {
933d7d60843SJed Brown MatStashBlock *block;
934d7d60843SJed Brown
935d7d60843SJed Brown PetscFunctionBegin;
936d7d60843SJed Brown *flg = 0;
937d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
938d7d60843SJed Brown if (stash->some_i == stash->some_count) {
9393ba16761SJacob Faibussowitsch if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
9409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
941d7d60843SJed Brown stash->some_i = 0;
942d7d60843SJed Brown }
943d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
944d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
945d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */
9466497c311SBarry Smith PetscMPIInt ic;
9476497c311SBarry Smith
9486497c311SBarry Smith PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
9496497c311SBarry Smith stash->recvframe_count = ic;
950d7d60843SJed Brown }
9514b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9524b4eb8d3SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
9534b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
954aed4548fSBarry Smith PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
955aed4548fSBarry Smith PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
9564b4eb8d3SJed Brown }
957d7d60843SJed Brown stash->some_i++;
958d7d60843SJed Brown stash->recvcount++;
959d7d60843SJed Brown stash->recvframe_i = 0;
960d7d60843SJed Brown }
961d7d60843SJed Brown *n = 1;
962d7d60843SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
9634b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1);
964d7d60843SJed Brown *row = &block->row;
965d7d60843SJed Brown *col = &block->col;
966d7d60843SJed Brown *val = block->vals;
967d7d60843SJed Brown stash->recvframe_i++;
968d7d60843SJed Brown *flg = 1;
9693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
970d7d60843SJed Brown }
971d7d60843SJed Brown
MatStashScatterEnd_BTS(MatStash * stash)972d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
973d71ae5a4SJacob Faibussowitsch {
974d7d60843SJed Brown PetscFunctionBegin;
9759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
97660f34548SJunchao Zhang if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
977d2b3fd65SBarry Smith PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
9783575f486SJed Brown } else { /* No reuse, so collect everything. */
9799566063dSJacob Faibussowitsch PetscCall(MatStashScatterDestroy_BTS(stash));
98097da8949SJed Brown }
981d7d60843SJed Brown
982d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the
983d7d60843SJed Brown wastage of space is reduced the next time this stash is used.
984d7d60843SJed Brown Also update the oldmax, only if it increases */
985d7d60843SJed Brown if (stash->n) {
986d7d60843SJed Brown PetscInt bs2 = stash->bs * stash->bs;
987d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
988d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
989d7d60843SJed Brown }
990d7d60843SJed Brown
991d7d60843SJed Brown stash->nmax = 0;
992d7d60843SJed Brown stash->n = 0;
993d7d60843SJed Brown stash->reallocs = -1;
994d7d60843SJed Brown stash->nprocessed = 0;
995d7d60843SJed Brown
9969566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
997d7d60843SJed Brown
998f4259b30SLisandro Dalcin stash->space = NULL;
9993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1000d7d60843SJed Brown }
1001d7d60843SJed Brown
MatStashScatterDestroy_BTS(MatStash * stash)1002d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1003d71ae5a4SJacob Faibussowitsch {
1004d7d60843SJed Brown PetscFunctionBegin;
10059566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
10069566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
1007d7d60843SJed Brown stash->recvframes = NULL;
10089566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
100948a46eb9SPierre Jolivet if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
1010d7d60843SJed Brown stash->nsendranks = 0;
1011d7d60843SJed Brown stash->nrecvranks = 0;
10129566063dSJacob Faibussowitsch PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
10139566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->sendreqs));
10149566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvreqs));
10159566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvranks));
10169566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvhdr));
10179566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
10183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1019d7d60843SJed Brown }
10201667be42SBarry Smith #endif
1021