1d4002b98SHong Zhang /*
2d4002b98SHong Zhang Support for the parallel SELL matrix vector multiply
3d4002b98SHong Zhang */
4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h>
5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6d4002b98SHong Zhang
7d4002b98SHong Zhang /*
8d4002b98SHong Zhang Takes the local part of an already assembled MPISELL matrix
9d4002b98SHong Zhang and disassembles it. This is to allow new nonzeros into the matrix
10d4002b98SHong Zhang that require more communication in the matrix vector multiply.
11d4002b98SHong Zhang Thus certain data-structures must be rebuilt.
12d4002b98SHong Zhang
13d4002b98SHong Zhang Kind of slow! But that's what application programmers get when
14d4002b98SHong Zhang they are sloppy.
15d4002b98SHong Zhang */
MatDisAssemble_MPISELL(Mat A)16d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISELL(Mat A)
17d71ae5a4SJacob Faibussowitsch {
18d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)A->data;
19d4002b98SHong Zhang Mat B = sell->B, Bnew;
20d4002b98SHong Zhang Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
214dfa11a4SJacob Faibussowitsch PetscInt i, j, totalslices, N = A->cmap->N, row;
22d4002b98SHong Zhang PetscBool isnonzero;
23d4002b98SHong Zhang
24d4002b98SHong Zhang PetscFunctionBegin;
25d4002b98SHong Zhang /* free stuff related to matrix-vec multiply */
269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&sell->lvec));
279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sell->Mvctx));
28d4002b98SHong Zhang if (sell->colmap) {
29d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
30eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&sell->colmap));
31d4002b98SHong Zhang #else
329566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->colmap));
33d4002b98SHong Zhang #endif
34d4002b98SHong Zhang }
35d4002b98SHong Zhang
36d4002b98SHong Zhang /* make sure that B is assembled so we can access its values */
379566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
389566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
39d4002b98SHong Zhang
40d4002b98SHong Zhang /* invent new B and copy stuff over */
419566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
429566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
439566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
449566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
459566063dSJacob Faibussowitsch PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
46b38c15b3SStefano Zampini if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
47b38c15b3SStefano Zampini ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
48b38c15b3SStefano Zampini }
49d4002b98SHong Zhang
50d4002b98SHong Zhang /*
51d4002b98SHong Zhang Ensure that B's nonzerostate is monotonically increasing.
52d4002b98SHong Zhang Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
53d4002b98SHong Zhang */
54d4002b98SHong Zhang Bnew->nonzerostate = B->nonzerostate;
55d4002b98SHong Zhang
5607e43b41SHong Zhang totalslices = PetscCeilInt(B->rmap->n, Bsell->sliceheight);
57d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */
5807e43b41SHong Zhang for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = (row + 1) % Bsell->sliceheight) {
5907e43b41SHong Zhang isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / Bsell->sliceheight < Bsell->rlen[Bsell->sliceheight * i + row]);
603a7d0413SPierre Jolivet if (isnonzero) PetscCall(MatSetValue(Bnew, Bsell->sliceheight * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode));
61d4002b98SHong Zhang }
62d4002b98SHong Zhang }
63d4002b98SHong Zhang
649566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->garray));
659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
66d4002b98SHong Zhang
67d4002b98SHong Zhang sell->B = Bnew;
68d4002b98SHong Zhang A->was_assembled = PETSC_FALSE;
69d4002b98SHong Zhang A->assembled = PETSC_FALSE;
703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
71d4002b98SHong Zhang }
72d4002b98SHong Zhang
MatSetUpMultiply_MPISELL(Mat mat)73d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat)
74d71ae5a4SJacob Faibussowitsch {
75d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
76f4f49eeaSPierre Jolivet Mat_SeqSELL *B = (Mat_SeqSELL *)sell->B->data;
77d4002b98SHong Zhang PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
78d4002b98SHong Zhang IS from, to;
79d4002b98SHong Zhang Vec gvec;
80d4002b98SHong Zhang PetscBool isnonzero;
81d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
82eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL;
83eec179cfSJacob Faibussowitsch PetscHashIter tpos;
84d4002b98SHong Zhang PetscInt gid, lid;
85d4002b98SHong Zhang #else
86d4002b98SHong Zhang PetscInt N = mat->cmap->N, *indices;
87d4002b98SHong Zhang #endif
88d4002b98SHong Zhang
89d4002b98SHong Zhang PetscFunctionBegin;
9007e43b41SHong Zhang totalslices = PetscCeilInt(sell->B->rmap->n, B->sliceheight);
91d4002b98SHong Zhang
92d4002b98SHong Zhang /* ec counts the number of columns that contain nonzeros */
93d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
94d4002b98SHong Zhang /* use a table */
95eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(sell->B->rmap->n, &gid1_lid1));
96d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */
97d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
9807e43b41SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
99d4002b98SHong Zhang if (isnonzero) { /* check the mask bit */
100d4002b98SHong Zhang PetscInt data, gid1 = bcolidx[j] + 1;
101c76ffc5fSJacob Faibussowitsch
102eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
103d4002b98SHong Zhang /* one based table */
104c76ffc5fSJacob Faibussowitsch if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
105d4002b98SHong Zhang }
106d4002b98SHong Zhang }
107d4002b98SHong Zhang }
108d4002b98SHong Zhang
109d4002b98SHong Zhang /* form array of columns we need */
1109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray));
111eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos);
112eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
113eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid);
114eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid);
115eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos);
116d4002b98SHong Zhang gid--;
117d4002b98SHong Zhang lid--;
118d4002b98SHong Zhang garray[lid] = gid;
119d4002b98SHong Zhang }
1209566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
121eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1));
122c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
123d4002b98SHong Zhang
124d4002b98SHong Zhang /* compact out the extra columns in B */
125d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */
126d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
12707e43b41SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
128d4002b98SHong Zhang if (isnonzero) {
129d4002b98SHong Zhang PetscInt gid1 = bcolidx[j] + 1;
130eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
131d4002b98SHong Zhang lid--;
132d4002b98SHong Zhang bcolidx[j] = lid;
133d4002b98SHong Zhang }
134d4002b98SHong Zhang }
135d4002b98SHong Zhang }
1369566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
138eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1));
139d4002b98SHong Zhang #else
140d4002b98SHong Zhang /* Make an array as long as the number of columns */
1419566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices));
142d4002b98SHong Zhang /* mark those columns that are in sell->B */
143d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */
144d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
14507e43b41SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
146d4002b98SHong Zhang if (isnonzero) {
147d4002b98SHong Zhang if (!indices[bcolidx[j]]) ec++;
148d4002b98SHong Zhang indices[bcolidx[j]] = 1;
149d4002b98SHong Zhang }
150d4002b98SHong Zhang }
151d4002b98SHong Zhang }
152d4002b98SHong Zhang
153d4002b98SHong Zhang /* form array of columns we need */
1549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray));
155d4002b98SHong Zhang ec = 0;
156d4002b98SHong Zhang for (i = 0; i < N; i++) {
157d4002b98SHong Zhang if (indices[i]) garray[ec++] = i;
158d4002b98SHong Zhang }
159d4002b98SHong Zhang
160d4002b98SHong Zhang /* make indices now point into garray */
161ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i;
162d4002b98SHong Zhang
163d4002b98SHong Zhang /* compact out the extra columns in B */
164d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */
165d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
16607e43b41SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
167d4002b98SHong Zhang if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
168d4002b98SHong Zhang }
169d4002b98SHong Zhang }
1709566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1719566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1729566063dSJacob Faibussowitsch PetscCall(PetscFree(indices));
173d4002b98SHong Zhang #endif
174d4002b98SHong Zhang /* create local vector that is used to scatter into */
1759566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
176d4002b98SHong Zhang /* create two temporary Index sets for build scatter gather */
1779566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
1789566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));
179d4002b98SHong Zhang
180d4002b98SHong Zhang /* create temporary global vector to generate scatter context */
181d4002b98SHong Zhang /* This does not allocate the array's memory so is efficient */
1829566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
183d4002b98SHong Zhang
184d4002b98SHong Zhang /* generate the scatter context */
1859566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
1869566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
187d4002b98SHong Zhang
188d4002b98SHong Zhang sell->garray = garray;
189d4002b98SHong Zhang
1909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from));
1919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to));
1929566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec));
1933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
194d4002b98SHong Zhang }
195d4002b98SHong Zhang
196d4002b98SHong Zhang /* ugly stuff added for Glenn someday we should fix this up */
197f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
198f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */
199d4002b98SHong Zhang
MatMPISELLDiagonalScaleLocalSetUp(Mat inA,Vec scale)20066976f2fSJacob Faibussowitsch static PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale)
201d71ae5a4SJacob Faibussowitsch {
202d4002b98SHong Zhang Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */
203d4002b98SHong Zhang PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
204d4002b98SHong Zhang PetscInt *r_rmapd, *r_rmapo;
205d4002b98SHong Zhang
206d4002b98SHong Zhang PetscFunctionBegin;
2079566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2089566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n));
209*f1957bc3SPierre Jolivet PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
210d4002b98SHong Zhang nt = 0;
211d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) {
212d4002b98SHong Zhang if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
213d4002b98SHong Zhang nt++;
214d4002b98SHong Zhang r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
215d4002b98SHong Zhang }
216d4002b98SHong Zhang }
21708401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
218*f1957bc3SPierre Jolivet PetscCall(PetscMalloc1(n, &auglyrmapd));
219d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) {
220ad540459SPierre Jolivet if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
221d4002b98SHong Zhang }
2229566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd));
2239566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
224*f1957bc3SPierre Jolivet PetscCall(PetscCalloc1(inA->cmap->N, &lindices));
225ad540459SPierre Jolivet for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
226d4002b98SHong Zhang no = inA->rmap->mapping->n - nt;
227*f1957bc3SPierre Jolivet PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
228d4002b98SHong Zhang nt = 0;
229d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) {
230d4002b98SHong Zhang if (lindices[inA->rmap->mapping->indices[i]]) {
231d4002b98SHong Zhang nt++;
232d4002b98SHong Zhang r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
233d4002b98SHong Zhang }
234d4002b98SHong Zhang }
23508401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2369566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices));
237*f1957bc3SPierre Jolivet PetscCall(PetscMalloc1(nt, &auglyrmapo));
238d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) {
239ad540459SPierre Jolivet if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
240d4002b98SHong Zhang }
2419566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo));
2429566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
2433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
244d4002b98SHong Zhang }
245d4002b98SHong Zhang
MatDiagonalScaleLocal_MPISELL(Mat A,Vec scale)246d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale)
247d71ae5a4SJacob Faibussowitsch {
248d4002b98SHong Zhang Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */
249d4002b98SHong Zhang PetscInt n, i;
250d4002b98SHong Zhang PetscScalar *d, *o;
251d4002b98SHong Zhang const PetscScalar *s;
252d4002b98SHong Zhang
253d4002b98SHong Zhang PetscFunctionBegin;
25448a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale));
2559566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s));
2569566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n));
2579566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d));
258ac530a7eSPierre Jolivet for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
2599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d));
260d4002b98SHong Zhang /* column scale "diagonal" portion of local matrix */
2619566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
2629566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n));
2639566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o));
264ac530a7eSPierre Jolivet for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
2659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s));
2669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o));
267d4002b98SHong Zhang /* column scale "off-diagonal" portion of local matrix */
2689566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
270d4002b98SHong Zhang }
271