18016bdd1SSatish Balay /*
2d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply
38016bdd1SSatish Balay */
4c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>
5af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6bba1ac68SSatish Balay
MatSetUpMultiply_MPIBAIJ(Mat mat)7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
8d71ae5a4SJacob Faibussowitsch {
9d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10f4f49eeaSPierre Jolivet Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
11b24ad042SBarry Smith PetscInt i, j, *aj = B->j, ec = 0, *garray;
12d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp;
138016bdd1SSatish Balay IS from, to;
148016bdd1SSatish Balay Vec gvec;
15aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
16eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL;
17eec179cfSJacob Faibussowitsch PetscHashIter tpos;
18b24ad042SBarry Smith PetscInt gid, lid;
196f531f54SSatish Balay #else
20b24ad042SBarry Smith PetscInt Nbs = baij->Nbs, *indices;
2173a2e727SSatish Balay #endif
228016bdd1SSatish Balay
233a40ed3dSBarry Smith PetscFunctionBegin;
24aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2573a2e727SSatish Balay /* use a table - Mark Adams */
26eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
2773a2e727SSatish Balay for (i = 0; i < B->mbs; i++) {
2873a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) {
29b24ad042SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1;
30eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
31fa46199cSSatish Balay if (!data) {
3273a2e727SSatish Balay /* one based table */
33c76ffc5fSJacob Faibussowitsch PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
3473a2e727SSatish Balay }
3573a2e727SSatish Balay }
3673a2e727SSatish Balay }
3773a2e727SSatish Balay /* form array of columns we need */
389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray));
39eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos);
40eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
41eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid);
42eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid);
43eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos);
449371c9d4SSatish Balay gid--;
459371c9d4SSatish Balay lid--;
4673a2e727SSatish Balay garray[lid] = gid;
4773a2e727SSatish Balay }
489566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray));
49eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1));
50c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
5173a2e727SSatish Balay /* compact out the extra columns in B */
5273a2e727SSatish Balay for (i = 0; i < B->mbs; i++) {
5373a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) {
54b24ad042SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1;
55eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
56fa46199cSSatish Balay lid--;
5773a2e727SSatish Balay aj[B->i[i] + j] = lid;
5873a2e727SSatish Balay }
5973a2e727SSatish Balay }
6073a2e727SSatish Balay B->nbs = ec;
619566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap));
629566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
63eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1));
6473a2e727SSatish Balay #else
65435da068SBarry Smith /* Make an array as long as the number of columns */
66d9653453SSatish Balay /* mark those columns that are in baij->B */
679566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices));
68d9653453SSatish Balay for (i = 0; i < B->mbs; i++) {
698016bdd1SSatish Balay for (j = 0; j < B->ilen[i]; j++) {
70d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++;
71d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1;
728016bdd1SSatish Balay }
738016bdd1SSatish Balay }
748016bdd1SSatish Balay
758016bdd1SSatish Balay /* form array of columns we need */
769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray));
778016bdd1SSatish Balay ec = 0;
78d9653453SSatish Balay for (i = 0; i < Nbs; i++) {
79ad540459SPierre Jolivet if (indices[i]) garray[ec++] = i;
808016bdd1SSatish Balay }
818016bdd1SSatish Balay
828016bdd1SSatish Balay /* make indices now point into garray */
83ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i;
848016bdd1SSatish Balay
858016bdd1SSatish Balay /* compact out the extra columns in B */
86d9653453SSatish Balay for (i = 0; i < B->mbs; i++) {
87ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
888016bdd1SSatish Balay }
89d9653453SSatish Balay B->nbs = ec;
909566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap));
919566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
929566063dSJacob Faibussowitsch PetscCall(PetscFree(indices));
9373a2e727SSatish Balay #endif
948016bdd1SSatish Balay
958016bdd1SSatish Balay /* create local vector that is used to scatter into */
969566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));
978016bdd1SSatish Balay
98c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */
999566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
100c16cb8f2SBarry Smith
1019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &stmp));
10226fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i;
1039566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
1048016bdd1SSatish Balay
1058016bdd1SSatish Balay /* create temporary global vector to generate scatter context */
1069566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1078016bdd1SSatish Balay
1089566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
1099566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
11090f02eecSBarry Smith
111d9653453SSatish Balay baij->garray = garray;
11226fbe8dcSKarl Rupp
1139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from));
1149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to));
1159566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec));
1163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1178016bdd1SSatish Balay }
1188016bdd1SSatish Balay
1198016bdd1SSatish Balay /*
120d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix
1218016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix
1228016bdd1SSatish Balay that require more communication in the matrix vector multiply.
1238016bdd1SSatish Balay Thus certain data-structures must be rebuilt.
1248016bdd1SSatish Balay
1258016bdd1SSatish Balay Kind of slow! But that's what application programmers get when
1268016bdd1SSatish Balay they are sloppy.
1278016bdd1SSatish Balay */
MatDisAssemble_MPIBAIJ(Mat A)128d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
129d71ae5a4SJacob Faibussowitsch {
130d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
131d9653453SSatish Balay Mat B = baij->B, Bnew;
1327c612885SStefano Zampini Mat_SeqBAIJ *Bbaij;
1337c612885SStefano Zampini PetscInt i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
1347c612885SStefano Zampini PetscInt bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
1357c612885SStefano Zampini MatScalar *a, *atmp;
13697e5c40aSBarry Smith
1373a40ed3dSBarry Smith PetscFunctionBegin;
1388016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */
1399371c9d4SSatish Balay PetscCall(VecDestroy(&baij->lvec));
1409371c9d4SSatish Balay PetscCall(VecScatterDestroy(&baij->Mvctx));
141d9653453SSatish Balay if (baij->colmap) {
142aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
143eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap));
14448e59246SSatish Balay #else
1459566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap));
14648e59246SSatish Balay #endif
1478016bdd1SSatish Balay }
1488016bdd1SSatish Balay
1498016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */
1507c612885SStefano Zampini if (B) {
1519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1537c612885SStefano Zampini Bbaij = (Mat_SeqBAIJ *)B->data;
1547c612885SStefano Zampini mbs = Bbaij->mbs;
1557c612885SStefano Zampini a = Bbaij->a;
1568016bdd1SSatish Balay
1578016bdd1SSatish Balay /* invent new B and copy stuff over */
1589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz));
159ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
1609566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
1619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n));
1629566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
1639566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
16477341eacSDmitry Karpeev /*
16577341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing.
16677341eacSDmitry Karpeev Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
16777341eacSDmitry Karpeev */
16877341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate;
1697c612885SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
1707c612885SStefano Zampini ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
1717c612885SStefano Zampini }
172d9653453SSatish Balay
1737c612885SStefano Zampini PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
174bba1ac68SSatish Balay for (i = 0; i < mbs; i++) {
175bba1ac68SSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
176bba1ac68SSatish Balay col = garray[Bbaij->j[j]];
1773eda8832SBarry Smith atmp = a + j * bs2;
1789566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
1798016bdd1SSatish Balay }
1808016bdd1SSatish Balay }
181d0254051SBarry Smith PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, Bbaij->roworiented));
182bba1ac68SSatish Balay
1839566063dSJacob Faibussowitsch PetscCall(PetscFree(nz));
1849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
18526fbe8dcSKarl Rupp
186d9653453SSatish Balay baij->B = Bnew;
1877c612885SStefano Zampini }
1887c612885SStefano Zampini PetscCall(PetscFree(baij->garray));
1898016bdd1SSatish Balay A->was_assembled = PETSC_FALSE;
1906a719282SBarry Smith A->assembled = PETSC_FALSE;
1913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1928016bdd1SSatish Balay }
1938016bdd1SSatish Balay
19404f1ad80SBarry Smith /* ugly stuff added for Glenn someday we should fix this up */
19504f1ad80SBarry Smith
196f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
197f4259b30SLisandro Dalcin static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */
19804f1ad80SBarry Smith
MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)199ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
200d71ae5a4SJacob Faibussowitsch {
20104f1ad80SBarry Smith Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
20204f1ad80SBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data;
203d0f46423SBarry Smith PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
204b24ad042SBarry Smith PetscInt *r_rmapd, *r_rmapo;
20504f1ad80SBarry Smith
20604f1ad80SBarry Smith PetscFunctionBegin;
2079566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2089566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n));
209*498e7f42SRichard Tran Mills PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
21004f1ad80SBarry Smith nt = 0;
21145b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) {
21245b6f7e9SBarry Smith if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
21304f1ad80SBarry Smith nt++;
21445b6f7e9SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
21504f1ad80SBarry Smith }
21604f1ad80SBarry Smith }
21708401ef6SPierre Jolivet PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
218*498e7f42SRichard Tran Mills PetscCall(PetscMalloc1(n, &uglyrmapd));
21945b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) {
22004f1ad80SBarry Smith if (r_rmapd[i]) {
221ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
22204f1ad80SBarry Smith }
22304f1ad80SBarry Smith }
2249566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd));
2259566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
22604f1ad80SBarry Smith
227*498e7f42SRichard Tran Mills PetscCall(PetscCalloc1(ina->Nbs, &lindices));
228ad540459SPierre Jolivet for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
22945b6f7e9SBarry Smith no = inA->rmap->mapping->n - nt;
230*498e7f42SRichard Tran Mills PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
23104f1ad80SBarry Smith nt = 0;
23245b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) {
23345b6f7e9SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) {
23404f1ad80SBarry Smith nt++;
23545b6f7e9SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
23604f1ad80SBarry Smith }
23704f1ad80SBarry Smith }
23808401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2399566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices));
240*498e7f42SRichard Tran Mills PetscCall(PetscMalloc1(nt * bs, &uglyrmapo));
24145b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) {
24204f1ad80SBarry Smith if (r_rmapo[i]) {
243ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
24404f1ad80SBarry Smith }
24504f1ad80SBarry Smith }
2469566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo));
2479566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
2483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
24904f1ad80SBarry Smith }
25004f1ad80SBarry Smith
MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)251d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
252d71ae5a4SJacob Faibussowitsch {
25304f1ad80SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
254b24ad042SBarry Smith PetscInt n, i;
255bca11509SBarry Smith PetscScalar *d, *o;
256bca11509SBarry Smith const PetscScalar *s;
25704f1ad80SBarry Smith
25804f1ad80SBarry Smith PetscFunctionBegin;
25948a46eb9SPierre Jolivet if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));
2602cd6534aSBarry Smith
2619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s));
26204f1ad80SBarry Smith
2639566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglydd, &n));
2649566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglydd, &d));
265ac530a7eSPierre Jolivet for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
2669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglydd, &d));
26704f1ad80SBarry Smith /* column scale "diagonal" portion of local matrix */
2689566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
26904f1ad80SBarry Smith
2709566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglyoo, &n));
2719566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglyoo, &o));
272ac530a7eSPierre Jolivet for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
2739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s));
2749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglyoo, &o));
27504f1ad80SBarry Smith /* column scale "off-diagonal" portion of local matrix */
2769566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
277b1f10902SRichard Tran Mills PetscCall(PetscFree(uglyrmapd));
278b1f10902SRichard Tran Mills PetscCall(PetscFree(uglyrmapo));
279b1f10902SRichard Tran Mills PetscCall(VecDestroy(&uglydd));
280b1f10902SRichard Tran Mills PetscCall(VecDestroy(&uglyoo));
2813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
28204f1ad80SBarry Smith }
283