xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1a30f8f8cSSatish Balay /*
2a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
3a30f8f8cSSatish Balay */
4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
57cff1425SSatish Balay 
MatSetUpMultiply_MPISBAIJ(Mat mat)6d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
7d71ae5a4SJacob Faibussowitsch {
840781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
9f4f49eeaSPierre Jolivet   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
10eec179cfSJacob Faibussowitsch   PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
11d0f46423SBarry Smith   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
1240781036SHong Zhang   IS              from, to;
1340781036SHong Zhang   Vec             gvec;
14*6497c311SBarry Smith   PetscMPIInt     rank   = sbaij->rank;
15077aedafSJed Brown   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
16077aedafSJed Brown   const PetscInt *sowners;
1740781036SHong Zhang   PetscScalar    *ptr;
18a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
19eec179cfSJacob Faibussowitsch   PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
20eec179cfSJacob Faibussowitsch   PetscHashIter tpos;
21a11b0bdaSJunchao Zhang   PetscInt      gid, lid;
22a11b0bdaSJunchao Zhang #else
23eec179cfSJacob Faibussowitsch   PetscInt  Nbs = sbaij->Nbs;
24a11b0bdaSJunchao Zhang   PetscInt *indices;
25a11b0bdaSJunchao Zhang #endif
2640781036SHong Zhang 
2740781036SHong Zhang   PetscFunctionBegin;
28a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
29eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
30a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
31a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
32a11b0bdaSJunchao Zhang       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
33eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
34c76ffc5fSJacob Faibussowitsch       if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
35a11b0bdaSJunchao Zhang     }
36a11b0bdaSJunchao Zhang   }
37a11b0bdaSJunchao Zhang   /* form array of columns we need */
389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
39eec179cfSJacob Faibussowitsch   PetscHashIterBegin(gid1_lid1, tpos);
40eec179cfSJacob Faibussowitsch   while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
41eec179cfSJacob Faibussowitsch     PetscHashIterGetKey(gid1_lid1, tpos, gid);
42eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(gid1_lid1, tpos, lid);
43eec179cfSJacob Faibussowitsch     PetscHashIterNext(gid1_lid1, tpos);
449371c9d4SSatish Balay     gid--;
459371c9d4SSatish Balay     lid--;
46a11b0bdaSJunchao Zhang     garray[lid] = gid;
47a11b0bdaSJunchao Zhang   }
489566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
49eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIClear(gid1_lid1));
50c76ffc5fSJacob Faibussowitsch   for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
51a11b0bdaSJunchao Zhang   /* compact out the extra columns in B */
52a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
53a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
54a11b0bdaSJunchao Zhang       PetscInt gid1 = aj[B->i[i] + j] + 1;
55eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
56a11b0bdaSJunchao Zhang       lid--;
57a11b0bdaSJunchao Zhang       aj[B->i[i] + j] = lid;
58a11b0bdaSJunchao Zhang     }
59a11b0bdaSJunchao Zhang   }
60eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&gid1_lid1));
619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
62a11b0bdaSJunchao Zhang   for (i = j = 0; i < ec; i++) {
63a11b0bdaSJunchao Zhang     while (garray[i] >= owners[j + 1]) j++;
64a11b0bdaSJunchao Zhang     ec_owner[i] = j;
65a11b0bdaSJunchao Zhang   }
66a11b0bdaSJunchao Zhang #else
6740781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
6840781036SHong Zhang   /* mark those columns that are in sbaij->B */
699566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
7040781036SHong Zhang   for (i = 0; i < mbs; i++) {
7140781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) {
7240781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
7340781036SHong Zhang       indices[aj[B->i[i] + j]] = 1;
7440781036SHong Zhang     }
7540781036SHong Zhang   }
7640781036SHong Zhang 
7740781036SHong Zhang   /* form arrays of columns we need */
789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
8040781036SHong Zhang 
8140781036SHong Zhang   ec = 0;
82a11b0bdaSJunchao Zhang   for (j = 0; j < sbaij->size; j++) {
8340781036SHong Zhang     for (i = owners[j]; i < owners[j + 1]; i++) {
8440781036SHong Zhang       if (indices[i]) {
8540781036SHong Zhang         garray[ec]   = i;
8640781036SHong Zhang         ec_owner[ec] = j;
8740781036SHong Zhang         ec++;
8840781036SHong Zhang       }
8940781036SHong Zhang     }
9040781036SHong Zhang   }
9140781036SHong Zhang 
9240781036SHong Zhang   /* make indices now point into garray */
93b424e231SHong Zhang   for (i = 0; i < ec; i++) indices[garray[i]] = i;
9440781036SHong Zhang 
9540781036SHong Zhang   /* compact out the extra columns in B */
9640781036SHong Zhang   for (i = 0; i < mbs; i++) {
9740781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
9840781036SHong Zhang   }
999566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
100a11b0bdaSJunchao Zhang #endif
10140781036SHong Zhang   B->nbs = ec;
1029566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
1039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
10440781036SHong Zhang 
1059566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sbaij->sMvctx));
10640781036SHong Zhang   /* create local vector that is used to scatter into */
1079566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
10840781036SHong Zhang 
10940781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
1109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * ec, &stmp));
1119566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
11226fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1139566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
11440781036SHong Zhang 
11593d1592dSHong Zhang   /* generate the scatter context
116bd096759SJunchao Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
1179566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1189566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
1199566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
12040781036SHong Zhang 
12140781036SHong Zhang   sbaij->garray = garray;
12226fbe8dcSKarl Rupp 
1239566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1249566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
12540781036SHong Zhang 
12640781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
127*6497c311SBarry Smith   PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), (mbs + ec) * bs, PETSC_DETERMINE, &sbaij->slvec0));
1289566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
1299566063dSJacob Faibussowitsch   PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
13040781036SHong Zhang 
1319566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
13240781036SHong Zhang 
13340781036SHong Zhang   /* x index in the IS sfrom */
13440781036SHong Zhang   for (i = 0; i < ec; i++) {
13540781036SHong Zhang     j          = ec_owner[i];
13640781036SHong Zhang     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
13740781036SHong Zhang   }
13840781036SHong Zhang   /* b index in the IS sfrom */
13940781036SHong Zhang   k = sowners[rank] / bs + mbs;
14040781036SHong Zhang   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
1419566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
14240781036SHong Zhang 
14340781036SHong Zhang   /* x index in the IS sto */
14440781036SHong Zhang   k = sowners[rank] / bs + mbs;
145e82e9f6bSBarry Smith   for (i = 0; i < ec; i++) stmp[i] = (k + i);
14640781036SHong Zhang   /* b index in the IS sto */
147e82e9f6bSBarry Smith   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
14840781036SHong Zhang 
1499566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
15040781036SHong Zhang 
1519566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
1529566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
15340781036SHong Zhang 
1549566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
1559566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec1, &ptr));
1569566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
1578e3a54c0SPierre Jolivet   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
1589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
15940781036SHong Zhang 
1609566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec0, &ptr));
1618e3a54c0SPierre Jolivet   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
1629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
16340781036SHong Zhang 
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stmp));
16540781036SHong Zhang 
1669566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1679566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1689566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sgarray, ec_owner));
1693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17040781036SHong Zhang }
17140781036SHong Zhang 
172a30f8f8cSSatish Balay /*
17301b2bd88SHong Zhang      Takes the local part of an already assembled MPISBAIJ matrix
174a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
175a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
176a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
177a30f8f8cSSatish Balay 
178a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
179a30f8f8cSSatish Balay    they are sloppy.
180a30f8f8cSSatish Balay */
MatDisAssemble_MPISBAIJ(Mat A)181d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
182d71ae5a4SJacob Faibussowitsch {
1832896befeSSatish Balay   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
184a30f8f8cSSatish Balay   Mat           B     = baij->B, Bnew;
185a30f8f8cSSatish Balay   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
186d0f46423SBarry Smith   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
1874dfa11a4SJacob Faibussowitsch   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
188a30f8f8cSSatish Balay   MatScalar    *a = Bbaij->a;
18987828ca2SBarry Smith   PetscScalar  *atmp;
190ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
19113f74950SBarry Smith   PetscInt l;
192a30f8f8cSSatish Balay #endif
193a30f8f8cSSatish Balay 
194a30f8f8cSSatish Balay   PetscFunctionBegin;
195ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
1969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
19782502324SSatish Balay #endif
198a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
1999566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
2009566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
20101b2bd88SHong Zhang 
2029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
2039566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
2049566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
2059566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
2069566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
20701b2bd88SHong Zhang 
208a30f8f8cSSatish Balay   if (baij->colmap) {
209a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
210eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&baij->colmap));
211a30f8f8cSSatish Balay #else
2129566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
213a30f8f8cSSatish Balay #endif
214a30f8f8cSSatish Balay   }
215a30f8f8cSSatish Balay 
216a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
2179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2189566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
219a30f8f8cSSatish Balay 
220a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
2219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &nz));
222ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
2239566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
2249566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, m, n, m, n));
2259566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
2269566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
2279566063dSJacob Faibussowitsch   PetscCall(PetscFree(nz));
228a30f8f8cSSatish Balay 
229b38c15b3SStefano Zampini   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
230b38c15b3SStefano Zampini     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
231b38c15b3SStefano Zampini   }
232b38c15b3SStefano Zampini 
23377341eacSDmitry Karpeev   /*
23477341eacSDmitry Karpeev    Ensure that B's nonzerostate is monotonically increasing.
23577341eacSDmitry Karpeev    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
23677341eacSDmitry Karpeev    */
23777341eacSDmitry Karpeev   Bnew->nonzerostate = B->nonzerostate;
2389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &rvals));
239a30f8f8cSSatish Balay   for (i = 0; i < mbs; i++) {
240a30f8f8cSSatish Balay     rvals[0] = bs * i;
24126fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
242a30f8f8cSSatish Balay     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
243a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]] * bs;
244a30f8f8cSSatish Balay       for (k = 0; k < bs; k++) {
245ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
246a30f8f8cSSatish Balay         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
247a30f8f8cSSatish Balay #else
24871730473SSatish Balay         atmp = a + j * bs2 + k * bs;
249a30f8f8cSSatish Balay #endif
2509566063dSJacob Faibussowitsch         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
251a30f8f8cSSatish Balay         col++;
252a30f8f8cSSatish Balay       }
253a30f8f8cSSatish Balay     }
254a30f8f8cSSatish Balay   }
255ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
2569566063dSJacob Faibussowitsch   PetscCall(PetscFree(atmp));
257a30f8f8cSSatish Balay #endif
2589566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
25926fbe8dcSKarl Rupp 
260f4259b30SLisandro Dalcin   baij->garray = NULL;
26126fbe8dcSKarl Rupp 
2629566063dSJacob Faibussowitsch   PetscCall(PetscFree(rvals));
2639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
26426fbe8dcSKarl Rupp 
265a30f8f8cSSatish Balay   baij->B = Bnew;
26626fbe8dcSKarl Rupp 
267a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
2683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
269a30f8f8cSSatish Balay }
270