xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2    Support for the parallel SBAIJ matrix vector multiply
3 */
4 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
5 
/*
   Builds the communication structures needed by the MPISBAIJ matrix-vector product:
   compacts the off-process block B to use only the block columns it actually touches
   (recorded in sbaij->garray), then creates the work vectors (lvec, slvec0/slvec1 and
   their a/b sub-views) and the scatter contexts (Mvctx, sMvctx). Because SBAIJ stores
   only the upper triangle, MatMult must both gather remote x entries AND scatter back
   locally computed partial products, which is why the sMvctx scatter moves 2*ec blocks.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank   = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Hash-table path: memory use scales with the number of distinct off-process
     block columns actually referenced (ec), not with the global size Nbs */
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      /* keys/values are stored one-based so that 0 can mean "absent" */
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  /* hash iteration order is arbitrary; sort so garray is increasing, then rebuild
     the table so local ids reflect the sorted order */
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  /* sgarray holds 2*ec entries (x-part then b-part of the sMvctx index sets);
     ec_owner[i] is the rank owning global block column garray[i] */
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    /* garray is sorted, so owners can be scanned monotonically */
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  /* walking the ownership ranges in order yields garray sorted and the owner of
     each needed column at the same time */
  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  /* B now has exactly ec block columns; rebuild its (serial) column layout to match */
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  /* stmp is sized 2*ec because it is reused below for the sMvctx "to" index set */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  /* gvec is a dummy parallel vector (no array attached) that only supplies the layout */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  /* each process contributes mbs local blocks (x part) plus ec ghost blocks (b part) */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  /* shift each needed global block column into the owner's segment of slvec0
     (sowners[j]/bs - owners[j] is owner j's extra ghost offset) */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  /* our ghost section begins right after our mbs local blocks */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  /* slvec1a/slvec1b and slvec0b are sequential views aliasing the local (first bs*mbs
     entries) and ghost (remaining) sections of slvec1/slvec0 — no data is copied */
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}
172 
173 /*
174      Takes the local part of an already assembled MPISBAIJ matrix
175    and disassembles it. This is to allow new nonzeros into the matrix
176    that require more communication in the matrix vector multiply.
177    Thus certain data-structures must be rebuilt.
178 
179    Kind of slow! But that's what application programmers get when
180    they are sloppy.
181 */
182 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
183 {
184   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
185   Mat           B     = baij->B, Bnew;
186   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
187   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
188   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
189   MatScalar    *a = Bbaij->a;
190   PetscScalar  *atmp;
191 #if defined(PETSC_USE_REAL_MAT_SINGLE)
192   PetscInt l;
193 #endif
194 
195   PetscFunctionBegin;
196 #if defined(PETSC_USE_REAL_MAT_SINGLE)
197   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
198 #endif
199   /* free stuff related to matrix-vec multiply */
200   PetscCall(VecDestroy(&baij->lvec));
201   PetscCall(VecScatterDestroy(&baij->Mvctx));
202 
203   PetscCall(VecDestroy(&baij->slvec0));
204   PetscCall(VecDestroy(&baij->slvec0b));
205   PetscCall(VecDestroy(&baij->slvec1));
206   PetscCall(VecDestroy(&baij->slvec1a));
207   PetscCall(VecDestroy(&baij->slvec1b));
208 
209   if (baij->colmap) {
210 #if defined(PETSC_USE_CTABLE)
211     PetscCall(PetscHMapIDestroy(&baij->colmap));
212 #else
213     PetscCall(PetscFree(baij->colmap));
214 #endif
215   }
216 
217   /* make sure that B is assembled so we can access its values */
218   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
219   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
220 
221   /* invent new B and copy stuff over */
222   PetscCall(PetscMalloc1(mbs, &nz));
223   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
224   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
225   PetscCall(MatSetSizes(Bnew, m, n, m, n));
226   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
227   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
228   PetscCall(PetscFree(nz));
229 
230   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
231     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
232   }
233 
234   /*
235    Ensure that B's nonzerostate is monotonically increasing.
236    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
237    */
238   Bnew->nonzerostate = B->nonzerostate;
239   PetscCall(PetscMalloc1(bs, &rvals));
240   for (i = 0; i < mbs; i++) {
241     rvals[0] = bs * i;
242     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
243     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
244       col = garray[Bbaij->j[j]] * bs;
245       for (k = 0; k < bs; k++) {
246 #if defined(PETSC_USE_REAL_MAT_SINGLE)
247         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
248 #else
249         atmp = a + j * bs2 + k * bs;
250 #endif
251         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
252         col++;
253       }
254     }
255   }
256 #if defined(PETSC_USE_REAL_MAT_SINGLE)
257   PetscCall(PetscFree(atmp));
258 #endif
259   PetscCall(PetscFree(baij->garray));
260 
261   baij->garray = NULL;
262 
263   PetscCall(PetscFree(rvals));
264   PetscCall(MatDestroy(&B));
265 
266   baij->B = Bnew;
267 
268   A->was_assembled = PETSC_FALSE;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271