/*
  Support for the parallel BAIJ matrix-vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */

MatSetUpMultiply_MPIBAIJ(Mat mat)7 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
8 {
9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
11 PetscInt i, j, *aj = B->j, ec = 0, *garray;
12 PetscInt bs = mat->rmap->bs, *stmp;
13 IS from, to;
14 Vec gvec;
15 #if defined(PETSC_USE_CTABLE)
16 PetscHMapI gid1_lid1 = NULL;
17 PetscHashIter tpos;
18 PetscInt gid, lid;
19 #else
20 PetscInt Nbs = baij->Nbs, *indices;
21 #endif
22
23 PetscFunctionBegin;
24 #if defined(PETSC_USE_CTABLE)
25 /* use a table - Mark Adams */
26 PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
27 for (i = 0; i < B->mbs; i++) {
28 for (j = 0; j < B->ilen[i]; j++) {
29 PetscInt data, gid1 = aj[B->i[i] + j] + 1;
30 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
31 if (!data) {
32 /* one based table */
33 PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
34 }
35 }
36 }
37 /* form array of columns we need */
38 PetscCall(PetscMalloc1(ec, &garray));
39 PetscHashIterBegin(gid1_lid1, tpos);
40 while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
41 PetscHashIterGetKey(gid1_lid1, tpos, gid);
42 PetscHashIterGetVal(gid1_lid1, tpos, lid);
43 PetscHashIterNext(gid1_lid1, tpos);
44 gid--;
45 lid--;
46 garray[lid] = gid;
47 }
48 PetscCall(PetscSortInt(ec, garray));
49 PetscCall(PetscHMapIClear(gid1_lid1));
50 for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
51 /* compact out the extra columns in B */
52 for (i = 0; i < B->mbs; i++) {
53 for (j = 0; j < B->ilen[i]; j++) {
54 PetscInt gid1 = aj[B->i[i] + j] + 1;
55 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
56 lid--;
57 aj[B->i[i] + j] = lid;
58 }
59 }
60 B->nbs = ec;
61 PetscCall(PetscLayoutDestroy(&baij->B->cmap));
62 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
63 PetscCall(PetscHMapIDestroy(&gid1_lid1));
64 #else
65 /* Make an array as long as the number of columns */
66 /* mark those columns that are in baij->B */
67 PetscCall(PetscCalloc1(Nbs, &indices));
68 for (i = 0; i < B->mbs; i++) {
69 for (j = 0; j < B->ilen[i]; j++) {
70 if (!indices[aj[B->i[i] + j]]) ec++;
71 indices[aj[B->i[i] + j]] = 1;
72 }
73 }
74
75 /* form array of columns we need */
76 PetscCall(PetscMalloc1(ec, &garray));
77 ec = 0;
78 for (i = 0; i < Nbs; i++) {
79 if (indices[i]) garray[ec++] = i;
80 }
81
82 /* make indices now point into garray */
83 for (i = 0; i < ec; i++) indices[garray[i]] = i;
84
85 /* compact out the extra columns in B */
86 for (i = 0; i < B->mbs; i++) {
87 for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
88 }
89 B->nbs = ec;
90 PetscCall(PetscLayoutDestroy(&baij->B->cmap));
91 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
92 PetscCall(PetscFree(indices));
93 #endif
94
95 /* create local vector that is used to scatter into */
96 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));
97
98 /* create two temporary index sets for building scatter-gather */
99 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
100
101 PetscCall(PetscMalloc1(ec, &stmp));
102 for (i = 0; i < ec; i++) stmp[i] = i;
103 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
104
105 /* create temporary global vector to generate scatter context */
106 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
107
108 PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
109 PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
110
111 baij->garray = garray;
112
113 PetscCall(ISDestroy(&from));
114 PetscCall(ISDestroy(&to));
115 PetscCall(VecDestroy(&gvec));
116 PetscFunctionReturn(PETSC_SUCCESS);
117 }
118
/*
  Takes the local part of an already assembled MPIBAIJ matrix
  and disassembles it. This is to allow new nonzeros into the matrix
  that require more communication in the matrix vector multiply.
  Thus certain data-structures must be rebuilt.

  Kind of slow! But that's what application programmers get when
  they are sloppy.
*/
MatDisAssemble_MPIBAIJ(Mat A)128 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
129 {
130 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
131 Mat B = baij->B, Bnew;
132 Mat_SeqBAIJ *Bbaij;
133 PetscInt i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
134 PetscInt bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
135 MatScalar *a, *atmp;
136
137 PetscFunctionBegin;
138 /* free stuff related to matrix-vec multiply */
139 PetscCall(VecDestroy(&baij->lvec));
140 PetscCall(VecScatterDestroy(&baij->Mvctx));
141 if (baij->colmap) {
142 #if defined(PETSC_USE_CTABLE)
143 PetscCall(PetscHMapIDestroy(&baij->colmap));
144 #else
145 PetscCall(PetscFree(baij->colmap));
146 #endif
147 }
148
149 /* make sure that B is assembled so we can access its values */
150 if (B) {
151 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
152 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
153 Bbaij = (Mat_SeqBAIJ *)B->data;
154 mbs = Bbaij->mbs;
155 a = Bbaij->a;
156
157 /* invent new B and copy stuff over */
158 PetscCall(PetscMalloc1(mbs, &nz));
159 for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
160 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
161 PetscCall(MatSetSizes(Bnew, m, n, m, n));
162 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
163 PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
164 /*
165 Ensure that B's nonzerostate is monotonically increasing.
166 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
167 */
168 Bnew->nonzerostate = B->nonzerostate;
169 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
170 ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
171 }
172
173 PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
174 for (i = 0; i < mbs; i++) {
175 for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
176 col = garray[Bbaij->j[j]];
177 atmp = a + j * bs2;
178 PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
179 }
180 }
181 PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, Bbaij->roworiented));
182
183 PetscCall(PetscFree(nz));
184 PetscCall(MatDestroy(&B));
185
186 baij->B = Bnew;
187 }
188 PetscCall(PetscFree(baij->garray));
189 A->was_assembled = PETSC_FALSE;
190 A->assembled = PETSC_FALSE;
191 PetscFunctionReturn(PETSC_SUCCESS);
192 }
193
194 /* ugly stuff added for Glenn someday we should fix this up */
195
196 static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
197 static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */
198
MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)199 static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
200 {
201 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
202 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data;
203 PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
204 PetscInt *r_rmapd, *r_rmapo;
205
206 PetscFunctionBegin;
207 PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
208 PetscCall(MatGetSize(ina->A, NULL, &n));
209 PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
210 nt = 0;
211 for (i = 0; i < inA->rmap->mapping->n; i++) {
212 if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
213 nt++;
214 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
215 }
216 }
217 PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
218 PetscCall(PetscMalloc1(n, &uglyrmapd));
219 for (i = 0; i < inA->rmap->mapping->n; i++) {
220 if (r_rmapd[i]) {
221 for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
222 }
223 }
224 PetscCall(PetscFree(r_rmapd));
225 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
226
227 PetscCall(PetscCalloc1(ina->Nbs, &lindices));
228 for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
229 no = inA->rmap->mapping->n - nt;
230 PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
231 nt = 0;
232 for (i = 0; i < inA->rmap->mapping->n; i++) {
233 if (lindices[inA->rmap->mapping->indices[i]]) {
234 nt++;
235 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
236 }
237 }
238 PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
239 PetscCall(PetscFree(lindices));
240 PetscCall(PetscMalloc1(nt * bs, &uglyrmapo));
241 for (i = 0; i < inA->rmap->mapping->n; i++) {
242 if (r_rmapo[i]) {
243 for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
244 }
245 }
246 PetscCall(PetscFree(r_rmapo));
247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
248 PetscFunctionReturn(PETSC_SUCCESS);
249 }
250
MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)251 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
252 {
253 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
254 PetscInt n, i;
255 PetscScalar *d, *o;
256 const PetscScalar *s;
257
258 PetscFunctionBegin;
259 if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));
260
261 PetscCall(VecGetArrayRead(scale, &s));
262
263 PetscCall(VecGetLocalSize(uglydd, &n));
264 PetscCall(VecGetArray(uglydd, &d));
265 for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
266 PetscCall(VecRestoreArray(uglydd, &d));
267 /* column scale "diagonal" portion of local matrix */
268 PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
269
270 PetscCall(VecGetLocalSize(uglyoo, &n));
271 PetscCall(VecGetArray(uglyoo, &o));
272 for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
273 PetscCall(VecRestoreArrayRead(scale, &s));
274 PetscCall(VecRestoreArray(uglyoo, &o));
275 /* column scale "off-diagonal" portion of local matrix */
276 PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
277 PetscCall(PetscFree(uglyrmapd));
278 PetscCall(PetscFree(uglyrmapo));
279 PetscCall(VecDestroy(&uglydd));
280 PetscCall(VecDestroy(&uglyoo));
281 PetscFunctionReturn(PETSC_SUCCESS);
282 }
283