/*
  Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because it accesses the data structure of ISLocalToGlobalMapping directly */

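/*
  MatSetUpMultiply_MPIAIJ - sets up the data needed to apply the off-diagonal block B in the matrix-vector product:
    garray - sorted list of the global columns with nonzeros in B, so B can be stored with compact local column ids 0..ec-1
    lvec   - sequential work vector of length ec that receives the needed off-process entries of the input vector
    Mvctx  - VecScatter that gathers those entries from the distributed input vector into lvec
*/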
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* Number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table */
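    /* the table stores gid+1 -> lid+1, so the default value 0 returned by PetscHMapIGetWithDefault() below means "column not seen yet" */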
    PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
        if (!data) {
          /* one based table */
          PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscHashIterBegin(gid1_lid1, tpos);
    while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
      PetscHashIterGetKey(gid1_lid1, tpos, gid);
      PetscHashIterGetVal(gid1_lid1, tpos, lid);
      PetscHashIterNext(gid1_lid1, tpos);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
    PetscCall(PetscHMapIClear(gid1_lid1));
    for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
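    /* garray is now sorted ascending and gid1_lid1 maps each global column back to its position in garray;
       e.g. if this rank's B touches global columns 7 and 3, garray = {3, 7} and global column 7 gets local id 1 */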
    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
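    /* B's column indices now refer to the compacted local numbering 0..ec-1, so rebuild its column layout with ec columns */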
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) indices[garray[i]] = i;

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary index sets used to build the scatter */
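  /* "from" lists the global column indices this rank needs from the distributed vector; "to" lists their destinations 0..ec-1 in lvec */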
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory, so it is inexpensive */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  aij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Disassemble the off-diag portion of the MPIAIJXxx matrix.
   In other words, change B from the reduced format using local col ids
   to the expanded format using global col ids, which makes it easier to
   insert new nonzeros (with global col ids) into the matrix.
   The off-diag B determines communication in the matrix vector multiply.
   use_preallocation determines whether to use the preallocation or the
   existing nonzero structure when creating the global form of B
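   For example, with garray = {3, 7}, an entry stored in B at local column id 1 is reinserted into Bnew at global column 7.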
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  Mat         B = aij->B, Bnew = NULL;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
#endif
  }

  if (B) {
    Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
    PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
    PetscScalar        v;
    const PetscScalar *ba;

    /* make sure that B is assembled so we can access its values */
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(m + 1, &nz));
    if (use_preallocation)
      for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
    else
      for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
    PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */
    PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

    if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
    }

    /*
      Ensure that B's nonzerostate is monotonically increasing.
      Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
    */
    Bnew->nonzerostate = B->nonzerostate;

    PetscCall(PetscFree(nz));
    PetscCall(MatSeqAIJGetArrayRead(B, &ba));
    for (i = 0; i < m; i++) {
      for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
        col = garray[Baij->j[ct]];
        v   = ba[ct++];
        PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
      }
    }
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
    PetscCall(MatDestroy(&B));
  }
  PetscCall(PetscFree(aij->garray));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ugly stuff added for Glenn someday we should fix this up */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec auglydd = NULL, auglyoo = NULL;              /* work vectors used to scale the two parts of the local matrix */
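/* these are built lazily by MatMPIAIJDiagonalScaleLocalSetUp() and freed again at the end of MatDiagonalScaleLocal_MPIAIJ() */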

static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /* access private part of matrix */
  PetscInt    i, j, n, nt, cstart, cend, no, *garray = ina->garray, *lindices, bs = inA->rmap->bs;
  PetscInt   *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) auglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  /* This loop handling the setup for the off-diagonal local portion of the matrix is extremely similar to
     its counterpart in the MPIBAIJ version of this code.
     The tricky difference is that lindices[] uses block-based indexing (just as in the BAIJ code),
     but garray[] uses element-based indexing; hence the multiplications / divisions involving bs. */
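  /* e.g. with bs = 2 and a (hypothetical) garray = {4, 5, 10, 11}, block 0 holds global columns 4..5, so lindices[garray[0] / bs] = lindices[2] = 1 */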
  PetscCall(PetscCalloc1(inA->cmap->N / bs, &lindices));
  for (i = 0; i < ina->B->cmap->n / bs; i++) lindices[garray[i * bs] / bs] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) auglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

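/* Column-scales the "diagonal" (A) and "off-diagonal" (B) blocks of the local matrix;
   the scale vector is given in the local ordering defined by inA->rmap->mapping */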
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscCall(PetscFree(auglyrmapd));
  PetscCall(PetscFree(auglyrmapo));
  PetscCall(VecDestroy(&auglydd));
  PetscCall(VecDestroy(&auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}