/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
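  /* first pass: assign each distinct block column of B a one-based local index (lid); aj[] still holds global block indices at this point */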
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
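  /* sort the needed columns and rebuild the map so that local indices follow increasing global order */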
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
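  /* record the owning process of each needed block column; owners[] holds the block-row ranges and garray is sorted, so a single sweep suffices */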
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
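  /* walk the processes in order so garray is filled in increasing global order and ec_owner records each column's owner */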
  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
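  /* B now references only the ec block columns it actually uses; shrink its column layout accordingly */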
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
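  /* gvec carries no array storage; it exists only to supply the parallel layout from which the scatter is built */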
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), (mbs + ec) * bs, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
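  /* each of slvec0/slvec1 has (mbs + ec) local blocks: the leading mbs blocks cover this process's block rows, the trailing ec blocks hold the ghost values */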
  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
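  /* a single scatter serves both directions of communication needed by the symmetric multiply: entries 0..ec-1 pull the owners' x values into this process's ghost slots, while entries ec..2*ec-1 push the locally accumulated ghost contributions back to their owners */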
  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
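  /* slvec1a/slvec1b (and slvec0b) are sequential window vectors sharing the array of slvec1 (slvec0): the "a" part views the leading local blocks, the "b" part the trailing ghost blocks; no data is copied */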
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Takes the local part of an already assembled MPISBAIJ matrix
  and disassembles it. This is to allow new nonzeros into the matrix
  that require more communication in the matrix vector multiply.
  Thus certain data-structures must be rebuilt.

  Kind of slow! But that's what application programmers get when
  they are sloppy.
*/
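/* Note: garray provides the inverse of the compaction done in MatSetUpMultiply_MPISBAIJ():
   B's local block-column indices are translated back to global indices as the values are
   copied into a new, full-width B. */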
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
  Mat           B    = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
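  /* Bnew is sequential but sized m-by-N in points, so the global column indices recovered from garray can be inserted directly */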
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
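  /* rvals lists the bs point rows of block row i; each stored block is reinserted one point column at a time */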
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        /* MatScalar and PetscScalar differ here, so copy-convert the k-th point column of the block */
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}