1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3
4 /* Block operations are done by accessing one column at a time */
5 /* Default MatSolve for block size 14 */
6
MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A,Vec bb,Vec xx)7 PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx)
8 {
9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
10 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
11 PetscInt i, k, nz, idx, idt, m;
12 const MatScalar *aa = a->a, *v;
13 PetscScalar s[14];
14 PetscScalar *x, xv;
15 const PetscScalar *b;
16
17 PetscFunctionBegin;
18 PetscCall(VecGetArrayRead(bb, &b));
19 PetscCall(VecGetArray(xx, &x));
20
21 /* forward solve the lower triangular */
22 for (i = 0; i < n; i++) {
23 v = aa + bs2 * ai[i];
24 vi = aj + ai[i];
25 nz = ai[i + 1] - ai[i];
26 idt = bs * i;
27 x[idt] = b[idt];
28 x[1 + idt] = b[1 + idt];
29 x[2 + idt] = b[2 + idt];
30 x[3 + idt] = b[3 + idt];
31 x[4 + idt] = b[4 + idt];
32 x[5 + idt] = b[5 + idt];
33 x[6 + idt] = b[6 + idt];
34 x[7 + idt] = b[7 + idt];
35 x[8 + idt] = b[8 + idt];
36 x[9 + idt] = b[9 + idt];
37 x[10 + idt] = b[10 + idt];
38 x[11 + idt] = b[11 + idt];
39 x[12 + idt] = b[12 + idt];
40 x[13 + idt] = b[13 + idt];
41 for (m = 0; m < nz; m++) {
42 idx = bs * vi[m];
43 for (k = 0; k < bs; k++) {
44 xv = x[k + idx];
45 x[idt] -= v[0] * xv;
46 x[1 + idt] -= v[1] * xv;
47 x[2 + idt] -= v[2] * xv;
48 x[3 + idt] -= v[3] * xv;
49 x[4 + idt] -= v[4] * xv;
50 x[5 + idt] -= v[5] * xv;
51 x[6 + idt] -= v[6] * xv;
52 x[7 + idt] -= v[7] * xv;
53 x[8 + idt] -= v[8] * xv;
54 x[9 + idt] -= v[9] * xv;
55 x[10 + idt] -= v[10] * xv;
56 x[11 + idt] -= v[11] * xv;
57 x[12 + idt] -= v[12] * xv;
58 x[13 + idt] -= v[13] * xv;
59 v += 14;
60 }
61 }
62 }
63 /* backward solve the upper triangular */
64 for (i = n - 1; i >= 0; i--) {
65 v = aa + bs2 * (adiag[i + 1] + 1);
66 vi = aj + adiag[i + 1] + 1;
67 nz = adiag[i] - adiag[i + 1] - 1;
68 idt = bs * i;
69 s[0] = x[idt];
70 s[1] = x[1 + idt];
71 s[2] = x[2 + idt];
72 s[3] = x[3 + idt];
73 s[4] = x[4 + idt];
74 s[5] = x[5 + idt];
75 s[6] = x[6 + idt];
76 s[7] = x[7 + idt];
77 s[8] = x[8 + idt];
78 s[9] = x[9 + idt];
79 s[10] = x[10 + idt];
80 s[11] = x[11 + idt];
81 s[12] = x[12 + idt];
82 s[13] = x[13 + idt];
83
84 for (m = 0; m < nz; m++) {
85 idx = bs * vi[m];
86 for (k = 0; k < bs; k++) {
87 xv = x[k + idx];
88 s[0] -= v[0] * xv;
89 s[1] -= v[1] * xv;
90 s[2] -= v[2] * xv;
91 s[3] -= v[3] * xv;
92 s[4] -= v[4] * xv;
93 s[5] -= v[5] * xv;
94 s[6] -= v[6] * xv;
95 s[7] -= v[7] * xv;
96 s[8] -= v[8] * xv;
97 s[9] -= v[9] * xv;
98 s[10] -= v[10] * xv;
99 s[11] -= v[11] * xv;
100 s[12] -= v[12] * xv;
101 s[13] -= v[13] * xv;
102 v += 14;
103 }
104 }
105 PetscCall(PetscArrayzero(x + idt, bs));
106 for (k = 0; k < bs; k++) {
107 x[idt] += v[0] * s[k];
108 x[1 + idt] += v[1] * s[k];
109 x[2 + idt] += v[2] * s[k];
110 x[3 + idt] += v[3] * s[k];
111 x[4 + idt] += v[4] * s[k];
112 x[5 + idt] += v[5] * s[k];
113 x[6 + idt] += v[6] * s[k];
114 x[7 + idt] += v[7] * s[k];
115 x[8 + idt] += v[8] * s[k];
116 x[9 + idt] += v[9] * s[k];
117 x[10 + idt] += v[10] * s[k];
118 x[11 + idt] += v[11] * s[k];
119 x[12 + idt] += v[12] * s[k];
120 x[13 + idt] += v[13] * s[k];
121 v += 14;
122 }
123 }
124 PetscCall(VecRestoreArrayRead(bb, &b));
125 PetscCall(VecRestoreArray(xx, &x));
126 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
127 PetscFunctionReturn(PETSC_SUCCESS);
128 }
129
130 /* Block operations are done by accessing one column at a time */
131 /* Default MatSolve for block size 13 */
132
MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A,Vec bb,Vec xx)133 PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx)
134 {
135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
136 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
137 PetscInt i, k, nz, idx, idt, m;
138 const MatScalar *aa = a->a, *v;
139 PetscScalar s[13];
140 PetscScalar *x, xv;
141 const PetscScalar *b;
142
143 PetscFunctionBegin;
144 PetscCall(VecGetArrayRead(bb, &b));
145 PetscCall(VecGetArray(xx, &x));
146
147 /* forward solve the lower triangular */
148 for (i = 0; i < n; i++) {
149 v = aa + bs2 * ai[i];
150 vi = aj + ai[i];
151 nz = ai[i + 1] - ai[i];
152 idt = bs * i;
153 x[idt] = b[idt];
154 x[1 + idt] = b[1 + idt];
155 x[2 + idt] = b[2 + idt];
156 x[3 + idt] = b[3 + idt];
157 x[4 + idt] = b[4 + idt];
158 x[5 + idt] = b[5 + idt];
159 x[6 + idt] = b[6 + idt];
160 x[7 + idt] = b[7 + idt];
161 x[8 + idt] = b[8 + idt];
162 x[9 + idt] = b[9 + idt];
163 x[10 + idt] = b[10 + idt];
164 x[11 + idt] = b[11 + idt];
165 x[12 + idt] = b[12 + idt];
166 for (m = 0; m < nz; m++) {
167 idx = bs * vi[m];
168 for (k = 0; k < bs; k++) {
169 xv = x[k + idx];
170 x[idt] -= v[0] * xv;
171 x[1 + idt] -= v[1] * xv;
172 x[2 + idt] -= v[2] * xv;
173 x[3 + idt] -= v[3] * xv;
174 x[4 + idt] -= v[4] * xv;
175 x[5 + idt] -= v[5] * xv;
176 x[6 + idt] -= v[6] * xv;
177 x[7 + idt] -= v[7] * xv;
178 x[8 + idt] -= v[8] * xv;
179 x[9 + idt] -= v[9] * xv;
180 x[10 + idt] -= v[10] * xv;
181 x[11 + idt] -= v[11] * xv;
182 x[12 + idt] -= v[12] * xv;
183 v += 13;
184 }
185 }
186 }
187 /* backward solve the upper triangular */
188 for (i = n - 1; i >= 0; i--) {
189 v = aa + bs2 * (adiag[i + 1] + 1);
190 vi = aj + adiag[i + 1] + 1;
191 nz = adiag[i] - adiag[i + 1] - 1;
192 idt = bs * i;
193 s[0] = x[idt];
194 s[1] = x[1 + idt];
195 s[2] = x[2 + idt];
196 s[3] = x[3 + idt];
197 s[4] = x[4 + idt];
198 s[5] = x[5 + idt];
199 s[6] = x[6 + idt];
200 s[7] = x[7 + idt];
201 s[8] = x[8 + idt];
202 s[9] = x[9 + idt];
203 s[10] = x[10 + idt];
204 s[11] = x[11 + idt];
205 s[12] = x[12 + idt];
206
207 for (m = 0; m < nz; m++) {
208 idx = bs * vi[m];
209 for (k = 0; k < bs; k++) {
210 xv = x[k + idx];
211 s[0] -= v[0] * xv;
212 s[1] -= v[1] * xv;
213 s[2] -= v[2] * xv;
214 s[3] -= v[3] * xv;
215 s[4] -= v[4] * xv;
216 s[5] -= v[5] * xv;
217 s[6] -= v[6] * xv;
218 s[7] -= v[7] * xv;
219 s[8] -= v[8] * xv;
220 s[9] -= v[9] * xv;
221 s[10] -= v[10] * xv;
222 s[11] -= v[11] * xv;
223 s[12] -= v[12] * xv;
224 v += 13;
225 }
226 }
227 PetscCall(PetscArrayzero(x + idt, bs));
228 for (k = 0; k < bs; k++) {
229 x[idt] += v[0] * s[k];
230 x[1 + idt] += v[1] * s[k];
231 x[2 + idt] += v[2] * s[k];
232 x[3 + idt] += v[3] * s[k];
233 x[4 + idt] += v[4] * s[k];
234 x[5 + idt] += v[5] * s[k];
235 x[6 + idt] += v[6] * s[k];
236 x[7 + idt] += v[7] * s[k];
237 x[8 + idt] += v[8] * s[k];
238 x[9 + idt] += v[9] * s[k];
239 x[10 + idt] += v[10] * s[k];
240 x[11 + idt] += v[11] * s[k];
241 x[12 + idt] += v[12] * s[k];
242 v += 13;
243 }
244 }
245 PetscCall(VecRestoreArrayRead(bb, &b));
246 PetscCall(VecRestoreArray(xx, &x));
247 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
248 PetscFunctionReturn(PETSC_SUCCESS);
249 }
250
251 /* Block operations are done by accessing one column at a time */
252 /* Default MatSolve for block size 12 */
253
MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A,Vec bb,Vec xx)254 PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx)
255 {
256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
257 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
258 PetscInt i, k, nz, idx, idt, m;
259 const MatScalar *aa = a->a, *v;
260 PetscScalar s[12];
261 PetscScalar *x, xv;
262 const PetscScalar *b;
263
264 PetscFunctionBegin;
265 PetscCall(VecGetArrayRead(bb, &b));
266 PetscCall(VecGetArray(xx, &x));
267
268 /* forward solve the lower triangular */
269 for (i = 0; i < n; i++) {
270 v = aa + bs2 * ai[i];
271 vi = aj + ai[i];
272 nz = ai[i + 1] - ai[i];
273 idt = bs * i;
274 x[idt] = b[idt];
275 x[1 + idt] = b[1 + idt];
276 x[2 + idt] = b[2 + idt];
277 x[3 + idt] = b[3 + idt];
278 x[4 + idt] = b[4 + idt];
279 x[5 + idt] = b[5 + idt];
280 x[6 + idt] = b[6 + idt];
281 x[7 + idt] = b[7 + idt];
282 x[8 + idt] = b[8 + idt];
283 x[9 + idt] = b[9 + idt];
284 x[10 + idt] = b[10 + idt];
285 x[11 + idt] = b[11 + idt];
286 for (m = 0; m < nz; m++) {
287 idx = bs * vi[m];
288 for (k = 0; k < bs; k++) {
289 xv = x[k + idx];
290 x[idt] -= v[0] * xv;
291 x[1 + idt] -= v[1] * xv;
292 x[2 + idt] -= v[2] * xv;
293 x[3 + idt] -= v[3] * xv;
294 x[4 + idt] -= v[4] * xv;
295 x[5 + idt] -= v[5] * xv;
296 x[6 + idt] -= v[6] * xv;
297 x[7 + idt] -= v[7] * xv;
298 x[8 + idt] -= v[8] * xv;
299 x[9 + idt] -= v[9] * xv;
300 x[10 + idt] -= v[10] * xv;
301 x[11 + idt] -= v[11] * xv;
302 v += 12;
303 }
304 }
305 }
306 /* backward solve the upper triangular */
307 for (i = n - 1; i >= 0; i--) {
308 v = aa + bs2 * (adiag[i + 1] + 1);
309 vi = aj + adiag[i + 1] + 1;
310 nz = adiag[i] - adiag[i + 1] - 1;
311 idt = bs * i;
312 s[0] = x[idt];
313 s[1] = x[1 + idt];
314 s[2] = x[2 + idt];
315 s[3] = x[3 + idt];
316 s[4] = x[4 + idt];
317 s[5] = x[5 + idt];
318 s[6] = x[6 + idt];
319 s[7] = x[7 + idt];
320 s[8] = x[8 + idt];
321 s[9] = x[9 + idt];
322 s[10] = x[10 + idt];
323 s[11] = x[11 + idt];
324
325 for (m = 0; m < nz; m++) {
326 idx = bs * vi[m];
327 for (k = 0; k < bs; k++) {
328 xv = x[k + idx];
329 s[0] -= v[0] * xv;
330 s[1] -= v[1] * xv;
331 s[2] -= v[2] * xv;
332 s[3] -= v[3] * xv;
333 s[4] -= v[4] * xv;
334 s[5] -= v[5] * xv;
335 s[6] -= v[6] * xv;
336 s[7] -= v[7] * xv;
337 s[8] -= v[8] * xv;
338 s[9] -= v[9] * xv;
339 s[10] -= v[10] * xv;
340 s[11] -= v[11] * xv;
341 v += 12;
342 }
343 }
344 PetscCall(PetscArrayzero(x + idt, bs));
345 for (k = 0; k < bs; k++) {
346 x[idt] += v[0] * s[k];
347 x[1 + idt] += v[1] * s[k];
348 x[2 + idt] += v[2] * s[k];
349 x[3 + idt] += v[3] * s[k];
350 x[4 + idt] += v[4] * s[k];
351 x[5 + idt] += v[5] * s[k];
352 x[6 + idt] += v[6] * s[k];
353 x[7 + idt] += v[7] * s[k];
354 x[8 + idt] += v[8] * s[k];
355 x[9 + idt] += v[9] * s[k];
356 x[10 + idt] += v[10] * s[k];
357 x[11 + idt] += v[11] * s[k];
358 v += 12;
359 }
360 }
361 PetscCall(VecRestoreArrayRead(bb, &b));
362 PetscCall(VecRestoreArray(xx, &x));
363 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
364 PetscFunctionReturn(PETSC_SUCCESS);
365 }
366