1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3
4 /* Block operations are done by accessing one column at a time */
5 /* Default MatSolve for block size 11 */
6
MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A,Vec bb,Vec xx)7 PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx)
8 {
9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
10 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
11 PetscInt i, k, nz, idx, idt, m;
12 const MatScalar *aa = a->a, *v;
13 PetscScalar s[11];
14 PetscScalar *x, xv;
15 const PetscScalar *b;
16
17 PetscFunctionBegin;
18 PetscCall(VecGetArrayRead(bb, &b));
19 PetscCall(VecGetArray(xx, &x));
20
21 /* forward solve the lower triangular */
22 for (i = 0; i < n; i++) {
23 v = aa + bs2 * ai[i];
24 vi = aj + ai[i];
25 nz = ai[i + 1] - ai[i];
26 idt = bs * i;
27 x[idt] = b[idt];
28 x[1 + idt] = b[1 + idt];
29 x[2 + idt] = b[2 + idt];
30 x[3 + idt] = b[3 + idt];
31 x[4 + idt] = b[4 + idt];
32 x[5 + idt] = b[5 + idt];
33 x[6 + idt] = b[6 + idt];
34 x[7 + idt] = b[7 + idt];
35 x[8 + idt] = b[8 + idt];
36 x[9 + idt] = b[9 + idt];
37 x[10 + idt] = b[10 + idt];
38 for (m = 0; m < nz; m++) {
39 idx = bs * vi[m];
40 for (k = 0; k < 11; k++) {
41 xv = x[k + idx];
42 x[idt] -= v[0] * xv;
43 x[1 + idt] -= v[1] * xv;
44 x[2 + idt] -= v[2] * xv;
45 x[3 + idt] -= v[3] * xv;
46 x[4 + idt] -= v[4] * xv;
47 x[5 + idt] -= v[5] * xv;
48 x[6 + idt] -= v[6] * xv;
49 x[7 + idt] -= v[7] * xv;
50 x[8 + idt] -= v[8] * xv;
51 x[9 + idt] -= v[9] * xv;
52 x[10 + idt] -= v[10] * xv;
53 v += 11;
54 }
55 }
56 }
57 /* backward solve the upper triangular */
58 for (i = n - 1; i >= 0; i--) {
59 v = aa + bs2 * (adiag[i + 1] + 1);
60 vi = aj + adiag[i + 1] + 1;
61 nz = adiag[i] - adiag[i + 1] - 1;
62 idt = bs * i;
63 s[0] = x[idt];
64 s[1] = x[1 + idt];
65 s[2] = x[2 + idt];
66 s[3] = x[3 + idt];
67 s[4] = x[4 + idt];
68 s[5] = x[5 + idt];
69 s[6] = x[6 + idt];
70 s[7] = x[7 + idt];
71 s[8] = x[8 + idt];
72 s[9] = x[9 + idt];
73 s[10] = x[10 + idt];
74
75 for (m = 0; m < nz; m++) {
76 idx = bs * vi[m];
77 for (k = 0; k < 11; k++) {
78 xv = x[k + idx];
79 s[0] -= v[0] * xv;
80 s[1] -= v[1] * xv;
81 s[2] -= v[2] * xv;
82 s[3] -= v[3] * xv;
83 s[4] -= v[4] * xv;
84 s[5] -= v[5] * xv;
85 s[6] -= v[6] * xv;
86 s[7] -= v[7] * xv;
87 s[8] -= v[8] * xv;
88 s[9] -= v[9] * xv;
89 s[10] -= v[10] * xv;
90 v += 11;
91 }
92 }
93 PetscCall(PetscArrayzero(x + idt, bs));
94 for (k = 0; k < 11; k++) {
95 x[idt] += v[0] * s[k];
96 x[1 + idt] += v[1] * s[k];
97 x[2 + idt] += v[2] * s[k];
98 x[3 + idt] += v[3] * s[k];
99 x[4 + idt] += v[4] * s[k];
100 x[5 + idt] += v[5] * s[k];
101 x[6 + idt] += v[6] * s[k];
102 x[7 + idt] += v[7] * s[k];
103 x[8 + idt] += v[8] * s[k];
104 x[9 + idt] += v[9] * s[k];
105 x[10 + idt] += v[10] * s[k];
106 v += 11;
107 }
108 }
109 PetscCall(VecRestoreArrayRead(bb, &b));
110 PetscCall(VecRestoreArray(xx, &x));
111 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
112 PetscFunctionReturn(PETSC_SUCCESS);
113 }
114