1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3
4 /*
5 Special case where the matrix was ILU(0) factored in the natural
6 ordering. This eliminates the need for the column and row permutation.
7 */
MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)8 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
9 {
10 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
11 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j;
12 const PetscInt *diag = a->diag, *vi;
13 const MatScalar *aa = a->a, *v;
14 PetscScalar *x, s1, s2, s3, x1, x2, x3;
15 const PetscScalar *b;
16 PetscInt jdx, idt, idx, nz, i;
17
18 PetscFunctionBegin;
19 PetscCall(VecGetArrayRead(bb, &b));
20 PetscCall(VecGetArray(xx, &x));
21
22 /* forward solve the lower triangular */
23 idx = 0;
24 x[0] = b[0];
25 x[1] = b[1];
26 x[2] = b[2];
27 for (i = 1; i < n; i++) {
28 v = aa + 9 * ai[i];
29 vi = aj + ai[i];
30 nz = diag[i] - ai[i];
31 idx += 3;
32 s1 = b[idx];
33 s2 = b[1 + idx];
34 s3 = b[2 + idx];
35 while (nz--) {
36 jdx = 3 * (*vi++);
37 x1 = x[jdx];
38 x2 = x[1 + jdx];
39 x3 = x[2 + jdx];
40 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
41 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
42 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
43 v += 9;
44 }
45 x[idx] = s1;
46 x[1 + idx] = s2;
47 x[2 + idx] = s3;
48 }
49 /* backward solve the upper triangular */
50 for (i = n - 1; i >= 0; i--) {
51 v = aa + 9 * diag[i] + 9;
52 vi = aj + diag[i] + 1;
53 nz = ai[i + 1] - diag[i] - 1;
54 idt = 3 * i;
55 s1 = x[idt];
56 s2 = x[1 + idt];
57 s3 = x[2 + idt];
58 while (nz--) {
59 idx = 3 * (*vi++);
60 x1 = x[idx];
61 x2 = x[1 + idx];
62 x3 = x[2 + idx];
63 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
64 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
65 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
66 v += 9;
67 }
68 v = aa + 9 * diag[i];
69 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
70 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
71 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
72 }
73
74 PetscCall(VecRestoreArrayRead(bb, &b));
75 PetscCall(VecRestoreArray(xx, &x));
76 PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
77 PetscFunctionReturn(PETSC_SUCCESS);
78 }
79
MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)80 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
81 {
82 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
83 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
84 PetscInt i, k, nz, idx, jdx, idt;
85 const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
86 const MatScalar *aa = a->a, *v;
87 PetscScalar *x;
88 const PetscScalar *b;
89 PetscScalar s1, s2, s3, x1, x2, x3;
90
91 PetscFunctionBegin;
92 PetscCall(VecGetArrayRead(bb, &b));
93 PetscCall(VecGetArray(xx, &x));
94 /* forward solve the lower triangular */
95 idx = 0;
96 x[0] = b[idx];
97 x[1] = b[1 + idx];
98 x[2] = b[2 + idx];
99 for (i = 1; i < n; i++) {
100 v = aa + bs2 * ai[i];
101 vi = aj + ai[i];
102 nz = ai[i + 1] - ai[i];
103 idx = bs * i;
104 s1 = b[idx];
105 s2 = b[1 + idx];
106 s3 = b[2 + idx];
107 for (k = 0; k < nz; k++) {
108 jdx = bs * vi[k];
109 x1 = x[jdx];
110 x2 = x[1 + jdx];
111 x3 = x[2 + jdx];
112 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
113 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
114 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
115
116 v += bs2;
117 }
118
119 x[idx] = s1;
120 x[1 + idx] = s2;
121 x[2 + idx] = s3;
122 }
123
124 /* backward solve the upper triangular */
125 for (i = n - 1; i >= 0; i--) {
126 v = aa + bs2 * (adiag[i + 1] + 1);
127 vi = aj + adiag[i + 1] + 1;
128 nz = adiag[i] - adiag[i + 1] - 1;
129 idt = bs * i;
130 s1 = x[idt];
131 s2 = x[1 + idt];
132 s3 = x[2 + idt];
133
134 for (k = 0; k < nz; k++) {
135 idx = bs * vi[k];
136 x1 = x[idx];
137 x2 = x[1 + idx];
138 x3 = x[2 + idx];
139 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
140 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
141 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
142
143 v += bs2;
144 }
145 /* x = inv_diagonal*x */
146 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
147 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
148 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
149 }
150
151 PetscCall(VecRestoreArrayRead(bb, &b));
152 PetscCall(VecRestoreArray(xx, &x));
153 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
154 PetscFunctionReturn(PETSC_SUCCESS);
155 }
156
MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)157 PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
158 {
159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
160 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j;
161 PetscInt i, k, nz, idx, jdx;
162 const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
163 const MatScalar *aa = a->a, *v;
164 PetscScalar *x;
165 const PetscScalar *b;
166 PetscScalar s1, s2, s3, x1, x2, x3;
167
168 PetscFunctionBegin;
169 PetscCall(VecGetArrayRead(bb, &b));
170 PetscCall(VecGetArray(xx, &x));
171 /* forward solve the lower triangular */
172 idx = 0;
173 x[0] = b[idx];
174 x[1] = b[1 + idx];
175 x[2] = b[2 + idx];
176 for (i = 1; i < n; i++) {
177 v = aa + bs2 * ai[i];
178 vi = aj + ai[i];
179 nz = ai[i + 1] - ai[i];
180 idx = bs * i;
181 s1 = b[idx];
182 s2 = b[1 + idx];
183 s3 = b[2 + idx];
184 for (k = 0; k < nz; k++) {
185 jdx = bs * vi[k];
186 x1 = x[jdx];
187 x2 = x[1 + jdx];
188 x3 = x[2 + jdx];
189 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
190 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
191 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
192
193 v += bs2;
194 }
195
196 x[idx] = s1;
197 x[1 + idx] = s2;
198 x[2 + idx] = s3;
199 }
200
201 PetscCall(VecRestoreArrayRead(bb, &b));
202 PetscCall(VecRestoreArray(xx, &x));
203 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n));
204 PetscFunctionReturn(PETSC_SUCCESS);
205 }
206
MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)207 PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
208 {
209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
210 const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
211 PetscInt i, k, nz, idx, idt;
212 const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
213 const MatScalar *aa = a->a, *v;
214 PetscScalar *x;
215 const PetscScalar *b;
216 PetscScalar s1, s2, s3, x1, x2, x3;
217
218 PetscFunctionBegin;
219 PetscCall(VecGetArrayRead(bb, &b));
220 PetscCall(VecGetArray(xx, &x));
221
222 /* backward solve the upper triangular */
223 for (i = n - 1; i >= 0; i--) {
224 v = aa + bs2 * (adiag[i + 1] + 1);
225 vi = aj + adiag[i + 1] + 1;
226 nz = adiag[i] - adiag[i + 1] - 1;
227 idt = bs * i;
228 s1 = b[idt];
229 s2 = b[1 + idt];
230 s3 = b[2 + idt];
231
232 for (k = 0; k < nz; k++) {
233 idx = bs * vi[k];
234 x1 = x[idx];
235 x2 = x[1 + idx];
236 x3 = x[2 + idx];
237 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
238 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
239 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
240
241 v += bs2;
242 }
243 /* x = inv_diagonal*x */
244 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
245 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
246 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
247 }
248
249 PetscCall(VecRestoreArrayRead(bb, &b));
250 PetscCall(VecRestoreArray(xx, &x));
251 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
252 PetscFunctionReturn(PETSC_SUCCESS);
253 }
254