xref: /petsc/src/mat/impls/baij/seq/baijsolvnat3.c (revision 31d78bcd2b98084dc1368b20eb1129c8b9fb39fe)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)8 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
9 {
10   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
11   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j;
12   const PetscInt    *diag = a->diag, *vi;
13   const MatScalar   *aa   = a->a, *v;
14   PetscScalar       *x, s1, s2, s3, x1, x2, x3;
15   const PetscScalar *b;
16   PetscInt           jdx, idt, idx, nz, i;
17 
18   PetscFunctionBegin;
19   PetscCall(VecGetArrayRead(bb, &b));
20   PetscCall(VecGetArray(xx, &x));
21 
22   /* forward solve the lower triangular */
23   idx  = 0;
24   x[0] = b[0];
25   x[1] = b[1];
26   x[2] = b[2];
27   for (i = 1; i < n; i++) {
28     v  = aa + 9 * ai[i];
29     vi = aj + ai[i];
30     nz = diag[i] - ai[i];
31     idx += 3;
32     s1 = b[idx];
33     s2 = b[1 + idx];
34     s3 = b[2 + idx];
35     while (nz--) {
36       jdx = 3 * (*vi++);
37       x1  = x[jdx];
38       x2  = x[1 + jdx];
39       x3  = x[2 + jdx];
40       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
41       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
42       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
43       v += 9;
44     }
45     x[idx]     = s1;
46     x[1 + idx] = s2;
47     x[2 + idx] = s3;
48   }
49   /* backward solve the upper triangular */
50   for (i = n - 1; i >= 0; i--) {
51     v   = aa + 9 * diag[i] + 9;
52     vi  = aj + diag[i] + 1;
53     nz  = ai[i + 1] - diag[i] - 1;
54     idt = 3 * i;
55     s1  = x[idt];
56     s2  = x[1 + idt];
57     s3  = x[2 + idt];
58     while (nz--) {
59       idx = 3 * (*vi++);
60       x1  = x[idx];
61       x2  = x[1 + idx];
62       x3  = x[2 + idx];
63       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
64       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
65       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
66       v += 9;
67     }
68     v          = aa + 9 * diag[i];
69     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
70     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
71     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
72   }
73 
74   PetscCall(VecRestoreArrayRead(bb, &b));
75   PetscCall(VecRestoreArray(xx, &x));
76   PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)80 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
81 {
82   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
83   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
84   PetscInt           i, k, nz, idx, jdx, idt;
85   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
86   const MatScalar   *aa = a->a, *v;
87   PetscScalar       *x;
88   const PetscScalar *b;
89   PetscScalar        s1, s2, s3, x1, x2, x3;
90 
91   PetscFunctionBegin;
92   PetscCall(VecGetArrayRead(bb, &b));
93   PetscCall(VecGetArray(xx, &x));
94   /* forward solve the lower triangular */
95   idx  = 0;
96   x[0] = b[idx];
97   x[1] = b[1 + idx];
98   x[2] = b[2 + idx];
99   for (i = 1; i < n; i++) {
100     v   = aa + bs2 * ai[i];
101     vi  = aj + ai[i];
102     nz  = ai[i + 1] - ai[i];
103     idx = bs * i;
104     s1  = b[idx];
105     s2  = b[1 + idx];
106     s3  = b[2 + idx];
107     for (k = 0; k < nz; k++) {
108       jdx = bs * vi[k];
109       x1  = x[jdx];
110       x2  = x[1 + jdx];
111       x3  = x[2 + jdx];
112       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
113       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
114       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
115 
116       v += bs2;
117     }
118 
119     x[idx]     = s1;
120     x[1 + idx] = s2;
121     x[2 + idx] = s3;
122   }
123 
124   /* backward solve the upper triangular */
125   for (i = n - 1; i >= 0; i--) {
126     v   = aa + bs2 * (adiag[i + 1] + 1);
127     vi  = aj + adiag[i + 1] + 1;
128     nz  = adiag[i] - adiag[i + 1] - 1;
129     idt = bs * i;
130     s1  = x[idt];
131     s2  = x[1 + idt];
132     s3  = x[2 + idt];
133 
134     for (k = 0; k < nz; k++) {
135       idx = bs * vi[k];
136       x1  = x[idx];
137       x2  = x[1 + idx];
138       x3  = x[2 + idx];
139       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
140       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
141       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
142 
143       v += bs2;
144     }
145     /* x = inv_diagonal*x */
146     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
147     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
148     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
149   }
150 
151   PetscCall(VecRestoreArrayRead(bb, &b));
152   PetscCall(VecRestoreArray(xx, &x));
153   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)157 PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
158 {
159   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
160   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
161   PetscInt           i, k, nz, idx, jdx;
162   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
163   const MatScalar   *aa = a->a, *v;
164   PetscScalar       *x;
165   const PetscScalar *b;
166   PetscScalar        s1, s2, s3, x1, x2, x3;
167 
168   PetscFunctionBegin;
169   PetscCall(VecGetArrayRead(bb, &b));
170   PetscCall(VecGetArray(xx, &x));
171   /* forward solve the lower triangular */
172   idx  = 0;
173   x[0] = b[idx];
174   x[1] = b[1 + idx];
175   x[2] = b[2 + idx];
176   for (i = 1; i < n; i++) {
177     v   = aa + bs2 * ai[i];
178     vi  = aj + ai[i];
179     nz  = ai[i + 1] - ai[i];
180     idx = bs * i;
181     s1  = b[idx];
182     s2  = b[1 + idx];
183     s3  = b[2 + idx];
184     for (k = 0; k < nz; k++) {
185       jdx = bs * vi[k];
186       x1  = x[jdx];
187       x2  = x[1 + jdx];
188       x3  = x[2 + jdx];
189       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
190       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
191       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
192 
193       v += bs2;
194     }
195 
196     x[idx]     = s1;
197     x[1 + idx] = s2;
198     x[2 + idx] = s3;
199   }
200 
201   PetscCall(VecRestoreArrayRead(bb, &b));
202   PetscCall(VecRestoreArray(xx, &x));
203   PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n));
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)207 PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
208 {
209   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
210   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
211   PetscInt           i, k, nz, idx, idt;
212   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
213   const MatScalar   *aa = a->a, *v;
214   PetscScalar       *x;
215   const PetscScalar *b;
216   PetscScalar        s1, s2, s3, x1, x2, x3;
217 
218   PetscFunctionBegin;
219   PetscCall(VecGetArrayRead(bb, &b));
220   PetscCall(VecGetArray(xx, &x));
221 
222   /* backward solve the upper triangular */
223   for (i = n - 1; i >= 0; i--) {
224     v   = aa + bs2 * (adiag[i + 1] + 1);
225     vi  = aj + adiag[i + 1] + 1;
226     nz  = adiag[i] - adiag[i + 1] - 1;
227     idt = bs * i;
228     s1  = b[idt];
229     s2  = b[1 + idt];
230     s3  = b[2 + idt];
231 
232     for (k = 0; k < nz; k++) {
233       idx = bs * vi[k];
234       x1  = x[idx];
235       x2  = x[1 + idx];
236       x3  = x[2 + idx];
237       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
238       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
239       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
240 
241       v += bs2;
242     }
243     /* x = inv_diagonal*x */
244     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
245     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
246     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
247   }
248 
249   PetscCall(VecRestoreArrayRead(bb, &b));
250   PetscCall(VecRestoreArray(xx, &x));
251   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254