xref: /petsc/src/mat/impls/baij/seq/baijsolvnat3.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
8 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
9   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
10   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j;
11   const PetscInt    *diag = a->diag, *vi;
12   const MatScalar   *aa   = a->a, *v;
13   PetscScalar       *x, s1, s2, s3, x1, x2, x3;
14   const PetscScalar *b;
15   PetscInt           jdx, idt, idx, nz, i;
16 
17   PetscFunctionBegin;
18   PetscCall(VecGetArrayRead(bb, &b));
19   PetscCall(VecGetArray(xx, &x));
20 
21   /* forward solve the lower triangular */
22   idx  = 0;
23   x[0] = b[0];
24   x[1] = b[1];
25   x[2] = b[2];
26   for (i = 1; i < n; i++) {
27     v  = aa + 9 * ai[i];
28     vi = aj + ai[i];
29     nz = diag[i] - ai[i];
30     idx += 3;
31     s1 = b[idx];
32     s2 = b[1 + idx];
33     s3 = b[2 + idx];
34     while (nz--) {
35       jdx = 3 * (*vi++);
36       x1  = x[jdx];
37       x2  = x[1 + jdx];
38       x3  = x[2 + jdx];
39       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
40       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
41       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
42       v += 9;
43     }
44     x[idx]     = s1;
45     x[1 + idx] = s2;
46     x[2 + idx] = s3;
47   }
48   /* backward solve the upper triangular */
49   for (i = n - 1; i >= 0; i--) {
50     v   = aa + 9 * diag[i] + 9;
51     vi  = aj + diag[i] + 1;
52     nz  = ai[i + 1] - diag[i] - 1;
53     idt = 3 * i;
54     s1  = x[idt];
55     s2  = x[1 + idt];
56     s3  = x[2 + idt];
57     while (nz--) {
58       idx = 3 * (*vi++);
59       x1  = x[idx];
60       x2  = x[1 + idx];
61       x3  = x[2 + idx];
62       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
63       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
64       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
65       v += 9;
66     }
67     v          = aa + 9 * diag[i];
68     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
69     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
70     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
71   }
72 
73   PetscCall(VecRestoreArrayRead(bb, &b));
74   PetscCall(VecRestoreArray(xx, &x));
75   PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) {
80   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
81   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
82   PetscInt           i, k, nz, idx, jdx, idt;
83   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
84   const MatScalar   *aa = a->a, *v;
85   PetscScalar       *x;
86   const PetscScalar *b;
87   PetscScalar        s1, s2, s3, x1, x2, x3;
88 
89   PetscFunctionBegin;
90   PetscCall(VecGetArrayRead(bb, &b));
91   PetscCall(VecGetArray(xx, &x));
92   /* forward solve the lower triangular */
93   idx  = 0;
94   x[0] = b[idx];
95   x[1] = b[1 + idx];
96   x[2] = b[2 + idx];
97   for (i = 1; i < n; i++) {
98     v   = aa + bs2 * ai[i];
99     vi  = aj + ai[i];
100     nz  = ai[i + 1] - ai[i];
101     idx = bs * i;
102     s1  = b[idx];
103     s2  = b[1 + idx];
104     s3  = b[2 + idx];
105     for (k = 0; k < nz; k++) {
106       jdx = bs * vi[k];
107       x1  = x[jdx];
108       x2  = x[1 + jdx];
109       x3  = x[2 + jdx];
110       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
111       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
112       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
113 
114       v += bs2;
115     }
116 
117     x[idx]     = s1;
118     x[1 + idx] = s2;
119     x[2 + idx] = s3;
120   }
121 
122   /* backward solve the upper triangular */
123   for (i = n - 1; i >= 0; i--) {
124     v   = aa + bs2 * (adiag[i + 1] + 1);
125     vi  = aj + adiag[i + 1] + 1;
126     nz  = adiag[i] - adiag[i + 1] - 1;
127     idt = bs * i;
128     s1  = x[idt];
129     s2  = x[1 + idt];
130     s3  = x[2 + idt];
131 
132     for (k = 0; k < nz; k++) {
133       idx = bs * vi[k];
134       x1  = x[idx];
135       x2  = x[1 + idx];
136       x3  = x[2 + idx];
137       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
138       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
139       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
140 
141       v += bs2;
142     }
143     /* x = inv_diagonal*x */
144     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
145     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
146     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
147   }
148 
149   PetscCall(VecRestoreArrayRead(bb, &b));
150   PetscCall(VecRestoreArray(xx, &x));
151   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
152   PetscFunctionReturn(0);
153 }
154 
155 PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) {
156   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
157   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
158   PetscInt           i, k, nz, idx, jdx;
159   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
160   const MatScalar   *aa = a->a, *v;
161   PetscScalar       *x;
162   const PetscScalar *b;
163   PetscScalar        s1, s2, s3, x1, x2, x3;
164 
165   PetscFunctionBegin;
166   PetscCall(VecGetArrayRead(bb, &b));
167   PetscCall(VecGetArray(xx, &x));
168   /* forward solve the lower triangular */
169   idx  = 0;
170   x[0] = b[idx];
171   x[1] = b[1 + idx];
172   x[2] = b[2 + idx];
173   for (i = 1; i < n; i++) {
174     v   = aa + bs2 * ai[i];
175     vi  = aj + ai[i];
176     nz  = ai[i + 1] - ai[i];
177     idx = bs * i;
178     s1  = b[idx];
179     s2  = b[1 + idx];
180     s3  = b[2 + idx];
181     for (k = 0; k < nz; k++) {
182       jdx = bs * vi[k];
183       x1  = x[jdx];
184       x2  = x[1 + jdx];
185       x3  = x[2 + jdx];
186       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
187       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
188       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
189 
190       v += bs2;
191     }
192 
193     x[idx]     = s1;
194     x[1 + idx] = s2;
195     x[2 + idx] = s3;
196   }
197 
198   PetscCall(VecRestoreArrayRead(bb, &b));
199   PetscCall(VecRestoreArray(xx, &x));
200   PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n));
201   PetscFunctionReturn(0);
202 }
203 
204 PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) {
205   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
206   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
207   PetscInt           i, k, nz, idx, idt;
208   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
209   const MatScalar   *aa = a->a, *v;
210   PetscScalar       *x;
211   const PetscScalar *b;
212   PetscScalar        s1, s2, s3, x1, x2, x3;
213 
214   PetscFunctionBegin;
215   PetscCall(VecGetArrayRead(bb, &b));
216   PetscCall(VecGetArray(xx, &x));
217 
218   /* backward solve the upper triangular */
219   for (i = n - 1; i >= 0; i--) {
220     v   = aa + bs2 * (adiag[i + 1] + 1);
221     vi  = aj + adiag[i + 1] + 1;
222     nz  = adiag[i] - adiag[i + 1] - 1;
223     idt = bs * i;
224     s1  = b[idt];
225     s2  = b[1 + idt];
226     s3  = b[2 + idt];
227 
228     for (k = 0; k < nz; k++) {
229       idx = bs * vi[k];
230       x1  = x[idx];
231       x2  = x[1 + idx];
232       x3  = x[2 + idx];
233       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
234       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
235       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
236 
237       v += bs2;
238     }
239     /* x = inv_diagonal*x */
240     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
241     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
242     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
243   }
244 
245   PetscCall(VecRestoreArrayRead(bb, &b));
246   PetscCall(VecRestoreArray(xx, &x));
247   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
248   PetscFunctionReturn(0);
249 }
250