xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
8 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
9   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
10   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
11   const MatScalar   *aa = a->a, *v;
12   PetscScalar       *x, s1, s2, x1, x2;
13   const PetscScalar *b;
14   PetscInt           jdx, idt, idx, nz, i;
15 
16   PetscFunctionBegin;
17   PetscCall(VecGetArrayRead(bb, &b));
18   PetscCall(VecGetArray(xx, &x));
19 
20   /* forward solve the lower triangular */
21   idx  = 0;
22   x[0] = b[0];
23   x[1] = b[1];
24   for (i = 1; i < n; i++) {
25     v  = aa + 4 * ai[i];
26     vi = aj + ai[i];
27     nz = diag[i] - ai[i];
28     idx += 2;
29     s1 = b[idx];
30     s2 = b[1 + idx];
31     while (nz--) {
32       jdx = 2 * (*vi++);
33       x1  = x[jdx];
34       x2  = x[1 + jdx];
35       s1 -= v[0] * x1 + v[2] * x2;
36       s2 -= v[1] * x1 + v[3] * x2;
37       v += 4;
38     }
39     x[idx]     = s1;
40     x[1 + idx] = s2;
41   }
42   /* backward solve the upper triangular */
43   for (i = n - 1; i >= 0; i--) {
44     v   = aa + 4 * diag[i] + 4;
45     vi  = aj + diag[i] + 1;
46     nz  = ai[i + 1] - diag[i] - 1;
47     idt = 2 * i;
48     s1  = x[idt];
49     s2  = x[1 + idt];
50     while (nz--) {
51       idx = 2 * (*vi++);
52       x1  = x[idx];
53       x2  = x[1 + idx];
54       s1 -= v[0] * x1 + v[2] * x2;
55       s2 -= v[1] * x1 + v[3] * x2;
56       v += 4;
57     }
58     v          = aa + 4 * diag[i];
59     x[idt]     = v[0] * s1 + v[2] * s2;
60     x[1 + idt] = v[1] * s1 + v[3] * s2;
61   }
62 
63   PetscCall(VecRestoreArrayRead(bb, &b));
64   PetscCall(VecRestoreArray(xx, &x));
65   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
66   PetscFunctionReturn(0);
67 }
68 
69 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
70   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
71   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
72   PetscInt           i, k, nz, idx, idt, jdx;
73   const MatScalar   *aa = a->a, *v;
74   PetscScalar       *x, s1, s2, x1, x2;
75   const PetscScalar *b;
76 
77   PetscFunctionBegin;
78   PetscCall(VecGetArrayRead(bb, &b));
79   PetscCall(VecGetArray(xx, &x));
80   /* forward solve the lower triangular */
81   idx  = 0;
82   x[0] = b[idx];
83   x[1] = b[1 + idx];
84   for (i = 1; i < n; i++) {
85     v   = aa + 4 * ai[i];
86     vi  = aj + ai[i];
87     nz  = ai[i + 1] - ai[i];
88     idx = 2 * i;
89     s1  = b[idx];
90     s2  = b[1 + idx];
91     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
92     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
93     for (k = 0; k < nz; k++) {
94       jdx = 2 * vi[k];
95       x1  = x[jdx];
96       x2  = x[1 + jdx];
97       s1 -= v[0] * x1 + v[2] * x2;
98       s2 -= v[1] * x1 + v[3] * x2;
99       v += 4;
100     }
101     x[idx]     = s1;
102     x[1 + idx] = s2;
103   }
104 
105   /* backward solve the upper triangular */
106   for (i = n - 1; i >= 0; i--) {
107     v   = aa + 4 * (adiag[i + 1] + 1);
108     vi  = aj + adiag[i + 1] + 1;
109     nz  = adiag[i] - adiag[i + 1] - 1;
110     idt = 2 * i;
111     s1  = x[idt];
112     s2  = x[1 + idt];
113     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
114     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
115     for (k = 0; k < nz; k++) {
116       idx = 2 * vi[k];
117       x1  = x[idx];
118       x2  = x[1 + idx];
119       s1 -= v[0] * x1 + v[2] * x2;
120       s2 -= v[1] * x1 + v[3] * x2;
121       v += 4;
122     }
123     /* x = inv_diagonal*x */
124     x[idt]     = v[0] * s1 + v[2] * s2;
125     x[1 + idt] = v[1] * s1 + v[3] * s2;
126   }
127 
128   PetscCall(VecRestoreArrayRead(bb, &b));
129   PetscCall(VecRestoreArray(xx, &x));
130   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
135   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
136   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
137   PetscInt           i, k, nz, idx, jdx;
138   const MatScalar   *aa = a->a, *v;
139   PetscScalar       *x, s1, s2, x1, x2;
140   const PetscScalar *b;
141 
142   PetscFunctionBegin;
143   PetscCall(VecGetArrayRead(bb, &b));
144   PetscCall(VecGetArray(xx, &x));
145   /* forward solve the lower triangular */
146   idx  = 0;
147   x[0] = b[idx];
148   x[1] = b[1 + idx];
149   for (i = 1; i < n; i++) {
150     v   = aa + 4 * ai[i];
151     vi  = aj + ai[i];
152     nz  = ai[i + 1] - ai[i];
153     idx = 2 * i;
154     s1  = b[idx];
155     s2  = b[1 + idx];
156     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
157     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
158     for (k = 0; k < nz; k++) {
159       jdx = 2 * vi[k];
160       x1  = x[jdx];
161       x2  = x[1 + jdx];
162       s1 -= v[0] * x1 + v[2] * x2;
163       s2 -= v[1] * x1 + v[3] * x2;
164       v += 4;
165     }
166     x[idx]     = s1;
167     x[1 + idx] = s2;
168   }
169 
170   PetscCall(VecRestoreArrayRead(bb, &b));
171   PetscCall(VecRestoreArray(xx, &x));
172   PetscCall(PetscLogFlops(4.0 * (a->nz) - A->cmap->n));
173   PetscFunctionReturn(0);
174 }
175 
176 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
177   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
178   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
179   PetscInt           i, k, nz, idx, idt;
180   const MatScalar   *aa = a->a, *v;
181   PetscScalar       *x, s1, s2, x1, x2;
182   const PetscScalar *b;
183 
184   PetscFunctionBegin;
185   PetscCall(VecGetArrayRead(bb, &b));
186   PetscCall(VecGetArray(xx, &x));
187 
188   /* backward solve the upper triangular */
189   for (i = n - 1; i >= 0; i--) {
190     v   = aa + 4 * (adiag[i + 1] + 1);
191     vi  = aj + adiag[i + 1] + 1;
192     nz  = adiag[i] - adiag[i + 1] - 1;
193     idt = 2 * i;
194     s1  = b[idt];
195     s2  = b[1 + idt];
196     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
197     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
198     for (k = 0; k < nz; k++) {
199       idx = 2 * vi[k];
200       x1  = x[idx];
201       x2  = x[1 + idx];
202       s1 -= v[0] * x1 + v[2] * x2;
203       s2 -= v[1] * x1 + v[3] * x2;
204       v += 4;
205     }
206     /* x = inv_diagonal*x */
207     x[idt]     = v[0] * s1 + v[2] * s2;
208     x[1 + idt] = v[1] * s1 + v[3] * s2;
209   }
210 
211   PetscCall(VecRestoreArrayRead(bb, &b));
212   PetscCall(VecRestoreArray(xx, &x));
213   PetscCall(PetscLogFlops(4.0 * a->nz - A->cmap->n));
214   PetscFunctionReturn(0);
215 }
216