xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision b7d2914710534e99fffc0a018fc22098823e0f37)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 #if defined(PETSC_HAVE_XMMINTRIN_H)
5   #include <xmmintrin.h>
6 #endif
7 
8 /*
9       Special case where the matrix was ILU(0) factored in the natural
10    ordering. This eliminates the need for the column and row permutation.
11 */
MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)12 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
13 {
14   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
15   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
16   const MatScalar   *aa = a->a, *v;
17   PetscScalar       *x, s1, s2, x1, x2;
18   const PetscScalar *b;
19   PetscInt           jdx, idt, idx, nz, i;
20 
21   PetscFunctionBegin;
22   PetscCall(VecGetArrayRead(bb, &b));
23   PetscCall(VecGetArray(xx, &x));
24 
25   /* forward solve the lower triangular */
26   idx  = 0;
27   x[0] = b[0];
28   x[1] = b[1];
29   for (i = 1; i < n; i++) {
30     v  = aa + 4 * ai[i];
31     vi = aj + ai[i];
32     nz = diag[i] - ai[i];
33     idx += 2;
34     s1 = b[idx];
35     s2 = b[1 + idx];
36     while (nz--) {
37       jdx = 2 * (*vi++);
38       x1  = x[jdx];
39       x2  = x[1 + jdx];
40       s1 -= v[0] * x1 + v[2] * x2;
41       s2 -= v[1] * x1 + v[3] * x2;
42       v += 4;
43     }
44     x[idx]     = s1;
45     x[1 + idx] = s2;
46   }
47   /* backward solve the upper triangular */
48   for (i = n - 1; i >= 0; i--) {
49     v   = aa + 4 * diag[i] + 4;
50     vi  = aj + diag[i] + 1;
51     nz  = ai[i + 1] - diag[i] - 1;
52     idt = 2 * i;
53     s1  = x[idt];
54     s2  = x[1 + idt];
55     while (nz--) {
56       idx = 2 * (*vi++);
57       x1  = x[idx];
58       x2  = x[1 + idx];
59       s1 -= v[0] * x1 + v[2] * x2;
60       s2 -= v[1] * x1 + v[3] * x2;
61       v += 4;
62     }
63     v          = aa + 4 * diag[i];
64     x[idt]     = v[0] * s1 + v[2] * s2;
65     x[1 + idt] = v[1] * s1 + v[3] * s2;
66   }
67 
68   PetscCall(VecRestoreArrayRead(bb, &b));
69   PetscCall(VecRestoreArray(xx, &x));
70   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)74 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
75 {
76   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
77   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
78   PetscInt           i, k, nz, idx, idt, jdx;
79   const MatScalar   *aa = a->a, *v;
80   PetscScalar       *x, s1, s2, x1, x2;
81   const PetscScalar *b;
82 
83   PetscFunctionBegin;
84   PetscCall(VecGetArrayRead(bb, &b));
85   PetscCall(VecGetArray(xx, &x));
86   /* forward solve the lower triangular */
87   idx  = 0;
88   x[0] = b[idx];
89   x[1] = b[1 + idx];
90   for (i = 1; i < n; i++) {
91     v   = aa + 4 * ai[i];
92     vi  = aj + ai[i];
93     nz  = ai[i + 1] - ai[i];
94     idx = 2 * i;
95     s1  = b[idx];
96     s2  = b[1 + idx];
97     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
98     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
99     for (k = 0; k < nz; k++) {
100       jdx = 2 * vi[k];
101       x1  = x[jdx];
102       x2  = x[1 + jdx];
103       s1 -= v[0] * x1 + v[2] * x2;
104       s2 -= v[1] * x1 + v[3] * x2;
105       v += 4;
106     }
107     x[idx]     = s1;
108     x[1 + idx] = s2;
109   }
110 
111   /* backward solve the upper triangular */
112   for (i = n - 1; i >= 0; i--) {
113     v   = aa + 4 * (adiag[i + 1] + 1);
114     vi  = aj + adiag[i + 1] + 1;
115     nz  = adiag[i] - adiag[i + 1] - 1;
116     idt = 2 * i;
117     s1  = x[idt];
118     s2  = x[1 + idt];
119     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
120     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
121     for (k = 0; k < nz; k++) {
122       idx = 2 * vi[k];
123       x1  = x[idx];
124       x2  = x[1 + idx];
125       s1 -= v[0] * x1 + v[2] * x2;
126       s2 -= v[1] * x1 + v[3] * x2;
127       v += 4;
128     }
129     /* x = inv_diagonal*x */
130     x[idt]     = v[0] * s1 + v[2] * s2;
131     x[1 + idt] = v[1] * s1 + v[3] * s2;
132   }
133 
134   PetscCall(VecRestoreArrayRead(bb, &b));
135   PetscCall(VecRestoreArray(xx, &x));
136   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
137   PetscFunctionReturn(PETSC_SUCCESS);
138 }
139 
MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)140 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
141 {
142   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
143   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
144   PetscInt           i, k, nz, idx, jdx;
145   const MatScalar   *aa = a->a, *v;
146   PetscScalar       *x, s1, s2, x1, x2;
147   const PetscScalar *b;
148 
149   PetscFunctionBegin;
150   PetscCall(VecGetArrayRead(bb, &b));
151   PetscCall(VecGetArray(xx, &x));
152   /* forward solve the lower triangular */
153   idx  = 0;
154   x[0] = b[idx];
155   x[1] = b[1 + idx];
156   for (i = 1; i < n; i++) {
157     v   = aa + 4 * ai[i];
158     vi  = aj + ai[i];
159     nz  = ai[i + 1] - ai[i];
160     idx = 2 * i;
161     s1  = b[idx];
162     s2  = b[1 + idx];
163     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
164     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
165     for (k = 0; k < nz; k++) {
166       jdx = 2 * vi[k];
167       x1  = x[jdx];
168       x2  = x[1 + jdx];
169       s1 -= v[0] * x1 + v[2] * x2;
170       s2 -= v[1] * x1 + v[3] * x2;
171       v += 4;
172     }
173     x[idx]     = s1;
174     x[1 + idx] = s2;
175   }
176 
177   PetscCall(VecRestoreArrayRead(bb, &b));
178   PetscCall(VecRestoreArray(xx, &x));
179   PetscCall(PetscLogFlops(4.0 * (a->nz) - A->cmap->n));
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)183 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
184 {
185   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
186   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
187   PetscInt           i, k, nz, idx, idt;
188   const MatScalar   *aa = a->a, *v;
189   PetscScalar       *x, s1, s2, x1, x2;
190   const PetscScalar *b;
191 
192   PetscFunctionBegin;
193   PetscCall(VecGetArrayRead(bb, &b));
194   PetscCall(VecGetArray(xx, &x));
195 
196   /* backward solve the upper triangular */
197   for (i = n - 1; i >= 0; i--) {
198     v   = aa + 4 * (adiag[i + 1] + 1);
199     vi  = aj + adiag[i + 1] + 1;
200     nz  = adiag[i] - adiag[i + 1] - 1;
201     idt = 2 * i;
202     s1  = b[idt];
203     s2  = b[1 + idt];
204     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
205     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
206     for (k = 0; k < nz; k++) {
207       idx = 2 * vi[k];
208       x1  = x[idx];
209       x2  = x[1 + idx];
210       s1 -= v[0] * x1 + v[2] * x2;
211       s2 -= v[1] * x1 + v[3] * x2;
212       v += 4;
213     }
214     /* x = inv_diagonal*x */
215     x[idt]     = v[0] * s1 + v[2] * s2;
216     x[1 + idt] = v[1] * s1 + v[3] * s2;
217   }
218 
219   PetscCall(VecRestoreArrayRead(bb, &b));
220   PetscCall(VecRestoreArray(xx, &x));
221   PetscCall(PetscLogFlops(4.0 * a->nz - A->cmap->n));
222   PetscFunctionReturn(PETSC_SUCCESS);
223 }
224