xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision e5a36eccef3d6b83a2c625c30d0dfd5adb4001f2)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
8 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
9 {
10   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
11   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
12   PetscErrorCode    ierr;
13   const MatScalar   *aa=a->a,*v;
14   PetscScalar       *x,s1,s2,x1,x2;
15   const PetscScalar *b;
16   PetscInt          jdx,idt,idx,nz,i;
17 
18   PetscFunctionBegin;
19   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
20   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
21 
22   /* forward solve the lower triangular */
23   idx  = 0;
24   x[0] = b[0]; x[1] = b[1];
25   for (i=1; i<n; i++) {
26     v    =  aa      + 4*ai[i];
27     vi   =  aj      + ai[i];
28     nz   =  diag[i] - ai[i];
29     idx +=  2;
30     s1   =  b[idx];s2 = b[1+idx];
31     while (nz--) {
32       jdx = 2*(*vi++);
33       x1  = x[jdx];x2 = x[1+jdx];
34       s1 -= v[0]*x1 + v[2]*x2;
35       s2 -= v[1]*x1 + v[3]*x2;
36       v  += 4;
37     }
38     x[idx]   = s1;
39     x[1+idx] = s2;
40   }
41   /* backward solve the upper triangular */
42   for (i=n-1; i>=0; i--) {
43     v   = aa + 4*diag[i] + 4;
44     vi  = aj + diag[i] + 1;
45     nz  = ai[i+1] - diag[i] - 1;
46     idt = 2*i;
47     s1  = x[idt];  s2 = x[1+idt];
48     while (nz--) {
49       idx = 2*(*vi++);
50       x1  = x[idx];   x2 = x[1+idx];
51       s1 -= v[0]*x1 + v[2]*x2;
52       s2 -= v[1]*x1 + v[3]*x2;
53       v  += 4;
54     }
55     v        = aa +  4*diag[i];
56     x[idt]   = v[0]*s1 + v[2]*s2;
57     x[1+idt] = v[1]*s1 + v[3]*s2;
58   }
59 
60   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
61   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
62   ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
67 {
68   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
69   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
70   PetscInt          i,k,nz,idx,idt,jdx;
71   PetscErrorCode    ierr;
72   const MatScalar   *aa=a->a,*v;
73   PetscScalar       *x,s1,s2,x1,x2;
74   const PetscScalar *b;
75 
76   PetscFunctionBegin;
77   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
78   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
79   /* forward solve the lower triangular */
80   idx  = 0;
81   x[0] = b[idx]; x[1] = b[1+idx];
82   for (i=1; i<n; i++) {
83     v   = aa + 4*ai[i];
84     vi  = aj + ai[i];
85     nz  = ai[i+1] - ai[i];
86     idx = 2*i;
87     s1  = b[idx];s2 = b[1+idx];
88     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
89     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
90     for (k=0; k<nz; k++) {
91       jdx = 2*vi[k];
92       x1  = x[jdx];x2 = x[1+jdx];
93       s1 -= v[0]*x1 + v[2]*x2;
94       s2 -= v[1]*x1 + v[3]*x2;
95       v  +=  4;
96     }
97     x[idx]   = s1;
98     x[1+idx] = s2;
99   }
100 
101   /* backward solve the upper triangular */
102   for (i=n-1; i>=0; i--) {
103     v   = aa + 4*(adiag[i+1]+1);
104     vi  = aj + adiag[i+1]+1;
105     nz  = adiag[i] - adiag[i+1]-1;
106     idt = 2*i;
107     s1  = x[idt];  s2 = x[1+idt];
108     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
109     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
110     for (k=0; k<nz; k++) {
111       idx = 2*vi[k];
112       x1  = x[idx];   x2 = x[1+idx];
113       s1 -= v[0]*x1 + v[2]*x2;
114       s2 -= v[1]*x1 + v[3]*x2;
115       v  += 4;
116     }
117     /* x = inv_diagonal*x */
118     x[idt]   = v[0]*s1 + v[2]*s2;
119     x[1+idt] = v[1]*s1 + v[3]*s2;
120   }
121 
122   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
123   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
124   ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
129 {
130   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
131   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j;
132   PetscInt          i,k,nz,idx,jdx;
133   PetscErrorCode    ierr;
134   const MatScalar   *aa=a->a,*v;
135   PetscScalar       *x,s1,s2,x1,x2;
136   const PetscScalar *b;
137 
138   PetscFunctionBegin;
139   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   /* forward solve the lower triangular */
142   idx  = 0;
143   x[0] = b[idx]; x[1] = b[1+idx];
144   for (i=1; i<n; i++) {
145     v   = aa + 4*ai[i];
146     vi  = aj + ai[i];
147     nz  = ai[i+1] - ai[i];
148     idx = 2*i;
149     s1  = b[idx];s2 = b[1+idx];
150     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
151     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
152     for (k=0; k<nz; k++) {
153       jdx = 2*vi[k];
154       x1  = x[jdx];x2 = x[1+jdx];
155       s1 -= v[0]*x1 + v[2]*x2;
156       s2 -= v[1]*x1 + v[3]*x2;
157       v  +=  4;
158     }
159     x[idx]   = s1;
160     x[1+idx] = s2;
161   }
162 
163 
164   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
165   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
166   ierr = PetscLogFlops(4.0*(a->nz) - A->cmap->n);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
171 {
172   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
173   const PetscInt    n  = a->mbs,*vi,*aj=a->j,*adiag=a->diag;
174   PetscInt          i,k,nz,idx,idt;
175   PetscErrorCode    ierr;
176   const MatScalar   *aa=a->a,*v;
177   PetscScalar       *x,s1,s2,x1,x2;
178   const PetscScalar *b;
179 
180   PetscFunctionBegin;
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
183 
184   /* backward solve the upper triangular */
185   for (i=n-1; i>=0; i--) {
186     v   = aa + 4*(adiag[i+1]+1);
187     vi  = aj + adiag[i+1]+1;
188     nz  = adiag[i] - adiag[i+1]-1;
189     idt = 2*i;
190     s1  = b[idt];  s2 = b[1+idt];
191     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
192     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
193     for (k=0; k<nz; k++) {
194       idx = 2*vi[k];
195       x1  = x[idx];   x2 = x[1+idx];
196       s1 -= v[0]*x1 + v[2]*x2;
197       s2 -= v[1]*x1 + v[3]*x2;
198       v  += 4;
199     }
200     /* x = inv_diagonal*x */
201     x[idt]   = v[0]*s1 + v[2]*s2;
202     x[1+idt] = v[1]*s1 + v[3]*s2;
203   }
204 
205   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
206   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
207   ierr = PetscLogFlops(4.0*a->nz - A->cmap->n);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210