xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision 8aefec70bf72ef1dffeff2fde7d501d2a2f792dc)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
8 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
9 {
10   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
11   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
12   PetscErrorCode    ierr;
13   const MatScalar   *aa=a->a,*v;
14   PetscScalar       *x,s1,s2,x1,x2;
15   const PetscScalar *b;
16   PetscInt          jdx,idt,idx,nz,i;
17 
18   PetscFunctionBegin;
19   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
20   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
21 
22   /* forward solve the lower triangular */
23   idx  = 0;
24   x[0] = b[0]; x[1] = b[1];
25   for (i=1; i<n; i++) {
26     v    =  aa      + 4*ai[i];
27     vi   =  aj      + ai[i];
28     nz   =  diag[i] - ai[i];
29     idx +=  2;
30     s1   =  b[idx];s2 = b[1+idx];
31     while (nz--) {
32       jdx = 2*(*vi++);
33       x1  = x[jdx];x2 = x[1+jdx];
34       s1 -= v[0]*x1 + v[2]*x2;
35       s2 -= v[1]*x1 + v[3]*x2;
36       v  += 4;
37     }
38     x[idx]   = s1;
39     x[1+idx] = s2;
40   }
41   /* backward solve the upper triangular */
42   for (i=n-1; i>=0; i--) {
43     v   = aa + 4*diag[i] + 4;
44     vi  = aj + diag[i] + 1;
45     nz  = ai[i+1] - diag[i] - 1;
46     idt = 2*i;
47     s1  = x[idt];  s2 = x[1+idt];
48     while (nz--) {
49       idx = 2*(*vi++);
50       x1  = x[idx];   x2 = x[1+idx];
51       s1 -= v[0]*x1 + v[2]*x2;
52       s2 -= v[1]*x1 + v[3]*x2;
53       v  += 4;
54     }
55     v        = aa +  4*diag[i];
56     x[idt]   = v[0]*s1 + v[2]*s2;
57     x[1+idt] = v[1]*s1 + v[3]*s2;
58   }
59 
60   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
61   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
62   ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
67 {
68   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
69   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
70   PetscInt          i,k,nz,idx,idt,jdx;
71   PetscErrorCode    ierr;
72   const MatScalar   *aa=a->a,*v;
73   PetscScalar       *x,s1,s2,x1,x2;
74   const PetscScalar *b;
75 
76   PetscFunctionBegin;
77   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
78   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
79   /* forward solve the lower triangular */
80   idx  = 0;
81   x[0] = b[idx]; x[1] = b[1+idx];
82   for (i=1; i<n; i++) {
83     v   = aa + 4*ai[i];
84     vi  = aj + ai[i];
85     nz  = ai[i+1] - ai[i];
86     idx = 2*i;
87     s1  = b[idx];s2 = b[1+idx];
88     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
89     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
90     for (k=0; k<nz; k++) {
91       jdx = 2*vi[k];
92       x1  = x[jdx];x2 = x[1+jdx];
93       s1 -= v[0]*x1 + v[2]*x2;
94       s2 -= v[1]*x1 + v[3]*x2;
95       v  +=  4;
96     }
97     x[idx]   = s1;
98     x[1+idx] = s2;
99   }
100 
101   /* backward solve the upper triangular */
102   for (i=n-1; i>=0; i--) {
103     v   = aa + 4*(adiag[i+1]+1);
104     vi  = aj + adiag[i+1]+1;
105     nz  = adiag[i] - adiag[i+1]-1;
106     idt = 2*i;
107     s1  = x[idt];  s2 = x[1+idt];
108     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
109     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
110     for (k=0; k<nz; k++) {
111       idx = 2*vi[k];
112       x1  = x[idx];   x2 = x[1+idx];
113       s1 -= v[0]*x1 + v[2]*x2;
114       s2 -= v[1]*x1 + v[3]*x2;
115       v  += 4;
116     }
117     /* x = inv_diagonal*x */
118     x[idt]   = v[0]*s1 + v[2]*s2;
119     x[1+idt] = v[1]*s1 + v[3]*s2;
120   }
121 
122   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
123   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
124   ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
129 {
130   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
131   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j;
132   PetscInt          i,k,nz,idx,jdx;
133   PetscErrorCode    ierr;
134   const MatScalar   *aa=a->a,*v;
135   PetscScalar       *x,s1,s2,x1,x2;
136   const PetscScalar *b;
137 
138   PetscFunctionBegin;
139   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   /* forward solve the lower triangular */
142   idx  = 0;
143   x[0] = b[idx]; x[1] = b[1+idx];
144   for (i=1; i<n; i++) {
145     v   = aa + 4*ai[i];
146     vi  = aj + ai[i];
147     nz  = ai[i+1] - ai[i];
148     idx = 2*i;
149     s1  = b[idx];s2 = b[1+idx];
150     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
151     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
152     for (k=0; k<nz; k++) {
153       jdx = 2*vi[k];
154       x1  = x[jdx];x2 = x[1+jdx];
155       s1 -= v[0]*x1 + v[2]*x2;
156       s2 -= v[1]*x1 + v[3]*x2;
157       v  +=  4;
158     }
159     x[idx]   = s1;
160     x[1+idx] = s2;
161   }
162 
163   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
164   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
165   ierr = PetscLogFlops(4.0*(a->nz) - A->cmap->n);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
170 {
171   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
172   const PetscInt    n  = a->mbs,*vi,*aj=a->j,*adiag=a->diag;
173   PetscInt          i,k,nz,idx,idt;
174   PetscErrorCode    ierr;
175   const MatScalar   *aa=a->a,*v;
176   PetscScalar       *x,s1,s2,x1,x2;
177   const PetscScalar *b;
178 
179   PetscFunctionBegin;
180   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
181   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
182 
183   /* backward solve the upper triangular */
184   for (i=n-1; i>=0; i--) {
185     v   = aa + 4*(adiag[i+1]+1);
186     vi  = aj + adiag[i+1]+1;
187     nz  = adiag[i] - adiag[i+1]-1;
188     idt = 2*i;
189     s1  = b[idt];  s2 = b[1+idt];
190     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
191     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
192     for (k=0; k<nz; k++) {
193       idx = 2*vi[k];
194       x1  = x[idx];   x2 = x[1+idx];
195       s1 -= v[0]*x1 + v[2]*x2;
196       s2 -= v[1]*x1 + v[3]*x2;
197       v  += 4;
198     }
199     /* x = inv_diagonal*x */
200     x[idt]   = v[0]*s1 + v[2]*s2;
201     x[1+idt] = v[1]*s1 + v[3]*s2;
202   }
203 
204   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
205   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
206   ierr = PetscLogFlops(4.0*a->nz - A->cmap->n);CHKERRQ(ierr);
207   PetscFunctionReturn(0);
208 }
209