xref: /petsc/src/mat/impls/baij/seq/baijsolvtrann.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /* ----------------------------------------------------------- */
5 PetscErrorCode MatSolveTranspose_SeqBAIJ_N_inplace(Mat A,Vec bb,Vec xx)
6 {
7   Mat_SeqBAIJ       *a   =(Mat_SeqBAIJ*)A->data;
8   IS                iscol=a->col,isrow=a->row;
9   PetscErrorCode    ierr;
10   const PetscInt    *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*vi;
11   PetscInt          i,nz,j;
12   const PetscInt    n  =a->mbs,bs=A->rmap->bs,bs2=a->bs2;
13   const MatScalar   *aa=a->a,*v;
14   PetscScalar       *x,*t,*ls;
15   const PetscScalar *b;
16 
17   PetscFunctionBegin;
18   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
19   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
20   t    = a->solve_work;
21 
22   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
23   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
24 
25   /* copy the b into temp work space according to permutation */
26   for (i=0; i<n; i++) {
27     for (j=0; j<bs; j++) {
28       t[i*bs+j] = b[c[i]*bs+j];
29     }
30   }
31 
32   /* forward solve the upper triangular transpose */
33   ls = a->solve_work + A->cmap->n;
34   for (i=0; i<n; i++) {
35     ierr = PetscArraycpy(ls,t+i*bs,bs);CHKERRQ(ierr);
36     PetscKernel_w_gets_transA_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs);
37     v  = aa + bs2*(a->diag[i] + 1);
38     vi = aj + a->diag[i] + 1;
39     nz = ai[i+1] - a->diag[i] - 1;
40     while (nz--) {
41       PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(*vi++),v,t+i*bs);
42       v += bs2;
43     }
44   }
45 
46   /* backward solve the lower triangular transpose */
47   for (i=n-1; i>=0; i--) {
48     v  = aa + bs2*ai[i];
49     vi = aj + ai[i];
50     nz = a->diag[i] - ai[i];
51     while (nz--) {
52       PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(*vi++),v,t+i*bs);
53       v += bs2;
54     }
55   }
56 
57   /* copy t into x according to permutation */
58   for (i=0; i<n; i++) {
59     for (j=0; j<bs; j++) {
60       x[bs*r[i]+j]   = t[bs*i+j];
61     }
62   }
63 
64   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
65   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
66   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
67   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
68   ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 PetscErrorCode MatSolveTranspose_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
73 {
74   Mat_SeqBAIJ       *a   =(Mat_SeqBAIJ*)A->data;
75   IS                iscol=a->col,isrow=a->row;
76   PetscErrorCode    ierr;
77   const PetscInt    *r,*c,*rout,*cout;
78   const PetscInt    n=a->mbs,*ai=a->i,*aj=a->j,*vi,*diag=a->diag;
79   PetscInt          i,j,nz;
80   const PetscInt    bs =A->rmap->bs,bs2=a->bs2;
81   const MatScalar   *aa=a->a,*v;
82   PetscScalar       *x,*t,*ls;
83   const PetscScalar *b;
84 
85   PetscFunctionBegin;
86   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
87   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
88   t    = a->solve_work;
89 
90   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
91   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
92 
93   /* copy the b into temp work space according to permutation */
94   for (i=0; i<n; i++) {
95     for (j=0; j<bs; j++) {
96       t[i*bs+j] = b[c[i]*bs+j];
97     }
98   }
99 
100   /* forward solve the upper triangular transpose */
101   ls = a->solve_work + A->cmap->n;
102   for (i=0; i<n; i++) {
103     ierr = PetscArraycpy(ls,t+i*bs,bs);CHKERRQ(ierr);
104     PetscKernel_w_gets_transA_times_v(bs,ls,aa+bs2*diag[i],t+i*bs);
105     v  = aa + bs2*(diag[i] - 1);
106     vi = aj + diag[i] - 1;
107     nz = diag[i] - diag[i+1] - 1;
108     for (j=0; j>-nz; j--) {
109       PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(vi[j]),v,t+i*bs);
110       v -= bs2;
111     }
112   }
113 
114   /* backward solve the lower triangular transpose */
115   for (i=n-1; i>=0; i--) {
116     v  = aa + bs2*ai[i];
117     vi = aj + ai[i];
118     nz = ai[i+1] - ai[i];
119     for (j=0; j<nz; j++) {
120       PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(vi[j]),v,t+i*bs);
121       v += bs2;
122     }
123   }
124 
125   /* copy t into x according to permutation */
126   for (i=0; i<n; i++) {
127     for (j=0; j<bs; j++) {
128       x[bs*r[i]+j]   = t[bs*i+j];
129     }
130   }
131 
132   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
133   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
134   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
135   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
136   ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140