xref: /petsc/src/mat/impls/baij/seq/baijsolvnat11.c (revision 6996bd1a6dda9216f11f3a1c5d2357ea301aa80d)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /* Block operations are done by accessing one column at a time */
5 /* Default MatSolve for block size 11 */
6 
MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A,Vec bb,Vec xx)7 PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx)
8 {
9   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
10   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
11   PetscInt           i, k, nz, idx, idt, m;
12   const MatScalar   *aa = a->a, *v;
13   PetscScalar        s[11];
14   PetscScalar       *x, xv;
15   const PetscScalar *b;
16 
17   PetscFunctionBegin;
18   PetscCall(VecGetArrayRead(bb, &b));
19   PetscCall(VecGetArray(xx, &x));
20 
21   /* forward solve the lower triangular */
22   for (i = 0; i < n; i++) {
23     v           = aa + bs2 * ai[i];
24     vi          = aj + ai[i];
25     nz          = ai[i + 1] - ai[i];
26     idt         = bs * i;
27     x[idt]      = b[idt];
28     x[1 + idt]  = b[1 + idt];
29     x[2 + idt]  = b[2 + idt];
30     x[3 + idt]  = b[3 + idt];
31     x[4 + idt]  = b[4 + idt];
32     x[5 + idt]  = b[5 + idt];
33     x[6 + idt]  = b[6 + idt];
34     x[7 + idt]  = b[7 + idt];
35     x[8 + idt]  = b[8 + idt];
36     x[9 + idt]  = b[9 + idt];
37     x[10 + idt] = b[10 + idt];
38     for (m = 0; m < nz; m++) {
39       idx = bs * vi[m];
40       for (k = 0; k < 11; k++) {
41         xv = x[k + idx];
42         x[idt] -= v[0] * xv;
43         x[1 + idt] -= v[1] * xv;
44         x[2 + idt] -= v[2] * xv;
45         x[3 + idt] -= v[3] * xv;
46         x[4 + idt] -= v[4] * xv;
47         x[5 + idt] -= v[5] * xv;
48         x[6 + idt] -= v[6] * xv;
49         x[7 + idt] -= v[7] * xv;
50         x[8 + idt] -= v[8] * xv;
51         x[9 + idt] -= v[9] * xv;
52         x[10 + idt] -= v[10] * xv;
53         v += 11;
54       }
55     }
56   }
57   /* backward solve the upper triangular */
58   for (i = n - 1; i >= 0; i--) {
59     v     = aa + bs2 * (adiag[i + 1] + 1);
60     vi    = aj + adiag[i + 1] + 1;
61     nz    = adiag[i] - adiag[i + 1] - 1;
62     idt   = bs * i;
63     s[0]  = x[idt];
64     s[1]  = x[1 + idt];
65     s[2]  = x[2 + idt];
66     s[3]  = x[3 + idt];
67     s[4]  = x[4 + idt];
68     s[5]  = x[5 + idt];
69     s[6]  = x[6 + idt];
70     s[7]  = x[7 + idt];
71     s[8]  = x[8 + idt];
72     s[9]  = x[9 + idt];
73     s[10] = x[10 + idt];
74 
75     for (m = 0; m < nz; m++) {
76       idx = bs * vi[m];
77       for (k = 0; k < 11; k++) {
78         xv = x[k + idx];
79         s[0] -= v[0] * xv;
80         s[1] -= v[1] * xv;
81         s[2] -= v[2] * xv;
82         s[3] -= v[3] * xv;
83         s[4] -= v[4] * xv;
84         s[5] -= v[5] * xv;
85         s[6] -= v[6] * xv;
86         s[7] -= v[7] * xv;
87         s[8] -= v[8] * xv;
88         s[9] -= v[9] * xv;
89         s[10] -= v[10] * xv;
90         v += 11;
91       }
92     }
93     PetscCall(PetscArrayzero(x + idt, bs));
94     for (k = 0; k < 11; k++) {
95       x[idt] += v[0] * s[k];
96       x[1 + idt] += v[1] * s[k];
97       x[2 + idt] += v[2] * s[k];
98       x[3 + idt] += v[3] * s[k];
99       x[4 + idt] += v[4] * s[k];
100       x[5 + idt] += v[5] * s[k];
101       x[6 + idt] += v[6] * s[k];
102       x[7 + idt] += v[7] * s[k];
103       x[8 + idt] += v[8] * s[k];
104       x[9 + idt] += v[9] * s[k];
105       x[10 + idt] += v[10] * s[k];
106       v += 11;
107     }
108   }
109   PetscCall(VecRestoreArrayRead(bb, &b));
110   PetscCall(VecRestoreArray(xx, &x));
111   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
112   PetscFunctionReturn(PETSC_SUCCESS);
113 }
114