xref: /petsc/src/mat/impls/baij/seq/baijsolv.c (revision a02648fdf9ec0d41d7b5ca02cb70ddcfa0e65728)
128e30874SSatish Balay #include <../src/mat/impls/baij/seq/baij.h>
2af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
328e30874SSatish Balay 
MatSolve_SeqBAIJ_N_inplace(Mat A,Vec bb,Vec xx)4d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N_inplace(Mat A, Vec bb, Vec xx)
5d71ae5a4SJacob Faibussowitsch {
628e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
728e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
828e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
928e30874SSatish Balay   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *vi;
1028e30874SSatish Balay   PetscInt           i, nz;
1128e30874SSatish Balay   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
1228e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
1328e30874SSatish Balay   PetscScalar       *x, *s, *t, *ls;
1428e30874SSatish Balay   const PetscScalar *b;
1528e30874SSatish Balay 
1628e30874SSatish Balay   PetscFunctionBegin;
17*ce6b3536SJed Brown   PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2028e30874SSatish Balay   t = a->solve_work;
2128e30874SSatish Balay 
229371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
239371c9d4SSatish Balay   r = rout;
249371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
259371c9d4SSatish Balay   c = cout + (n - 1);
2628e30874SSatish Balay 
2728e30874SSatish Balay   /* forward solve the lower triangular */
289566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(t, b + bs * (*r++), bs));
2928e30874SSatish Balay   for (i = 1; i < n; i++) {
3028e30874SSatish Balay     v  = aa + bs2 * ai[i];
3128e30874SSatish Balay     vi = aj + ai[i];
3228e30874SSatish Balay     nz = a->diag[i] - ai[i];
3328e30874SSatish Balay     s  = t + bs * i;
349566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(s, b + bs * (*r++), bs));
3528e30874SSatish Balay     while (nz--) {
3628e30874SSatish Balay       PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * (*vi++));
3728e30874SSatish Balay       v += bs2;
3828e30874SSatish Balay     }
3928e30874SSatish Balay   }
4028e30874SSatish Balay   /* backward solve the upper triangular */
4128e30874SSatish Balay   ls = a->solve_work + A->cmap->n;
4228e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
4328e30874SSatish Balay     v  = aa + bs2 * (a->diag[i] + 1);
4428e30874SSatish Balay     vi = aj + a->diag[i] + 1;
4528e30874SSatish Balay     nz = ai[i + 1] - a->diag[i] - 1;
469566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ls, t + i * bs, bs));
4728e30874SSatish Balay     while (nz--) {
4828e30874SSatish Balay       PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * (*vi++));
4928e30874SSatish Balay       v += bs2;
5028e30874SSatish Balay     }
5128e30874SSatish Balay     PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * a->diag[i], t + i * bs);
529566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(x + bs * (*c--), t + i * bs, bs));
5328e30874SSatish Balay   }
5428e30874SSatish Balay 
559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
569566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6128e30874SSatish Balay }
6228e30874SSatish Balay 
MatSolve_SeqBAIJ_7_inplace(Mat A,Vec bb,Vec xx)63d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_7_inplace(Mat A, Vec bb, Vec xx)
64d71ae5a4SJacob Faibussowitsch {
6528e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
6628e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
6728e30874SSatish Balay   const PetscInt    *r, *c, *ai = a->i, *aj = a->j;
6828e30874SSatish Balay   const PetscInt    *rout, *cout, *diag = a->diag, *vi, n = a->mbs;
6928e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
7028e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
7128e30874SSatish Balay   PetscScalar        s1, s2, s3, s4, s5, s6, s7, x1, x2, x3, x4, x5, x6, x7, *x, *t;
7228e30874SSatish Balay   const PetscScalar *b;
7328e30874SSatish Balay 
7428e30874SSatish Balay   PetscFunctionBegin;
759566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
769566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
7728e30874SSatish Balay   t = a->solve_work;
7828e30874SSatish Balay 
799371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
809371c9d4SSatish Balay   r = rout;
819371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
829371c9d4SSatish Balay   c = cout + (n - 1);
8328e30874SSatish Balay 
8428e30874SSatish Balay   /* forward solve the lower triangular */
8528e30874SSatish Balay   idx  = 7 * (*r++);
869371c9d4SSatish Balay   t[0] = b[idx];
879371c9d4SSatish Balay   t[1] = b[1 + idx];
889371c9d4SSatish Balay   t[2] = b[2 + idx];
899371c9d4SSatish Balay   t[3] = b[3 + idx];
909371c9d4SSatish Balay   t[4] = b[4 + idx];
919371c9d4SSatish Balay   t[5] = b[5 + idx];
929371c9d4SSatish Balay   t[6] = b[6 + idx];
9328e30874SSatish Balay 
9428e30874SSatish Balay   for (i = 1; i < n; i++) {
9528e30874SSatish Balay     v   = aa + 49 * ai[i];
9628e30874SSatish Balay     vi  = aj + ai[i];
9728e30874SSatish Balay     nz  = diag[i] - ai[i];
9828e30874SSatish Balay     idx = 7 * (*r++);
999371c9d4SSatish Balay     s1  = b[idx];
1009371c9d4SSatish Balay     s2  = b[1 + idx];
1019371c9d4SSatish Balay     s3  = b[2 + idx];
1029371c9d4SSatish Balay     s4  = b[3 + idx];
1039371c9d4SSatish Balay     s5  = b[4 + idx];
1049371c9d4SSatish Balay     s6  = b[5 + idx];
1059371c9d4SSatish Balay     s7  = b[6 + idx];
10628e30874SSatish Balay     while (nz--) {
10728e30874SSatish Balay       idx = 7 * (*vi++);
1089371c9d4SSatish Balay       x1  = t[idx];
1099371c9d4SSatish Balay       x2  = t[1 + idx];
1109371c9d4SSatish Balay       x3  = t[2 + idx];
1119371c9d4SSatish Balay       x4  = t[3 + idx];
1129371c9d4SSatish Balay       x5  = t[4 + idx];
1139371c9d4SSatish Balay       x6  = t[5 + idx];
1149371c9d4SSatish Balay       x7  = t[6 + idx];
11528e30874SSatish Balay       s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7;
11628e30874SSatish Balay       s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7;
11728e30874SSatish Balay       s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7;
11828e30874SSatish Balay       s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7;
11928e30874SSatish Balay       s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7;
12028e30874SSatish Balay       s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7;
12128e30874SSatish Balay       s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7;
12228e30874SSatish Balay       v += 49;
12328e30874SSatish Balay     }
12428e30874SSatish Balay     idx        = 7 * i;
1259371c9d4SSatish Balay     t[idx]     = s1;
1269371c9d4SSatish Balay     t[1 + idx] = s2;
1279371c9d4SSatish Balay     t[2 + idx] = s3;
1289371c9d4SSatish Balay     t[3 + idx] = s4;
1299371c9d4SSatish Balay     t[4 + idx] = s5;
1309371c9d4SSatish Balay     t[5 + idx] = s6;
1319371c9d4SSatish Balay     t[6 + idx] = s7;
13228e30874SSatish Balay   }
13328e30874SSatish Balay   /* backward solve the upper triangular */
13428e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
13528e30874SSatish Balay     v   = aa + 49 * diag[i] + 49;
13628e30874SSatish Balay     vi  = aj + diag[i] + 1;
13728e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
13828e30874SSatish Balay     idt = 7 * i;
1399371c9d4SSatish Balay     s1  = t[idt];
1409371c9d4SSatish Balay     s2  = t[1 + idt];
1419371c9d4SSatish Balay     s3  = t[2 + idt];
1429371c9d4SSatish Balay     s4  = t[3 + idt];
1439371c9d4SSatish Balay     s5  = t[4 + idt];
1449371c9d4SSatish Balay     s6  = t[5 + idt];
1459371c9d4SSatish Balay     s7  = t[6 + idt];
14628e30874SSatish Balay     while (nz--) {
14728e30874SSatish Balay       idx = 7 * (*vi++);
1489371c9d4SSatish Balay       x1  = t[idx];
1499371c9d4SSatish Balay       x2  = t[1 + idx];
1509371c9d4SSatish Balay       x3  = t[2 + idx];
1519371c9d4SSatish Balay       x4  = t[3 + idx];
1529371c9d4SSatish Balay       x5  = t[4 + idx];
1539371c9d4SSatish Balay       x6  = t[5 + idx];
1549371c9d4SSatish Balay       x7  = t[6 + idx];
15528e30874SSatish Balay       s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7;
15628e30874SSatish Balay       s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7;
15728e30874SSatish Balay       s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7;
15828e30874SSatish Balay       s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7;
15928e30874SSatish Balay       s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7;
16028e30874SSatish Balay       s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7;
16128e30874SSatish Balay       s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7;
16228e30874SSatish Balay       v += 49;
16328e30874SSatish Balay     }
16428e30874SSatish Balay     idc    = 7 * (*c--);
16528e30874SSatish Balay     v      = aa + 49 * diag[i];
1669371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[7] * s2 + v[14] * s3 + v[21] * s4 + v[28] * s5 + v[35] * s6 + v[42] * s7;
1679371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[8] * s2 + v[15] * s3 + v[22] * s4 + v[29] * s5 + v[36] * s6 + v[43] * s7;
1689371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[9] * s2 + v[16] * s3 + v[23] * s4 + v[30] * s5 + v[37] * s6 + v[44] * s7;
1699371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[10] * s2 + v[17] * s3 + v[24] * s4 + v[31] * s5 + v[38] * s6 + v[45] * s7;
1709371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[11] * s2 + v[18] * s3 + v[25] * s4 + v[32] * s5 + v[39] * s6 + v[46] * s7;
1719371c9d4SSatish Balay     x[5 + idc] = t[5 + idt] = v[5] * s1 + v[12] * s2 + v[19] * s3 + v[26] * s4 + v[33] * s5 + v[40] * s6 + v[47] * s7;
1729371c9d4SSatish Balay     x[6 + idc] = t[6 + idt] = v[6] * s1 + v[13] * s2 + v[20] * s3 + v[27] * s4 + v[34] * s5 + v[41] * s6 + v[48] * s7;
17328e30874SSatish Balay   }
17428e30874SSatish Balay 
1759566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
1769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 49 * (a->nz) - 7.0 * A->cmap->n));
1803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18128e30874SSatish Balay }
18228e30874SSatish Balay 
MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_7(Mat A, Vec bb, Vec xx)
184d71ae5a4SJacob Faibussowitsch {
18528e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
18628e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
18728e30874SSatish Balay   const PetscInt    *r, *c, *ai = a->i, *aj = a->j, *adiag = a->diag;
18828e30874SSatish Balay   const PetscInt     n = a->mbs, *rout, *cout, *vi;
18928e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc, m;
19028e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
19128e30874SSatish Balay   PetscScalar        s1, s2, s3, s4, s5, s6, s7, x1, x2, x3, x4, x5, x6, x7, *x, *t;
19228e30874SSatish Balay   const PetscScalar *b;
19328e30874SSatish Balay 
19428e30874SSatish Balay   PetscFunctionBegin;
1959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
1969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
19728e30874SSatish Balay   t = a->solve_work;
19828e30874SSatish Balay 
1999371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
2009371c9d4SSatish Balay   r = rout;
2019371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
2029371c9d4SSatish Balay   c = cout;
20328e30874SSatish Balay 
20428e30874SSatish Balay   /* forward solve the lower triangular */
20528e30874SSatish Balay   idx  = 7 * r[0];
2069371c9d4SSatish Balay   t[0] = b[idx];
2079371c9d4SSatish Balay   t[1] = b[1 + idx];
2089371c9d4SSatish Balay   t[2] = b[2 + idx];
2099371c9d4SSatish Balay   t[3] = b[3 + idx];
2109371c9d4SSatish Balay   t[4] = b[4 + idx];
2119371c9d4SSatish Balay   t[5] = b[5 + idx];
2129371c9d4SSatish Balay   t[6] = b[6 + idx];
21328e30874SSatish Balay 
21428e30874SSatish Balay   for (i = 1; i < n; i++) {
21528e30874SSatish Balay     v   = aa + 49 * ai[i];
21628e30874SSatish Balay     vi  = aj + ai[i];
21728e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
21828e30874SSatish Balay     idx = 7 * r[i];
2199371c9d4SSatish Balay     s1  = b[idx];
2209371c9d4SSatish Balay     s2  = b[1 + idx];
2219371c9d4SSatish Balay     s3  = b[2 + idx];
2229371c9d4SSatish Balay     s4  = b[3 + idx];
2239371c9d4SSatish Balay     s5  = b[4 + idx];
2249371c9d4SSatish Balay     s6  = b[5 + idx];
2259371c9d4SSatish Balay     s7  = b[6 + idx];
22628e30874SSatish Balay     for (m = 0; m < nz; m++) {
22728e30874SSatish Balay       idx = 7 * vi[m];
2289371c9d4SSatish Balay       x1  = t[idx];
2299371c9d4SSatish Balay       x2  = t[1 + idx];
2309371c9d4SSatish Balay       x3  = t[2 + idx];
2319371c9d4SSatish Balay       x4  = t[3 + idx];
2329371c9d4SSatish Balay       x5  = t[4 + idx];
2339371c9d4SSatish Balay       x6  = t[5 + idx];
2349371c9d4SSatish Balay       x7  = t[6 + idx];
23528e30874SSatish Balay       s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7;
23628e30874SSatish Balay       s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7;
23728e30874SSatish Balay       s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7;
23828e30874SSatish Balay       s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7;
23928e30874SSatish Balay       s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7;
24028e30874SSatish Balay       s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7;
24128e30874SSatish Balay       s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7;
24228e30874SSatish Balay       v += 49;
24328e30874SSatish Balay     }
24428e30874SSatish Balay     idx        = 7 * i;
2459371c9d4SSatish Balay     t[idx]     = s1;
2469371c9d4SSatish Balay     t[1 + idx] = s2;
2479371c9d4SSatish Balay     t[2 + idx] = s3;
2489371c9d4SSatish Balay     t[3 + idx] = s4;
2499371c9d4SSatish Balay     t[4 + idx] = s5;
2509371c9d4SSatish Balay     t[5 + idx] = s6;
2519371c9d4SSatish Balay     t[6 + idx] = s7;
25228e30874SSatish Balay   }
25328e30874SSatish Balay   /* backward solve the upper triangular */
25428e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
25528e30874SSatish Balay     v   = aa + 49 * (adiag[i + 1] + 1);
25628e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
25728e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
25828e30874SSatish Balay     idt = 7 * i;
2599371c9d4SSatish Balay     s1  = t[idt];
2609371c9d4SSatish Balay     s2  = t[1 + idt];
2619371c9d4SSatish Balay     s3  = t[2 + idt];
2629371c9d4SSatish Balay     s4  = t[3 + idt];
2639371c9d4SSatish Balay     s5  = t[4 + idt];
2649371c9d4SSatish Balay     s6  = t[5 + idt];
2659371c9d4SSatish Balay     s7  = t[6 + idt];
26628e30874SSatish Balay     for (m = 0; m < nz; m++) {
26728e30874SSatish Balay       idx = 7 * vi[m];
2689371c9d4SSatish Balay       x1  = t[idx];
2699371c9d4SSatish Balay       x2  = t[1 + idx];
2709371c9d4SSatish Balay       x3  = t[2 + idx];
2719371c9d4SSatish Balay       x4  = t[3 + idx];
2729371c9d4SSatish Balay       x5  = t[4 + idx];
2739371c9d4SSatish Balay       x6  = t[5 + idx];
2749371c9d4SSatish Balay       x7  = t[6 + idx];
27528e30874SSatish Balay       s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7;
27628e30874SSatish Balay       s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7;
27728e30874SSatish Balay       s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7;
27828e30874SSatish Balay       s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7;
27928e30874SSatish Balay       s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7;
28028e30874SSatish Balay       s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7;
28128e30874SSatish Balay       s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7;
28228e30874SSatish Balay       v += 49;
28328e30874SSatish Balay     }
28428e30874SSatish Balay     idc    = 7 * c[i];
2859371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[7] * s2 + v[14] * s3 + v[21] * s4 + v[28] * s5 + v[35] * s6 + v[42] * s7;
2869371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[8] * s2 + v[15] * s3 + v[22] * s4 + v[29] * s5 + v[36] * s6 + v[43] * s7;
2879371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[9] * s2 + v[16] * s3 + v[23] * s4 + v[30] * s5 + v[37] * s6 + v[44] * s7;
2889371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[10] * s2 + v[17] * s3 + v[24] * s4 + v[31] * s5 + v[38] * s6 + v[45] * s7;
2899371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[11] * s2 + v[18] * s3 + v[25] * s4 + v[32] * s5 + v[39] * s6 + v[46] * s7;
2909371c9d4SSatish Balay     x[5 + idc] = t[5 + idt] = v[5] * s1 + v[12] * s2 + v[19] * s3 + v[26] * s4 + v[33] * s5 + v[40] * s6 + v[47] * s7;
2919371c9d4SSatish Balay     x[6 + idc] = t[6 + idt] = v[6] * s1 + v[13] * s2 + v[20] * s3 + v[27] * s4 + v[34] * s5 + v[41] * s6 + v[48] * s7;
29228e30874SSatish Balay   }
29328e30874SSatish Balay 
2949566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
2959566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
2969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
2979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
2989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 49 * (a->nz) - 7.0 * A->cmap->n));
2993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30028e30874SSatish Balay }
30128e30874SSatish Balay 
MatSolve_SeqBAIJ_6_inplace(Mat A,Vec bb,Vec xx)302d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_6_inplace(Mat A, Vec bb, Vec xx)
303d71ae5a4SJacob Faibussowitsch {
30428e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
30528e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
30628e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
30728e30874SSatish Balay   const PetscInt    *diag = a->diag, n = a->mbs, *vi, *ai = a->i, *aj = a->j;
30828e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
30928e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
31028e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6, *t;
31128e30874SSatish Balay   const PetscScalar *b;
31228e30874SSatish Balay 
31328e30874SSatish Balay   PetscFunctionBegin;
3149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
3159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
31628e30874SSatish Balay   t = a->solve_work;
31728e30874SSatish Balay 
3189371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
3199371c9d4SSatish Balay   r = rout;
3209371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
3219371c9d4SSatish Balay   c = cout + (n - 1);
32228e30874SSatish Balay 
32328e30874SSatish Balay   /* forward solve the lower triangular */
32428e30874SSatish Balay   idx  = 6 * (*r++);
3259371c9d4SSatish Balay   t[0] = b[idx];
3269371c9d4SSatish Balay   t[1] = b[1 + idx];
3279371c9d4SSatish Balay   t[2] = b[2 + idx];
3289371c9d4SSatish Balay   t[3] = b[3 + idx];
3299371c9d4SSatish Balay   t[4] = b[4 + idx];
3309371c9d4SSatish Balay   t[5] = b[5 + idx];
33128e30874SSatish Balay   for (i = 1; i < n; i++) {
33228e30874SSatish Balay     v   = aa + 36 * ai[i];
33328e30874SSatish Balay     vi  = aj + ai[i];
33428e30874SSatish Balay     nz  = diag[i] - ai[i];
33528e30874SSatish Balay     idx = 6 * (*r++);
3369371c9d4SSatish Balay     s1  = b[idx];
3379371c9d4SSatish Balay     s2  = b[1 + idx];
3389371c9d4SSatish Balay     s3  = b[2 + idx];
3399371c9d4SSatish Balay     s4  = b[3 + idx];
3409371c9d4SSatish Balay     s5  = b[4 + idx];
3419371c9d4SSatish Balay     s6  = b[5 + idx];
34228e30874SSatish Balay     while (nz--) {
34328e30874SSatish Balay       idx = 6 * (*vi++);
3449371c9d4SSatish Balay       x1  = t[idx];
3459371c9d4SSatish Balay       x2  = t[1 + idx];
3469371c9d4SSatish Balay       x3  = t[2 + idx];
3479371c9d4SSatish Balay       x4  = t[3 + idx];
3489371c9d4SSatish Balay       x5  = t[4 + idx];
3499371c9d4SSatish Balay       x6  = t[5 + idx];
35028e30874SSatish Balay       s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
35128e30874SSatish Balay       s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
35228e30874SSatish Balay       s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
35328e30874SSatish Balay       s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
35428e30874SSatish Balay       s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
35528e30874SSatish Balay       s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
35628e30874SSatish Balay       v += 36;
35728e30874SSatish Balay     }
35828e30874SSatish Balay     idx        = 6 * i;
3599371c9d4SSatish Balay     t[idx]     = s1;
3609371c9d4SSatish Balay     t[1 + idx] = s2;
3619371c9d4SSatish Balay     t[2 + idx] = s3;
3629371c9d4SSatish Balay     t[3 + idx] = s4;
3639371c9d4SSatish Balay     t[4 + idx] = s5;
3649371c9d4SSatish Balay     t[5 + idx] = s6;
36528e30874SSatish Balay   }
36628e30874SSatish Balay   /* backward solve the upper triangular */
36728e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
36828e30874SSatish Balay     v   = aa + 36 * diag[i] + 36;
36928e30874SSatish Balay     vi  = aj + diag[i] + 1;
37028e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
37128e30874SSatish Balay     idt = 6 * i;
3729371c9d4SSatish Balay     s1  = t[idt];
3739371c9d4SSatish Balay     s2  = t[1 + idt];
3749371c9d4SSatish Balay     s3  = t[2 + idt];
3759371c9d4SSatish Balay     s4  = t[3 + idt];
3769371c9d4SSatish Balay     s5  = t[4 + idt];
3779371c9d4SSatish Balay     s6  = t[5 + idt];
37828e30874SSatish Balay     while (nz--) {
37928e30874SSatish Balay       idx = 6 * (*vi++);
3809371c9d4SSatish Balay       x1  = t[idx];
3819371c9d4SSatish Balay       x2  = t[1 + idx];
3829371c9d4SSatish Balay       x3  = t[2 + idx];
3839371c9d4SSatish Balay       x4  = t[3 + idx];
3849371c9d4SSatish Balay       x5  = t[4 + idx];
3859371c9d4SSatish Balay       x6  = t[5 + idx];
38628e30874SSatish Balay       s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
38728e30874SSatish Balay       s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
38828e30874SSatish Balay       s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
38928e30874SSatish Balay       s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
39028e30874SSatish Balay       s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
39128e30874SSatish Balay       s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
39228e30874SSatish Balay       v += 36;
39328e30874SSatish Balay     }
39428e30874SSatish Balay     idc    = 6 * (*c--);
39528e30874SSatish Balay     v      = aa + 36 * diag[i];
3969371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6;
3979371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6;
3989371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6;
3999371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6;
4009371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6;
4019371c9d4SSatish Balay     x[5 + idc] = t[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6;
40228e30874SSatish Balay   }
40328e30874SSatish Balay 
4049566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
4059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
4069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
4079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
4089566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 36 * (a->nz) - 6.0 * A->cmap->n));
4093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41028e30874SSatish Balay }
41128e30874SSatish Balay 
MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)412d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_6(Mat A, Vec bb, Vec xx)
413d71ae5a4SJacob Faibussowitsch {
41428e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
41528e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
41628e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
41728e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
41828e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc, m;
41928e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
42028e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6, *t;
42128e30874SSatish Balay   const PetscScalar *b;
42228e30874SSatish Balay 
42328e30874SSatish Balay   PetscFunctionBegin;
4249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
4259566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
42628e30874SSatish Balay   t = a->solve_work;
42728e30874SSatish Balay 
4289371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
4299371c9d4SSatish Balay   r = rout;
4309371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
4319371c9d4SSatish Balay   c = cout;
43228e30874SSatish Balay 
43328e30874SSatish Balay   /* forward solve the lower triangular */
43428e30874SSatish Balay   idx  = 6 * r[0];
4359371c9d4SSatish Balay   t[0] = b[idx];
4369371c9d4SSatish Balay   t[1] = b[1 + idx];
4379371c9d4SSatish Balay   t[2] = b[2 + idx];
4389371c9d4SSatish Balay   t[3] = b[3 + idx];
4399371c9d4SSatish Balay   t[4] = b[4 + idx];
4409371c9d4SSatish Balay   t[5] = b[5 + idx];
44128e30874SSatish Balay   for (i = 1; i < n; i++) {
44228e30874SSatish Balay     v   = aa + 36 * ai[i];
44328e30874SSatish Balay     vi  = aj + ai[i];
44428e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
44528e30874SSatish Balay     idx = 6 * r[i];
4469371c9d4SSatish Balay     s1  = b[idx];
4479371c9d4SSatish Balay     s2  = b[1 + idx];
4489371c9d4SSatish Balay     s3  = b[2 + idx];
4499371c9d4SSatish Balay     s4  = b[3 + idx];
4509371c9d4SSatish Balay     s5  = b[4 + idx];
4519371c9d4SSatish Balay     s6  = b[5 + idx];
45228e30874SSatish Balay     for (m = 0; m < nz; m++) {
45328e30874SSatish Balay       idx = 6 * vi[m];
4549371c9d4SSatish Balay       x1  = t[idx];
4559371c9d4SSatish Balay       x2  = t[1 + idx];
4569371c9d4SSatish Balay       x3  = t[2 + idx];
4579371c9d4SSatish Balay       x4  = t[3 + idx];
4589371c9d4SSatish Balay       x5  = t[4 + idx];
4599371c9d4SSatish Balay       x6  = t[5 + idx];
46028e30874SSatish Balay       s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
46128e30874SSatish Balay       s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
46228e30874SSatish Balay       s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
46328e30874SSatish Balay       s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
46428e30874SSatish Balay       s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
46528e30874SSatish Balay       s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
46628e30874SSatish Balay       v += 36;
46728e30874SSatish Balay     }
46828e30874SSatish Balay     idx        = 6 * i;
4699371c9d4SSatish Balay     t[idx]     = s1;
4709371c9d4SSatish Balay     t[1 + idx] = s2;
4719371c9d4SSatish Balay     t[2 + idx] = s3;
4729371c9d4SSatish Balay     t[3 + idx] = s4;
4739371c9d4SSatish Balay     t[4 + idx] = s5;
4749371c9d4SSatish Balay     t[5 + idx] = s6;
47528e30874SSatish Balay   }
47628e30874SSatish Balay   /* backward solve the upper triangular */
47728e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
47828e30874SSatish Balay     v   = aa + 36 * (adiag[i + 1] + 1);
47928e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
48028e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
48128e30874SSatish Balay     idt = 6 * i;
4829371c9d4SSatish Balay     s1  = t[idt];
4839371c9d4SSatish Balay     s2  = t[1 + idt];
4849371c9d4SSatish Balay     s3  = t[2 + idt];
4859371c9d4SSatish Balay     s4  = t[3 + idt];
4869371c9d4SSatish Balay     s5  = t[4 + idt];
4879371c9d4SSatish Balay     s6  = t[5 + idt];
48828e30874SSatish Balay     for (m = 0; m < nz; m++) {
48928e30874SSatish Balay       idx = 6 * vi[m];
4909371c9d4SSatish Balay       x1  = t[idx];
4919371c9d4SSatish Balay       x2  = t[1 + idx];
4929371c9d4SSatish Balay       x3  = t[2 + idx];
4939371c9d4SSatish Balay       x4  = t[3 + idx];
4949371c9d4SSatish Balay       x5  = t[4 + idx];
4959371c9d4SSatish Balay       x6  = t[5 + idx];
49628e30874SSatish Balay       s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
49728e30874SSatish Balay       s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
49828e30874SSatish Balay       s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
49928e30874SSatish Balay       s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
50028e30874SSatish Balay       s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
50128e30874SSatish Balay       s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
50228e30874SSatish Balay       v += 36;
50328e30874SSatish Balay     }
50428e30874SSatish Balay     idc    = 6 * c[i];
5059371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6;
5069371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6;
5079371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6;
5089371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6;
5099371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6;
5109371c9d4SSatish Balay     x[5 + idc] = t[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6;
51128e30874SSatish Balay   }
51228e30874SSatish Balay 
5139566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
5149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
5159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
5169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
5179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 36 * (a->nz) - 6.0 * A->cmap->n));
5183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
51928e30874SSatish Balay }
52028e30874SSatish Balay 
MatSolve_SeqBAIJ_5_inplace(Mat A,Vec bb,Vec xx)521d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_5_inplace(Mat A, Vec bb, Vec xx)
522d71ae5a4SJacob Faibussowitsch {
52328e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
52428e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
52528e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout, *diag = a->diag;
52628e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
52728e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
52828e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
52928e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5, *t;
53028e30874SSatish Balay   const PetscScalar *b;
53128e30874SSatish Balay 
53228e30874SSatish Balay   PetscFunctionBegin;
5339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
5349566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
53528e30874SSatish Balay   t = a->solve_work;
53628e30874SSatish Balay 
5379371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
5389371c9d4SSatish Balay   r = rout;
5399371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
5409371c9d4SSatish Balay   c = cout + (n - 1);
54128e30874SSatish Balay 
54228e30874SSatish Balay   /* forward solve the lower triangular */
54328e30874SSatish Balay   idx  = 5 * (*r++);
5449371c9d4SSatish Balay   t[0] = b[idx];
5459371c9d4SSatish Balay   t[1] = b[1 + idx];
5469371c9d4SSatish Balay   t[2] = b[2 + idx];
5479371c9d4SSatish Balay   t[3] = b[3 + idx];
5489371c9d4SSatish Balay   t[4] = b[4 + idx];
54928e30874SSatish Balay   for (i = 1; i < n; i++) {
55028e30874SSatish Balay     v   = aa + 25 * ai[i];
55128e30874SSatish Balay     vi  = aj + ai[i];
55228e30874SSatish Balay     nz  = diag[i] - ai[i];
55328e30874SSatish Balay     idx = 5 * (*r++);
5549371c9d4SSatish Balay     s1  = b[idx];
5559371c9d4SSatish Balay     s2  = b[1 + idx];
5569371c9d4SSatish Balay     s3  = b[2 + idx];
5579371c9d4SSatish Balay     s4  = b[3 + idx];
55828e30874SSatish Balay     s5  = b[4 + idx];
55928e30874SSatish Balay     while (nz--) {
56028e30874SSatish Balay       idx = 5 * (*vi++);
5619371c9d4SSatish Balay       x1  = t[idx];
5629371c9d4SSatish Balay       x2  = t[1 + idx];
5639371c9d4SSatish Balay       x3  = t[2 + idx];
5649371c9d4SSatish Balay       x4  = t[3 + idx];
5659371c9d4SSatish Balay       x5  = t[4 + idx];
56628e30874SSatish Balay       s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
56728e30874SSatish Balay       s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
56828e30874SSatish Balay       s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
56928e30874SSatish Balay       s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
57028e30874SSatish Balay       s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
57128e30874SSatish Balay       v += 25;
57228e30874SSatish Balay     }
57328e30874SSatish Balay     idx        = 5 * i;
5749371c9d4SSatish Balay     t[idx]     = s1;
5759371c9d4SSatish Balay     t[1 + idx] = s2;
5769371c9d4SSatish Balay     t[2 + idx] = s3;
5779371c9d4SSatish Balay     t[3 + idx] = s4;
5789371c9d4SSatish Balay     t[4 + idx] = s5;
57928e30874SSatish Balay   }
58028e30874SSatish Balay   /* backward solve the upper triangular */
58128e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
58228e30874SSatish Balay     v   = aa + 25 * diag[i] + 25;
58328e30874SSatish Balay     vi  = aj + diag[i] + 1;
58428e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
58528e30874SSatish Balay     idt = 5 * i;
5869371c9d4SSatish Balay     s1  = t[idt];
5879371c9d4SSatish Balay     s2  = t[1 + idt];
5889371c9d4SSatish Balay     s3  = t[2 + idt];
5899371c9d4SSatish Balay     s4  = t[3 + idt];
5909371c9d4SSatish Balay     s5  = t[4 + idt];
59128e30874SSatish Balay     while (nz--) {
59228e30874SSatish Balay       idx = 5 * (*vi++);
5939371c9d4SSatish Balay       x1  = t[idx];
5949371c9d4SSatish Balay       x2  = t[1 + idx];
5959371c9d4SSatish Balay       x3  = t[2 + idx];
5969371c9d4SSatish Balay       x4  = t[3 + idx];
5979371c9d4SSatish Balay       x5  = t[4 + idx];
59828e30874SSatish Balay       s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
59928e30874SSatish Balay       s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
60028e30874SSatish Balay       s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
60128e30874SSatish Balay       s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
60228e30874SSatish Balay       s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
60328e30874SSatish Balay       v += 25;
60428e30874SSatish Balay     }
60528e30874SSatish Balay     idc    = 5 * (*c--);
60628e30874SSatish Balay     v      = aa + 25 * diag[i];
6079371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
6089371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
6099371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
6109371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
6119371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
61228e30874SSatish Balay   }
61328e30874SSatish Balay 
6149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
6159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
6169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
6179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
6189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n));
6193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
62028e30874SSatish Balay }
62128e30874SSatish Balay 
MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)622d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_5(Mat A, Vec bb, Vec xx)
623d71ae5a4SJacob Faibussowitsch {
62428e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
62528e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
62628e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
62728e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
62828e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc, m;
62928e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
63028e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5, *t;
63128e30874SSatish Balay   const PetscScalar *b;
63228e30874SSatish Balay 
63328e30874SSatish Balay   PetscFunctionBegin;
6349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
6359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
63628e30874SSatish Balay   t = a->solve_work;
63728e30874SSatish Balay 
6389371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
6399371c9d4SSatish Balay   r = rout;
6409371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
6419371c9d4SSatish Balay   c = cout;
64228e30874SSatish Balay 
64328e30874SSatish Balay   /* forward solve the lower triangular */
64428e30874SSatish Balay   idx  = 5 * r[0];
6459371c9d4SSatish Balay   t[0] = b[idx];
6469371c9d4SSatish Balay   t[1] = b[1 + idx];
6479371c9d4SSatish Balay   t[2] = b[2 + idx];
6489371c9d4SSatish Balay   t[3] = b[3 + idx];
6499371c9d4SSatish Balay   t[4] = b[4 + idx];
65028e30874SSatish Balay   for (i = 1; i < n; i++) {
65128e30874SSatish Balay     v   = aa + 25 * ai[i];
65228e30874SSatish Balay     vi  = aj + ai[i];
65328e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
65428e30874SSatish Balay     idx = 5 * r[i];
6559371c9d4SSatish Balay     s1  = b[idx];
6569371c9d4SSatish Balay     s2  = b[1 + idx];
6579371c9d4SSatish Balay     s3  = b[2 + idx];
6589371c9d4SSatish Balay     s4  = b[3 + idx];
65928e30874SSatish Balay     s5  = b[4 + idx];
66028e30874SSatish Balay     for (m = 0; m < nz; m++) {
66128e30874SSatish Balay       idx = 5 * vi[m];
6629371c9d4SSatish Balay       x1  = t[idx];
6639371c9d4SSatish Balay       x2  = t[1 + idx];
6649371c9d4SSatish Balay       x3  = t[2 + idx];
6659371c9d4SSatish Balay       x4  = t[3 + idx];
6669371c9d4SSatish Balay       x5  = t[4 + idx];
66728e30874SSatish Balay       s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
66828e30874SSatish Balay       s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
66928e30874SSatish Balay       s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
67028e30874SSatish Balay       s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
67128e30874SSatish Balay       s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
67228e30874SSatish Balay       v += 25;
67328e30874SSatish Balay     }
67428e30874SSatish Balay     idx        = 5 * i;
6759371c9d4SSatish Balay     t[idx]     = s1;
6769371c9d4SSatish Balay     t[1 + idx] = s2;
6779371c9d4SSatish Balay     t[2 + idx] = s3;
6789371c9d4SSatish Balay     t[3 + idx] = s4;
6799371c9d4SSatish Balay     t[4 + idx] = s5;
68028e30874SSatish Balay   }
68128e30874SSatish Balay   /* backward solve the upper triangular */
68228e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
68328e30874SSatish Balay     v   = aa + 25 * (adiag[i + 1] + 1);
68428e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
68528e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
68628e30874SSatish Balay     idt = 5 * i;
6879371c9d4SSatish Balay     s1  = t[idt];
6889371c9d4SSatish Balay     s2  = t[1 + idt];
6899371c9d4SSatish Balay     s3  = t[2 + idt];
6909371c9d4SSatish Balay     s4  = t[3 + idt];
6919371c9d4SSatish Balay     s5  = t[4 + idt];
69228e30874SSatish Balay     for (m = 0; m < nz; m++) {
69328e30874SSatish Balay       idx = 5 * vi[m];
6949371c9d4SSatish Balay       x1  = t[idx];
6959371c9d4SSatish Balay       x2  = t[1 + idx];
6969371c9d4SSatish Balay       x3  = t[2 + idx];
6979371c9d4SSatish Balay       x4  = t[3 + idx];
6989371c9d4SSatish Balay       x5  = t[4 + idx];
69928e30874SSatish Balay       s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
70028e30874SSatish Balay       s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
70128e30874SSatish Balay       s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
70228e30874SSatish Balay       s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
70328e30874SSatish Balay       s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
70428e30874SSatish Balay       v += 25;
70528e30874SSatish Balay     }
70628e30874SSatish Balay     idc    = 5 * c[i];
7079371c9d4SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
7089371c9d4SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
7099371c9d4SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
7109371c9d4SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
7119371c9d4SSatish Balay     x[4 + idc] = t[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
71228e30874SSatish Balay   }
71328e30874SSatish Balay 
7149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
7159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
7169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
7179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
7189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n));
7193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
72028e30874SSatish Balay }
72128e30874SSatish Balay 
MatSolve_SeqBAIJ_4_inplace(Mat A,Vec bb,Vec xx)722d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_4_inplace(Mat A, Vec bb, Vec xx)
723d71ae5a4SJacob Faibussowitsch {
72428e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
72528e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
72628e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
72728e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
72828e30874SSatish Balay   const PetscInt    *r, *c, *diag = a->diag, *rout, *cout;
72928e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
73028e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, x1, x2, x3, x4, *t;
73128e30874SSatish Balay   const PetscScalar *b;
73228e30874SSatish Balay 
73328e30874SSatish Balay   PetscFunctionBegin;
7349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
7359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
73628e30874SSatish Balay   t = a->solve_work;
73728e30874SSatish Balay 
7389371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
7399371c9d4SSatish Balay   r = rout;
7409371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
7419371c9d4SSatish Balay   c = cout + (n - 1);
74228e30874SSatish Balay 
74328e30874SSatish Balay   /* forward solve the lower triangular */
74428e30874SSatish Balay   idx  = 4 * (*r++);
7459371c9d4SSatish Balay   t[0] = b[idx];
7469371c9d4SSatish Balay   t[1] = b[1 + idx];
7479371c9d4SSatish Balay   t[2] = b[2 + idx];
7489371c9d4SSatish Balay   t[3] = b[3 + idx];
74928e30874SSatish Balay   for (i = 1; i < n; i++) {
75028e30874SSatish Balay     v   = aa + 16 * ai[i];
75128e30874SSatish Balay     vi  = aj + ai[i];
75228e30874SSatish Balay     nz  = diag[i] - ai[i];
75328e30874SSatish Balay     idx = 4 * (*r++);
7549371c9d4SSatish Balay     s1  = b[idx];
7559371c9d4SSatish Balay     s2  = b[1 + idx];
7569371c9d4SSatish Balay     s3  = b[2 + idx];
7579371c9d4SSatish Balay     s4  = b[3 + idx];
75828e30874SSatish Balay     while (nz--) {
75928e30874SSatish Balay       idx = 4 * (*vi++);
7609371c9d4SSatish Balay       x1  = t[idx];
7619371c9d4SSatish Balay       x2  = t[1 + idx];
7629371c9d4SSatish Balay       x3  = t[2 + idx];
7639371c9d4SSatish Balay       x4  = t[3 + idx];
76428e30874SSatish Balay       s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4;
76528e30874SSatish Balay       s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4;
76628e30874SSatish Balay       s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4;
76728e30874SSatish Balay       s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4;
76828e30874SSatish Balay       v += 16;
76928e30874SSatish Balay     }
77028e30874SSatish Balay     idx        = 4 * i;
7719371c9d4SSatish Balay     t[idx]     = s1;
7729371c9d4SSatish Balay     t[1 + idx] = s2;
7739371c9d4SSatish Balay     t[2 + idx] = s3;
7749371c9d4SSatish Balay     t[3 + idx] = s4;
77528e30874SSatish Balay   }
77628e30874SSatish Balay   /* backward solve the upper triangular */
77728e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
77828e30874SSatish Balay     v   = aa + 16 * diag[i] + 16;
77928e30874SSatish Balay     vi  = aj + diag[i] + 1;
78028e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
78128e30874SSatish Balay     idt = 4 * i;
7829371c9d4SSatish Balay     s1  = t[idt];
7839371c9d4SSatish Balay     s2  = t[1 + idt];
7849371c9d4SSatish Balay     s3  = t[2 + idt];
7859371c9d4SSatish Balay     s4  = t[3 + idt];
78628e30874SSatish Balay     while (nz--) {
78728e30874SSatish Balay       idx = 4 * (*vi++);
7889371c9d4SSatish Balay       x1  = t[idx];
7899371c9d4SSatish Balay       x2  = t[1 + idx];
7909371c9d4SSatish Balay       x3  = t[2 + idx];
7919371c9d4SSatish Balay       x4  = t[3 + idx];
79228e30874SSatish Balay       s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4;
79328e30874SSatish Balay       s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4;
79428e30874SSatish Balay       s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4;
79528e30874SSatish Balay       s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4;
79628e30874SSatish Balay       v += 16;
79728e30874SSatish Balay     }
79828e30874SSatish Balay     idc    = 4 * (*c--);
79928e30874SSatish Balay     v      = aa + 16 * diag[i];
80028e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[4] * s2 + v[8] * s3 + v[12] * s4;
80128e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[5] * s2 + v[9] * s3 + v[13] * s4;
80228e30874SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[6] * s2 + v[10] * s3 + v[14] * s4;
80328e30874SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[7] * s2 + v[11] * s3 + v[15] * s4;
80428e30874SSatish Balay   }
80528e30874SSatish Balay 
8069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
8079566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
8089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
8099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
8109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 16 * (a->nz) - 4.0 * A->cmap->n));
8113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81228e30874SSatish Balay }
81328e30874SSatish Balay 
MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)814d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_4(Mat A, Vec bb, Vec xx)
815d71ae5a4SJacob Faibussowitsch {
81628e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
81728e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
81828e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
81928e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc, m;
82028e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
82128e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
82228e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, s4, x1, x2, x3, x4, *t;
82328e30874SSatish Balay   const PetscScalar *b;
82428e30874SSatish Balay 
82528e30874SSatish Balay   PetscFunctionBegin;
8269566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
8279566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
82828e30874SSatish Balay   t = a->solve_work;
82928e30874SSatish Balay 
8309371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
8319371c9d4SSatish Balay   r = rout;
8329371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
8339371c9d4SSatish Balay   c = cout;
83428e30874SSatish Balay 
83528e30874SSatish Balay   /* forward solve the lower triangular */
83628e30874SSatish Balay   idx  = 4 * r[0];
8379371c9d4SSatish Balay   t[0] = b[idx];
8389371c9d4SSatish Balay   t[1] = b[1 + idx];
8399371c9d4SSatish Balay   t[2] = b[2 + idx];
8409371c9d4SSatish Balay   t[3] = b[3 + idx];
84128e30874SSatish Balay   for (i = 1; i < n; i++) {
84228e30874SSatish Balay     v   = aa + 16 * ai[i];
84328e30874SSatish Balay     vi  = aj + ai[i];
84428e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
84528e30874SSatish Balay     idx = 4 * r[i];
8469371c9d4SSatish Balay     s1  = b[idx];
8479371c9d4SSatish Balay     s2  = b[1 + idx];
8489371c9d4SSatish Balay     s3  = b[2 + idx];
8499371c9d4SSatish Balay     s4  = b[3 + idx];
85028e30874SSatish Balay     for (m = 0; m < nz; m++) {
85128e30874SSatish Balay       idx = 4 * vi[m];
8529371c9d4SSatish Balay       x1  = t[idx];
8539371c9d4SSatish Balay       x2  = t[1 + idx];
8549371c9d4SSatish Balay       x3  = t[2 + idx];
8559371c9d4SSatish Balay       x4  = t[3 + idx];
85628e30874SSatish Balay       s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4;
85728e30874SSatish Balay       s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4;
85828e30874SSatish Balay       s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4;
85928e30874SSatish Balay       s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4;
86028e30874SSatish Balay       v += 16;
86128e30874SSatish Balay     }
86228e30874SSatish Balay     idx        = 4 * i;
8639371c9d4SSatish Balay     t[idx]     = s1;
8649371c9d4SSatish Balay     t[1 + idx] = s2;
8659371c9d4SSatish Balay     t[2 + idx] = s3;
8669371c9d4SSatish Balay     t[3 + idx] = s4;
86728e30874SSatish Balay   }
86828e30874SSatish Balay   /* backward solve the upper triangular */
86928e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
87028e30874SSatish Balay     v   = aa + 16 * (adiag[i + 1] + 1);
87128e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
87228e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
87328e30874SSatish Balay     idt = 4 * i;
8749371c9d4SSatish Balay     s1  = t[idt];
8759371c9d4SSatish Balay     s2  = t[1 + idt];
8769371c9d4SSatish Balay     s3  = t[2 + idt];
8779371c9d4SSatish Balay     s4  = t[3 + idt];
87828e30874SSatish Balay     for (m = 0; m < nz; m++) {
87928e30874SSatish Balay       idx = 4 * vi[m];
8809371c9d4SSatish Balay       x1  = t[idx];
8819371c9d4SSatish Balay       x2  = t[1 + idx];
8829371c9d4SSatish Balay       x3  = t[2 + idx];
8839371c9d4SSatish Balay       x4  = t[3 + idx];
88428e30874SSatish Balay       s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4;
88528e30874SSatish Balay       s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4;
88628e30874SSatish Balay       s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4;
88728e30874SSatish Balay       s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4;
88828e30874SSatish Balay       v += 16;
88928e30874SSatish Balay     }
89028e30874SSatish Balay     idc    = 4 * c[i];
89128e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[4] * s2 + v[8] * s3 + v[12] * s4;
89228e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[5] * s2 + v[9] * s3 + v[13] * s4;
89328e30874SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[6] * s2 + v[10] * s3 + v[14] * s4;
89428e30874SSatish Balay     x[3 + idc] = t[3 + idt] = v[3] * s1 + v[7] * s2 + v[11] * s3 + v[15] * s4;
89528e30874SSatish Balay   }
89628e30874SSatish Balay 
8979566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
8989566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
8999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
9009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
9019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 16 * (a->nz) - 4.0 * A->cmap->n));
9023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
90328e30874SSatish Balay }
90428e30874SSatish Balay 
MatSolve_SeqBAIJ_3_inplace(Mat A,Vec bb,Vec xx)905d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_3_inplace(Mat A, Vec bb, Vec xx)
906d71ae5a4SJacob Faibussowitsch {
90728e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
90828e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
90928e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
91028e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
91128e30874SSatish Balay   const PetscInt    *r, *c, *diag = a->diag, *rout, *cout;
91228e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
91328e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, x1, x2, x3, *t;
91428e30874SSatish Balay   const PetscScalar *b;
91528e30874SSatish Balay 
91628e30874SSatish Balay   PetscFunctionBegin;
9179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
9189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
91928e30874SSatish Balay   t = a->solve_work;
92028e30874SSatish Balay 
9219371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
9229371c9d4SSatish Balay   r = rout;
9239371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
9249371c9d4SSatish Balay   c = cout + (n - 1);
92528e30874SSatish Balay 
92628e30874SSatish Balay   /* forward solve the lower triangular */
92728e30874SSatish Balay   idx  = 3 * (*r++);
9289371c9d4SSatish Balay   t[0] = b[idx];
9299371c9d4SSatish Balay   t[1] = b[1 + idx];
9309371c9d4SSatish Balay   t[2] = b[2 + idx];
93128e30874SSatish Balay   for (i = 1; i < n; i++) {
93228e30874SSatish Balay     v   = aa + 9 * ai[i];
93328e30874SSatish Balay     vi  = aj + ai[i];
93428e30874SSatish Balay     nz  = diag[i] - ai[i];
93528e30874SSatish Balay     idx = 3 * (*r++);
9369371c9d4SSatish Balay     s1  = b[idx];
9379371c9d4SSatish Balay     s2  = b[1 + idx];
9389371c9d4SSatish Balay     s3  = b[2 + idx];
93928e30874SSatish Balay     while (nz--) {
94028e30874SSatish Balay       idx = 3 * (*vi++);
9419371c9d4SSatish Balay       x1  = t[idx];
9429371c9d4SSatish Balay       x2  = t[1 + idx];
9439371c9d4SSatish Balay       x3  = t[2 + idx];
94428e30874SSatish Balay       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
94528e30874SSatish Balay       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
94628e30874SSatish Balay       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
94728e30874SSatish Balay       v += 9;
94828e30874SSatish Balay     }
94928e30874SSatish Balay     idx        = 3 * i;
9509371c9d4SSatish Balay     t[idx]     = s1;
9519371c9d4SSatish Balay     t[1 + idx] = s2;
9529371c9d4SSatish Balay     t[2 + idx] = s3;
95328e30874SSatish Balay   }
95428e30874SSatish Balay   /* backward solve the upper triangular */
95528e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
95628e30874SSatish Balay     v   = aa + 9 * diag[i] + 9;
95728e30874SSatish Balay     vi  = aj + diag[i] + 1;
95828e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
95928e30874SSatish Balay     idt = 3 * i;
9609371c9d4SSatish Balay     s1  = t[idt];
9619371c9d4SSatish Balay     s2  = t[1 + idt];
9629371c9d4SSatish Balay     s3  = t[2 + idt];
96328e30874SSatish Balay     while (nz--) {
96428e30874SSatish Balay       idx = 3 * (*vi++);
9659371c9d4SSatish Balay       x1  = t[idx];
9669371c9d4SSatish Balay       x2  = t[1 + idx];
9679371c9d4SSatish Balay       x3  = t[2 + idx];
96828e30874SSatish Balay       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
96928e30874SSatish Balay       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
97028e30874SSatish Balay       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
97128e30874SSatish Balay       v += 9;
97228e30874SSatish Balay     }
97328e30874SSatish Balay     idc    = 3 * (*c--);
97428e30874SSatish Balay     v      = aa + 9 * diag[i];
97528e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
97628e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
97728e30874SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
97828e30874SSatish Balay   }
9799566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
9809566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
9819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
9829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
9839566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
9843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
98528e30874SSatish Balay }
98628e30874SSatish Balay 
MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)987d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_3(Mat A, Vec bb, Vec xx)
988d71ae5a4SJacob Faibussowitsch {
98928e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
99028e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
99128e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
99228e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc, m;
99328e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
99428e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
99528e30874SSatish Balay   PetscScalar       *x, s1, s2, s3, x1, x2, x3, *t;
99628e30874SSatish Balay   const PetscScalar *b;
99728e30874SSatish Balay 
99828e30874SSatish Balay   PetscFunctionBegin;
9999566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10009566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
100128e30874SSatish Balay   t = a->solve_work;
100228e30874SSatish Balay 
10039371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10049371c9d4SSatish Balay   r = rout;
10059371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10069371c9d4SSatish Balay   c = cout;
100728e30874SSatish Balay 
100828e30874SSatish Balay   /* forward solve the lower triangular */
100928e30874SSatish Balay   idx  = 3 * r[0];
10109371c9d4SSatish Balay   t[0] = b[idx];
10119371c9d4SSatish Balay   t[1] = b[1 + idx];
10129371c9d4SSatish Balay   t[2] = b[2 + idx];
101328e30874SSatish Balay   for (i = 1; i < n; i++) {
101428e30874SSatish Balay     v   = aa + 9 * ai[i];
101528e30874SSatish Balay     vi  = aj + ai[i];
101628e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
101728e30874SSatish Balay     idx = 3 * r[i];
10189371c9d4SSatish Balay     s1  = b[idx];
10199371c9d4SSatish Balay     s2  = b[1 + idx];
10209371c9d4SSatish Balay     s3  = b[2 + idx];
102128e30874SSatish Balay     for (m = 0; m < nz; m++) {
102228e30874SSatish Balay       idx = 3 * vi[m];
10239371c9d4SSatish Balay       x1  = t[idx];
10249371c9d4SSatish Balay       x2  = t[1 + idx];
10259371c9d4SSatish Balay       x3  = t[2 + idx];
102628e30874SSatish Balay       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
102728e30874SSatish Balay       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
102828e30874SSatish Balay       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
102928e30874SSatish Balay       v += 9;
103028e30874SSatish Balay     }
103128e30874SSatish Balay     idx        = 3 * i;
10329371c9d4SSatish Balay     t[idx]     = s1;
10339371c9d4SSatish Balay     t[1 + idx] = s2;
10349371c9d4SSatish Balay     t[2 + idx] = s3;
103528e30874SSatish Balay   }
103628e30874SSatish Balay   /* backward solve the upper triangular */
103728e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
103828e30874SSatish Balay     v   = aa + 9 * (adiag[i + 1] + 1);
103928e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
104028e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
104128e30874SSatish Balay     idt = 3 * i;
10429371c9d4SSatish Balay     s1  = t[idt];
10439371c9d4SSatish Balay     s2  = t[1 + idt];
10449371c9d4SSatish Balay     s3  = t[2 + idt];
104528e30874SSatish Balay     for (m = 0; m < nz; m++) {
104628e30874SSatish Balay       idx = 3 * vi[m];
10479371c9d4SSatish Balay       x1  = t[idx];
10489371c9d4SSatish Balay       x2  = t[1 + idx];
10499371c9d4SSatish Balay       x3  = t[2 + idx];
105028e30874SSatish Balay       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
105128e30874SSatish Balay       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
105228e30874SSatish Balay       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
105328e30874SSatish Balay       v += 9;
105428e30874SSatish Balay     }
105528e30874SSatish Balay     idc    = 3 * c[i];
105628e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
105728e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
105828e30874SSatish Balay     x[2 + idc] = t[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
105928e30874SSatish Balay   }
10609566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
10639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
10649566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
10653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
106628e30874SSatish Balay }
106728e30874SSatish Balay 
MatSolve_SeqBAIJ_2_inplace(Mat A,Vec bb,Vec xx)1068d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_2_inplace(Mat A, Vec bb, Vec xx)
1069d71ae5a4SJacob Faibussowitsch {
107028e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
107128e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
107228e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
107328e30874SSatish Balay   PetscInt           i, nz, idx, idt, idc;
107428e30874SSatish Balay   const PetscInt    *r, *c, *diag = a->diag, *rout, *cout;
107528e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
107628e30874SSatish Balay   PetscScalar       *x, s1, s2, x1, x2, *t;
107728e30874SSatish Balay   const PetscScalar *b;
107828e30874SSatish Balay 
107928e30874SSatish Balay   PetscFunctionBegin;
10809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
108228e30874SSatish Balay   t = a->solve_work;
108328e30874SSatish Balay 
10849371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10859371c9d4SSatish Balay   r = rout;
10869371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10879371c9d4SSatish Balay   c = cout + (n - 1);
108828e30874SSatish Balay 
108928e30874SSatish Balay   /* forward solve the lower triangular */
109028e30874SSatish Balay   idx  = 2 * (*r++);
10919371c9d4SSatish Balay   t[0] = b[idx];
10929371c9d4SSatish Balay   t[1] = b[1 + idx];
109328e30874SSatish Balay   for (i = 1; i < n; i++) {
109428e30874SSatish Balay     v   = aa + 4 * ai[i];
109528e30874SSatish Balay     vi  = aj + ai[i];
109628e30874SSatish Balay     nz  = diag[i] - ai[i];
109728e30874SSatish Balay     idx = 2 * (*r++);
10989371c9d4SSatish Balay     s1  = b[idx];
10999371c9d4SSatish Balay     s2  = b[1 + idx];
110028e30874SSatish Balay     while (nz--) {
110128e30874SSatish Balay       idx = 2 * (*vi++);
11029371c9d4SSatish Balay       x1  = t[idx];
11039371c9d4SSatish Balay       x2  = t[1 + idx];
110428e30874SSatish Balay       s1 -= v[0] * x1 + v[2] * x2;
110528e30874SSatish Balay       s2 -= v[1] * x1 + v[3] * x2;
110628e30874SSatish Balay       v += 4;
110728e30874SSatish Balay     }
110828e30874SSatish Balay     idx        = 2 * i;
11099371c9d4SSatish Balay     t[idx]     = s1;
11109371c9d4SSatish Balay     t[1 + idx] = s2;
111128e30874SSatish Balay   }
111228e30874SSatish Balay   /* backward solve the upper triangular */
111328e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
111428e30874SSatish Balay     v   = aa + 4 * diag[i] + 4;
111528e30874SSatish Balay     vi  = aj + diag[i] + 1;
111628e30874SSatish Balay     nz  = ai[i + 1] - diag[i] - 1;
111728e30874SSatish Balay     idt = 2 * i;
11189371c9d4SSatish Balay     s1  = t[idt];
11199371c9d4SSatish Balay     s2  = t[1 + idt];
112028e30874SSatish Balay     while (nz--) {
112128e30874SSatish Balay       idx = 2 * (*vi++);
11229371c9d4SSatish Balay       x1  = t[idx];
11239371c9d4SSatish Balay       x2  = t[1 + idx];
112428e30874SSatish Balay       s1 -= v[0] * x1 + v[2] * x2;
112528e30874SSatish Balay       s2 -= v[1] * x1 + v[3] * x2;
112628e30874SSatish Balay       v += 4;
112728e30874SSatish Balay     }
112828e30874SSatish Balay     idc    = 2 * (*c--);
112928e30874SSatish Balay     v      = aa + 4 * diag[i];
113028e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[2] * s2;
113128e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[3] * s2;
113228e30874SSatish Balay   }
11339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11349566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
11379566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
11383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
113928e30874SSatish Balay }
114028e30874SSatish Balay 
MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)1141d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_2(Mat A, Vec bb, Vec xx)
1142d71ae5a4SJacob Faibussowitsch {
114328e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
114428e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
114528e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
114628e30874SSatish Balay   PetscInt           i, nz, idx, jdx, idt, idc, m;
114728e30874SSatish Balay   const PetscInt    *r, *c, *rout, *cout;
114828e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
114928e30874SSatish Balay   PetscScalar       *x, s1, s2, x1, x2, *t;
115028e30874SSatish Balay   const PetscScalar *b;
115128e30874SSatish Balay 
115228e30874SSatish Balay   PetscFunctionBegin;
11539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
115528e30874SSatish Balay   t = a->solve_work;
115628e30874SSatish Balay 
11579371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11589371c9d4SSatish Balay   r = rout;
11599371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11609371c9d4SSatish Balay   c = cout;
116128e30874SSatish Balay 
116228e30874SSatish Balay   /* forward solve the lower triangular */
116328e30874SSatish Balay   idx  = 2 * r[0];
11649371c9d4SSatish Balay   t[0] = b[idx];
11659371c9d4SSatish Balay   t[1] = b[1 + idx];
116628e30874SSatish Balay   for (i = 1; i < n; i++) {
116728e30874SSatish Balay     v   = aa + 4 * ai[i];
116828e30874SSatish Balay     vi  = aj + ai[i];
116928e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
117028e30874SSatish Balay     idx = 2 * r[i];
11719371c9d4SSatish Balay     s1  = b[idx];
11729371c9d4SSatish Balay     s2  = b[1 + idx];
117328e30874SSatish Balay     for (m = 0; m < nz; m++) {
117428e30874SSatish Balay       jdx = 2 * vi[m];
11759371c9d4SSatish Balay       x1  = t[jdx];
11769371c9d4SSatish Balay       x2  = t[1 + jdx];
117728e30874SSatish Balay       s1 -= v[0] * x1 + v[2] * x2;
117828e30874SSatish Balay       s2 -= v[1] * x1 + v[3] * x2;
117928e30874SSatish Balay       v += 4;
118028e30874SSatish Balay     }
118128e30874SSatish Balay     idx        = 2 * i;
11829371c9d4SSatish Balay     t[idx]     = s1;
11839371c9d4SSatish Balay     t[1 + idx] = s2;
118428e30874SSatish Balay   }
118528e30874SSatish Balay   /* backward solve the upper triangular */
118628e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
118728e30874SSatish Balay     v   = aa + 4 * (adiag[i + 1] + 1);
118828e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
118928e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
119028e30874SSatish Balay     idt = 2 * i;
11919371c9d4SSatish Balay     s1  = t[idt];
11929371c9d4SSatish Balay     s2  = t[1 + idt];
119328e30874SSatish Balay     for (m = 0; m < nz; m++) {
119428e30874SSatish Balay       idx = 2 * vi[m];
11959371c9d4SSatish Balay       x1  = t[idx];
11969371c9d4SSatish Balay       x2  = t[1 + idx];
119728e30874SSatish Balay       s1 -= v[0] * x1 + v[2] * x2;
119828e30874SSatish Balay       s2 -= v[1] * x1 + v[3] * x2;
119928e30874SSatish Balay       v += 4;
120028e30874SSatish Balay     }
120128e30874SSatish Balay     idc    = 2 * c[i];
120228e30874SSatish Balay     x[idc] = t[idt] = v[0] * s1 + v[2] * s2;
120328e30874SSatish Balay     x[1 + idc] = t[1 + idt] = v[1] * s1 + v[3] * s2;
120428e30874SSatish Balay   }
12059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12099566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
12103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
121128e30874SSatish Balay }
121228e30874SSatish Balay 
MatSolve_SeqBAIJ_1_inplace(Mat A,Vec bb,Vec xx)1213d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_1_inplace(Mat A, Vec bb, Vec xx)
1214d71ae5a4SJacob Faibussowitsch {
121528e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
121628e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
121728e30874SSatish Balay   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
121828e30874SSatish Balay   PetscInt           i, nz;
121928e30874SSatish Balay   const PetscInt    *r, *c, *diag = a->diag, *rout, *cout;
122028e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
122128e30874SSatish Balay   PetscScalar       *x, s1, *t;
122228e30874SSatish Balay   const PetscScalar *b;
122328e30874SSatish Balay 
122428e30874SSatish Balay   PetscFunctionBegin;
12253ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
122628e30874SSatish Balay 
12279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12289566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
122928e30874SSatish Balay   t = a->solve_work;
123028e30874SSatish Balay 
12319371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12329371c9d4SSatish Balay   r = rout;
12339371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12349371c9d4SSatish Balay   c = cout + (n - 1);
123528e30874SSatish Balay 
123628e30874SSatish Balay   /* forward solve the lower triangular */
123728e30874SSatish Balay   t[0] = b[*r++];
123828e30874SSatish Balay   for (i = 1; i < n; i++) {
123928e30874SSatish Balay     v  = aa + ai[i];
124028e30874SSatish Balay     vi = aj + ai[i];
124128e30874SSatish Balay     nz = diag[i] - ai[i];
124228e30874SSatish Balay     s1 = b[*r++];
1243ad540459SPierre Jolivet     while (nz--) s1 -= (*v++) * t[*vi++];
124428e30874SSatish Balay     t[i] = s1;
124528e30874SSatish Balay   }
124628e30874SSatish Balay   /* backward solve the upper triangular */
124728e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
124828e30874SSatish Balay     v  = aa + diag[i] + 1;
124928e30874SSatish Balay     vi = aj + diag[i] + 1;
125028e30874SSatish Balay     nz = ai[i + 1] - diag[i] - 1;
125128e30874SSatish Balay     s1 = t[i];
1252ad540459SPierre Jolivet     while (nz--) s1 -= (*v++) * t[*vi++];
125328e30874SSatish Balay     x[*c--] = t[i] = aa[diag[i]] * s1;
125428e30874SSatish Balay   }
125528e30874SSatish Balay 
12569566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12579566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12609566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 1 * (a->nz) - A->cmap->n));
12613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
126228e30874SSatish Balay }
126328e30874SSatish Balay 
MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)1264d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_1(Mat A, Vec bb, Vec xx)
1265d71ae5a4SJacob Faibussowitsch {
126628e30874SSatish Balay   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
126728e30874SSatish Balay   IS                 iscol = a->col, isrow = a->row;
126828e30874SSatish Balay   PetscInt           i, n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
126928e30874SSatish Balay   const PetscInt    *rout, *cout, *r, *c;
127028e30874SSatish Balay   PetscScalar       *x, *tmp, sum;
127128e30874SSatish Balay   const PetscScalar *b;
127228e30874SSatish Balay   const MatScalar   *aa = a->a, *v;
127328e30874SSatish Balay 
127428e30874SSatish Balay   PetscFunctionBegin;
12753ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
127628e30874SSatish Balay 
12779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12789566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
127928e30874SSatish Balay   tmp = a->solve_work;
128028e30874SSatish Balay 
12819371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12829371c9d4SSatish Balay   r = rout;
12839371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12849371c9d4SSatish Balay   c = cout;
128528e30874SSatish Balay 
128628e30874SSatish Balay   /* forward solve the lower triangular */
128728e30874SSatish Balay   tmp[0] = b[r[0]];
128828e30874SSatish Balay   v      = aa;
128928e30874SSatish Balay   vi     = aj;
129028e30874SSatish Balay   for (i = 1; i < n; i++) {
129128e30874SSatish Balay     nz  = ai[i + 1] - ai[i];
129228e30874SSatish Balay     sum = b[r[i]];
129328e30874SSatish Balay     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
129428e30874SSatish Balay     tmp[i] = sum;
12959371c9d4SSatish Balay     v += nz;
12969371c9d4SSatish Balay     vi += nz;
129728e30874SSatish Balay   }
129828e30874SSatish Balay 
129928e30874SSatish Balay   /* backward solve the upper triangular */
130028e30874SSatish Balay   for (i = n - 1; i >= 0; i--) {
130128e30874SSatish Balay     v   = aa + adiag[i + 1] + 1;
130228e30874SSatish Balay     vi  = aj + adiag[i + 1] + 1;
130328e30874SSatish Balay     nz  = adiag[i] - adiag[i + 1] - 1;
130428e30874SSatish Balay     sum = tmp[i];
130528e30874SSatish Balay     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
130628e30874SSatish Balay     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
130728e30874SSatish Balay   }
130828e30874SSatish Balay 
13099566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13109566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131528e30874SSatish Balay }
1316