12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith
4*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */
52c733ed4SBarry Smith /* Default MatSolve for block size 14 */
62c733ed4SBarry Smith
MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A,Vec bb,Vec xx)7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx)
8d71ae5a4SJacob Faibussowitsch {
92c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
102c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
112c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m;
122c733ed4SBarry Smith const MatScalar *aa = a->a, *v;
132c733ed4SBarry Smith PetscScalar s[14];
142c733ed4SBarry Smith PetscScalar *x, xv;
152c733ed4SBarry Smith const PetscScalar *b;
162c733ed4SBarry Smith
172c733ed4SBarry Smith PetscFunctionBegin;
189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
199566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
202c733ed4SBarry Smith
212c733ed4SBarry Smith /* forward solve the lower triangular */
222c733ed4SBarry Smith for (i = 0; i < n; i++) {
232c733ed4SBarry Smith v = aa + bs2 * ai[i];
242c733ed4SBarry Smith vi = aj + ai[i];
252c733ed4SBarry Smith nz = ai[i + 1] - ai[i];
262c733ed4SBarry Smith idt = bs * i;
279371c9d4SSatish Balay x[idt] = b[idt];
289371c9d4SSatish Balay x[1 + idt] = b[1 + idt];
299371c9d4SSatish Balay x[2 + idt] = b[2 + idt];
309371c9d4SSatish Balay x[3 + idt] = b[3 + idt];
319371c9d4SSatish Balay x[4 + idt] = b[4 + idt];
329371c9d4SSatish Balay x[5 + idt] = b[5 + idt];
339371c9d4SSatish Balay x[6 + idt] = b[6 + idt];
349371c9d4SSatish Balay x[7 + idt] = b[7 + idt];
359371c9d4SSatish Balay x[8 + idt] = b[8 + idt];
369371c9d4SSatish Balay x[9 + idt] = b[9 + idt];
379371c9d4SSatish Balay x[10 + idt] = b[10 + idt];
389371c9d4SSatish Balay x[11 + idt] = b[11 + idt];
399371c9d4SSatish Balay x[12 + idt] = b[12 + idt];
409371c9d4SSatish Balay x[13 + idt] = b[13 + idt];
412c733ed4SBarry Smith for (m = 0; m < nz; m++) {
422c733ed4SBarry Smith idx = bs * vi[m];
432c733ed4SBarry Smith for (k = 0; k < bs; k++) {
442c733ed4SBarry Smith xv = x[k + idx];
452c733ed4SBarry Smith x[idt] -= v[0] * xv;
462c733ed4SBarry Smith x[1 + idt] -= v[1] * xv;
472c733ed4SBarry Smith x[2 + idt] -= v[2] * xv;
482c733ed4SBarry Smith x[3 + idt] -= v[3] * xv;
492c733ed4SBarry Smith x[4 + idt] -= v[4] * xv;
502c733ed4SBarry Smith x[5 + idt] -= v[5] * xv;
512c733ed4SBarry Smith x[6 + idt] -= v[6] * xv;
522c733ed4SBarry Smith x[7 + idt] -= v[7] * xv;
532c733ed4SBarry Smith x[8 + idt] -= v[8] * xv;
542c733ed4SBarry Smith x[9 + idt] -= v[9] * xv;
552c733ed4SBarry Smith x[10 + idt] -= v[10] * xv;
562c733ed4SBarry Smith x[11 + idt] -= v[11] * xv;
572c733ed4SBarry Smith x[12 + idt] -= v[12] * xv;
582c733ed4SBarry Smith x[13 + idt] -= v[13] * xv;
592c733ed4SBarry Smith v += 14;
602c733ed4SBarry Smith }
612c733ed4SBarry Smith }
622c733ed4SBarry Smith }
632c733ed4SBarry Smith /* backward solve the upper triangular */
642c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) {
652c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1);
662c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1;
672c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1;
682c733ed4SBarry Smith idt = bs * i;
699371c9d4SSatish Balay s[0] = x[idt];
709371c9d4SSatish Balay s[1] = x[1 + idt];
719371c9d4SSatish Balay s[2] = x[2 + idt];
729371c9d4SSatish Balay s[3] = x[3 + idt];
739371c9d4SSatish Balay s[4] = x[4 + idt];
749371c9d4SSatish Balay s[5] = x[5 + idt];
759371c9d4SSatish Balay s[6] = x[6 + idt];
769371c9d4SSatish Balay s[7] = x[7 + idt];
779371c9d4SSatish Balay s[8] = x[8 + idt];
789371c9d4SSatish Balay s[9] = x[9 + idt];
799371c9d4SSatish Balay s[10] = x[10 + idt];
809371c9d4SSatish Balay s[11] = x[11 + idt];
819371c9d4SSatish Balay s[12] = x[12 + idt];
829371c9d4SSatish Balay s[13] = x[13 + idt];
832c733ed4SBarry Smith
842c733ed4SBarry Smith for (m = 0; m < nz; m++) {
852c733ed4SBarry Smith idx = bs * vi[m];
862c733ed4SBarry Smith for (k = 0; k < bs; k++) {
872c733ed4SBarry Smith xv = x[k + idx];
882c733ed4SBarry Smith s[0] -= v[0] * xv;
892c733ed4SBarry Smith s[1] -= v[1] * xv;
902c733ed4SBarry Smith s[2] -= v[2] * xv;
912c733ed4SBarry Smith s[3] -= v[3] * xv;
922c733ed4SBarry Smith s[4] -= v[4] * xv;
932c733ed4SBarry Smith s[5] -= v[5] * xv;
942c733ed4SBarry Smith s[6] -= v[6] * xv;
952c733ed4SBarry Smith s[7] -= v[7] * xv;
962c733ed4SBarry Smith s[8] -= v[8] * xv;
972c733ed4SBarry Smith s[9] -= v[9] * xv;
982c733ed4SBarry Smith s[10] -= v[10] * xv;
992c733ed4SBarry Smith s[11] -= v[11] * xv;
1002c733ed4SBarry Smith s[12] -= v[12] * xv;
1012c733ed4SBarry Smith s[13] -= v[13] * xv;
1022c733ed4SBarry Smith v += 14;
1032c733ed4SBarry Smith }
1042c733ed4SBarry Smith }
1059566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs));
1062c733ed4SBarry Smith for (k = 0; k < bs; k++) {
1072c733ed4SBarry Smith x[idt] += v[0] * s[k];
1082c733ed4SBarry Smith x[1 + idt] += v[1] * s[k];
1092c733ed4SBarry Smith x[2 + idt] += v[2] * s[k];
1102c733ed4SBarry Smith x[3 + idt] += v[3] * s[k];
1112c733ed4SBarry Smith x[4 + idt] += v[4] * s[k];
1122c733ed4SBarry Smith x[5 + idt] += v[5] * s[k];
1132c733ed4SBarry Smith x[6 + idt] += v[6] * s[k];
1142c733ed4SBarry Smith x[7 + idt] += v[7] * s[k];
1152c733ed4SBarry Smith x[8 + idt] += v[8] * s[k];
1162c733ed4SBarry Smith x[9 + idt] += v[9] * s[k];
1172c733ed4SBarry Smith x[10 + idt] += v[10] * s[k];
1182c733ed4SBarry Smith x[11 + idt] += v[11] * s[k];
1192c733ed4SBarry Smith x[12 + idt] += v[12] * s[k];
1202c733ed4SBarry Smith x[13 + idt] += v[13] * s[k];
1212c733ed4SBarry Smith v += 14;
1222c733ed4SBarry Smith }
1232c733ed4SBarry Smith }
1249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
1259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
1269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1282c733ed4SBarry Smith }
1292c733ed4SBarry Smith
130*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */
1312c733ed4SBarry Smith /* Default MatSolve for block size 13 */
1322c733ed4SBarry Smith
MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A,Vec bb,Vec xx)133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx)
134d71ae5a4SJacob Faibussowitsch {
1352c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1362c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
1372c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m;
1382c733ed4SBarry Smith const MatScalar *aa = a->a, *v;
1392c733ed4SBarry Smith PetscScalar s[13];
1402c733ed4SBarry Smith PetscScalar *x, xv;
1412c733ed4SBarry Smith const PetscScalar *b;
1422c733ed4SBarry Smith
1432c733ed4SBarry Smith PetscFunctionBegin;
1449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
1459566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
1462c733ed4SBarry Smith
1472c733ed4SBarry Smith /* forward solve the lower triangular */
1482c733ed4SBarry Smith for (i = 0; i < n; i++) {
1492c733ed4SBarry Smith v = aa + bs2 * ai[i];
1502c733ed4SBarry Smith vi = aj + ai[i];
1512c733ed4SBarry Smith nz = ai[i + 1] - ai[i];
1522c733ed4SBarry Smith idt = bs * i;
1539371c9d4SSatish Balay x[idt] = b[idt];
1549371c9d4SSatish Balay x[1 + idt] = b[1 + idt];
1559371c9d4SSatish Balay x[2 + idt] = b[2 + idt];
1569371c9d4SSatish Balay x[3 + idt] = b[3 + idt];
1579371c9d4SSatish Balay x[4 + idt] = b[4 + idt];
1589371c9d4SSatish Balay x[5 + idt] = b[5 + idt];
1599371c9d4SSatish Balay x[6 + idt] = b[6 + idt];
1609371c9d4SSatish Balay x[7 + idt] = b[7 + idt];
1619371c9d4SSatish Balay x[8 + idt] = b[8 + idt];
1629371c9d4SSatish Balay x[9 + idt] = b[9 + idt];
1639371c9d4SSatish Balay x[10 + idt] = b[10 + idt];
1649371c9d4SSatish Balay x[11 + idt] = b[11 + idt];
1659371c9d4SSatish Balay x[12 + idt] = b[12 + idt];
1662c733ed4SBarry Smith for (m = 0; m < nz; m++) {
1672c733ed4SBarry Smith idx = bs * vi[m];
1682c733ed4SBarry Smith for (k = 0; k < bs; k++) {
1692c733ed4SBarry Smith xv = x[k + idx];
1702c733ed4SBarry Smith x[idt] -= v[0] * xv;
1712c733ed4SBarry Smith x[1 + idt] -= v[1] * xv;
1722c733ed4SBarry Smith x[2 + idt] -= v[2] * xv;
1732c733ed4SBarry Smith x[3 + idt] -= v[3] * xv;
1742c733ed4SBarry Smith x[4 + idt] -= v[4] * xv;
1752c733ed4SBarry Smith x[5 + idt] -= v[5] * xv;
1762c733ed4SBarry Smith x[6 + idt] -= v[6] * xv;
1772c733ed4SBarry Smith x[7 + idt] -= v[7] * xv;
1782c733ed4SBarry Smith x[8 + idt] -= v[8] * xv;
1792c733ed4SBarry Smith x[9 + idt] -= v[9] * xv;
1802c733ed4SBarry Smith x[10 + idt] -= v[10] * xv;
1812c733ed4SBarry Smith x[11 + idt] -= v[11] * xv;
1822c733ed4SBarry Smith x[12 + idt] -= v[12] * xv;
1832c733ed4SBarry Smith v += 13;
1842c733ed4SBarry Smith }
1852c733ed4SBarry Smith }
1862c733ed4SBarry Smith }
1872c733ed4SBarry Smith /* backward solve the upper triangular */
1882c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) {
1892c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1);
1902c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1;
1912c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1;
1922c733ed4SBarry Smith idt = bs * i;
1939371c9d4SSatish Balay s[0] = x[idt];
1949371c9d4SSatish Balay s[1] = x[1 + idt];
1959371c9d4SSatish Balay s[2] = x[2 + idt];
1969371c9d4SSatish Balay s[3] = x[3 + idt];
1979371c9d4SSatish Balay s[4] = x[4 + idt];
1989371c9d4SSatish Balay s[5] = x[5 + idt];
1999371c9d4SSatish Balay s[6] = x[6 + idt];
2009371c9d4SSatish Balay s[7] = x[7 + idt];
2019371c9d4SSatish Balay s[8] = x[8 + idt];
2029371c9d4SSatish Balay s[9] = x[9 + idt];
2039371c9d4SSatish Balay s[10] = x[10 + idt];
2049371c9d4SSatish Balay s[11] = x[11 + idt];
2059371c9d4SSatish Balay s[12] = x[12 + idt];
2062c733ed4SBarry Smith
2072c733ed4SBarry Smith for (m = 0; m < nz; m++) {
2082c733ed4SBarry Smith idx = bs * vi[m];
2092c733ed4SBarry Smith for (k = 0; k < bs; k++) {
2102c733ed4SBarry Smith xv = x[k + idx];
2112c733ed4SBarry Smith s[0] -= v[0] * xv;
2122c733ed4SBarry Smith s[1] -= v[1] * xv;
2132c733ed4SBarry Smith s[2] -= v[2] * xv;
2142c733ed4SBarry Smith s[3] -= v[3] * xv;
2152c733ed4SBarry Smith s[4] -= v[4] * xv;
2162c733ed4SBarry Smith s[5] -= v[5] * xv;
2172c733ed4SBarry Smith s[6] -= v[6] * xv;
2182c733ed4SBarry Smith s[7] -= v[7] * xv;
2192c733ed4SBarry Smith s[8] -= v[8] * xv;
2202c733ed4SBarry Smith s[9] -= v[9] * xv;
2212c733ed4SBarry Smith s[10] -= v[10] * xv;
2222c733ed4SBarry Smith s[11] -= v[11] * xv;
2232c733ed4SBarry Smith s[12] -= v[12] * xv;
2242c733ed4SBarry Smith v += 13;
2252c733ed4SBarry Smith }
2262c733ed4SBarry Smith }
2279566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs));
2282c733ed4SBarry Smith for (k = 0; k < bs; k++) {
2292c733ed4SBarry Smith x[idt] += v[0] * s[k];
2302c733ed4SBarry Smith x[1 + idt] += v[1] * s[k];
2312c733ed4SBarry Smith x[2 + idt] += v[2] * s[k];
2322c733ed4SBarry Smith x[3 + idt] += v[3] * s[k];
2332c733ed4SBarry Smith x[4 + idt] += v[4] * s[k];
2342c733ed4SBarry Smith x[5 + idt] += v[5] * s[k];
2352c733ed4SBarry Smith x[6 + idt] += v[6] * s[k];
2362c733ed4SBarry Smith x[7 + idt] += v[7] * s[k];
2372c733ed4SBarry Smith x[8 + idt] += v[8] * s[k];
2382c733ed4SBarry Smith x[9 + idt] += v[9] * s[k];
2392c733ed4SBarry Smith x[10 + idt] += v[10] * s[k];
2402c733ed4SBarry Smith x[11 + idt] += v[11] * s[k];
2412c733ed4SBarry Smith x[12 + idt] += v[12] * s[k];
2422c733ed4SBarry Smith v += 13;
2432c733ed4SBarry Smith }
2442c733ed4SBarry Smith }
2459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
2469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
2479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
2483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2492c733ed4SBarry Smith }
2502c733ed4SBarry Smith
251*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */
2522c733ed4SBarry Smith /* Default MatSolve for block size 12 */
2532c733ed4SBarry Smith
MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A,Vec bb,Vec xx)254d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx)
255d71ae5a4SJacob Faibussowitsch {
2562c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2572c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
2582c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m;
2592c733ed4SBarry Smith const MatScalar *aa = a->a, *v;
2602c733ed4SBarry Smith PetscScalar s[12];
2612c733ed4SBarry Smith PetscScalar *x, xv;
2622c733ed4SBarry Smith const PetscScalar *b;
2632c733ed4SBarry Smith
2642c733ed4SBarry Smith PetscFunctionBegin;
2659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
2669566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
2672c733ed4SBarry Smith
2682c733ed4SBarry Smith /* forward solve the lower triangular */
2692c733ed4SBarry Smith for (i = 0; i < n; i++) {
2702c733ed4SBarry Smith v = aa + bs2 * ai[i];
2712c733ed4SBarry Smith vi = aj + ai[i];
2722c733ed4SBarry Smith nz = ai[i + 1] - ai[i];
2732c733ed4SBarry Smith idt = bs * i;
2749371c9d4SSatish Balay x[idt] = b[idt];
2759371c9d4SSatish Balay x[1 + idt] = b[1 + idt];
2769371c9d4SSatish Balay x[2 + idt] = b[2 + idt];
2779371c9d4SSatish Balay x[3 + idt] = b[3 + idt];
2789371c9d4SSatish Balay x[4 + idt] = b[4 + idt];
2799371c9d4SSatish Balay x[5 + idt] = b[5 + idt];
2809371c9d4SSatish Balay x[6 + idt] = b[6 + idt];
2819371c9d4SSatish Balay x[7 + idt] = b[7 + idt];
2829371c9d4SSatish Balay x[8 + idt] = b[8 + idt];
2839371c9d4SSatish Balay x[9 + idt] = b[9 + idt];
2849371c9d4SSatish Balay x[10 + idt] = b[10 + idt];
2859371c9d4SSatish Balay x[11 + idt] = b[11 + idt];
2862c733ed4SBarry Smith for (m = 0; m < nz; m++) {
2872c733ed4SBarry Smith idx = bs * vi[m];
2882c733ed4SBarry Smith for (k = 0; k < bs; k++) {
2892c733ed4SBarry Smith xv = x[k + idx];
2902c733ed4SBarry Smith x[idt] -= v[0] * xv;
2912c733ed4SBarry Smith x[1 + idt] -= v[1] * xv;
2922c733ed4SBarry Smith x[2 + idt] -= v[2] * xv;
2932c733ed4SBarry Smith x[3 + idt] -= v[3] * xv;
2942c733ed4SBarry Smith x[4 + idt] -= v[4] * xv;
2952c733ed4SBarry Smith x[5 + idt] -= v[5] * xv;
2962c733ed4SBarry Smith x[6 + idt] -= v[6] * xv;
2972c733ed4SBarry Smith x[7 + idt] -= v[7] * xv;
2982c733ed4SBarry Smith x[8 + idt] -= v[8] * xv;
2992c733ed4SBarry Smith x[9 + idt] -= v[9] * xv;
3002c733ed4SBarry Smith x[10 + idt] -= v[10] * xv;
3012c733ed4SBarry Smith x[11 + idt] -= v[11] * xv;
3022c733ed4SBarry Smith v += 12;
3032c733ed4SBarry Smith }
3042c733ed4SBarry Smith }
3052c733ed4SBarry Smith }
3062c733ed4SBarry Smith /* backward solve the upper triangular */
3072c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) {
3082c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1);
3092c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1;
3102c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1;
3112c733ed4SBarry Smith idt = bs * i;
3129371c9d4SSatish Balay s[0] = x[idt];
3139371c9d4SSatish Balay s[1] = x[1 + idt];
3149371c9d4SSatish Balay s[2] = x[2 + idt];
3159371c9d4SSatish Balay s[3] = x[3 + idt];
3169371c9d4SSatish Balay s[4] = x[4 + idt];
3179371c9d4SSatish Balay s[5] = x[5 + idt];
3189371c9d4SSatish Balay s[6] = x[6 + idt];
3199371c9d4SSatish Balay s[7] = x[7 + idt];
3209371c9d4SSatish Balay s[8] = x[8 + idt];
3219371c9d4SSatish Balay s[9] = x[9 + idt];
3229371c9d4SSatish Balay s[10] = x[10 + idt];
3239371c9d4SSatish Balay s[11] = x[11 + idt];
3242c733ed4SBarry Smith
3252c733ed4SBarry Smith for (m = 0; m < nz; m++) {
3262c733ed4SBarry Smith idx = bs * vi[m];
3272c733ed4SBarry Smith for (k = 0; k < bs; k++) {
3282c733ed4SBarry Smith xv = x[k + idx];
3292c733ed4SBarry Smith s[0] -= v[0] * xv;
3302c733ed4SBarry Smith s[1] -= v[1] * xv;
3312c733ed4SBarry Smith s[2] -= v[2] * xv;
3322c733ed4SBarry Smith s[3] -= v[3] * xv;
3332c733ed4SBarry Smith s[4] -= v[4] * xv;
3342c733ed4SBarry Smith s[5] -= v[5] * xv;
3352c733ed4SBarry Smith s[6] -= v[6] * xv;
3362c733ed4SBarry Smith s[7] -= v[7] * xv;
3372c733ed4SBarry Smith s[8] -= v[8] * xv;
3382c733ed4SBarry Smith s[9] -= v[9] * xv;
3392c733ed4SBarry Smith s[10] -= v[10] * xv;
3402c733ed4SBarry Smith s[11] -= v[11] * xv;
3412c733ed4SBarry Smith v += 12;
3422c733ed4SBarry Smith }
3432c733ed4SBarry Smith }
3449566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs));
3452c733ed4SBarry Smith for (k = 0; k < bs; k++) {
3462c733ed4SBarry Smith x[idt] += v[0] * s[k];
3472c733ed4SBarry Smith x[1 + idt] += v[1] * s[k];
3482c733ed4SBarry Smith x[2 + idt] += v[2] * s[k];
3492c733ed4SBarry Smith x[3 + idt] += v[3] * s[k];
3502c733ed4SBarry Smith x[4 + idt] += v[4] * s[k];
3512c733ed4SBarry Smith x[5 + idt] += v[5] * s[k];
3522c733ed4SBarry Smith x[6 + idt] += v[6] * s[k];
3532c733ed4SBarry Smith x[7 + idt] += v[7] * s[k];
3542c733ed4SBarry Smith x[8 + idt] += v[8] * s[k];
3552c733ed4SBarry Smith x[9 + idt] += v[9] * s[k];
3562c733ed4SBarry Smith x[10 + idt] += v[10] * s[k];
3572c733ed4SBarry Smith x[11 + idt] += v[11] * s[k];
3582c733ed4SBarry Smith v += 12;
3592c733ed4SBarry Smith }
3602c733ed4SBarry Smith }
3619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
3629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
3639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
3643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3652c733ed4SBarry Smith }
366