1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* Block operations are done by accessing one column at a time */ 5 /* Default MatSolve for block size 11 */ 6 7 PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx) 8 { 9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 10 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 11 PetscInt i, k, nz, idx, idt, m; 12 const MatScalar *aa = a->a, *v; 13 PetscScalar s[11]; 14 PetscScalar *x, xv; 15 const PetscScalar *b; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb, &b)); 19 PetscCall(VecGetArray(xx, &x)); 20 21 /* forward solve the lower triangular */ 22 for (i = 0; i < n; i++) { 23 v = aa + bs2 * ai[i]; 24 vi = aj + ai[i]; 25 nz = ai[i + 1] - ai[i]; 26 idt = bs * i; 27 x[idt] = b[idt]; 28 x[1 + idt] = b[1 + idt]; 29 x[2 + idt] = b[2 + idt]; 30 x[3 + idt] = b[3 + idt]; 31 x[4 + idt] = b[4 + idt]; 32 x[5 + idt] = b[5 + idt]; 33 x[6 + idt] = b[6 + idt]; 34 x[7 + idt] = b[7 + idt]; 35 x[8 + idt] = b[8 + idt]; 36 x[9 + idt] = b[9 + idt]; 37 x[10 + idt] = b[10 + idt]; 38 for (m = 0; m < nz; m++) { 39 idx = bs * vi[m]; 40 for (k = 0; k < 11; k++) { 41 xv = x[k + idx]; 42 x[idt] -= v[0] * xv; 43 x[1 + idt] -= v[1] * xv; 44 x[2 + idt] -= v[2] * xv; 45 x[3 + idt] -= v[3] * xv; 46 x[4 + idt] -= v[4] * xv; 47 x[5 + idt] -= v[5] * xv; 48 x[6 + idt] -= v[6] * xv; 49 x[7 + idt] -= v[7] * xv; 50 x[8 + idt] -= v[8] * xv; 51 x[9 + idt] -= v[9] * xv; 52 x[10 + idt] -= v[10] * xv; 53 v += 11; 54 } 55 } 56 } 57 /* backward solve the upper triangular */ 58 for (i = n - 1; i >= 0; i--) { 59 v = aa + bs2 * (adiag[i + 1] + 1); 60 vi = aj + adiag[i + 1] + 1; 61 nz = adiag[i] - adiag[i + 1] - 1; 62 idt = bs * i; 63 s[0] = x[idt]; 64 s[1] = x[1 + idt]; 65 s[2] = x[2 + idt]; 66 s[3] = x[3 + idt]; 67 s[4] = x[4 + idt]; 68 s[5] = x[5 + idt]; 69 s[6] = x[6 + idt]; 70 s[7] = x[7 + idt]; 71 s[8] = x[8 + idt]; 72 s[9] = x[9 + idt]; 73 s[10] = x[10 + idt]; 74 75 for (m = 0; m < nz; m++) { 76 idx = bs * vi[m]; 77 for (k = 0; k < 11; k++) { 78 xv = x[k + idx]; 79 s[0] -= v[0] * xv; 80 s[1] -= v[1] * xv; 81 s[2] -= v[2] * xv; 82 s[3] -= v[3] * xv; 83 s[4] -= v[4] * xv; 84 s[5] -= v[5] * xv; 85 s[6] -= v[6] * xv; 86 s[7] -= v[7] * xv; 87 s[8] -= v[8] * xv; 88 s[9] -= v[9] * xv; 89 s[10] -= v[10] * xv; 90 v += 11; 91 } 92 } 93 PetscCall(PetscArrayzero(x + idt, bs)); 94 for (k = 0; k < 11; k++) { 95 x[idt] += v[0] * s[k]; 96 x[1 + idt] += v[1] * s[k]; 97 x[2 + idt] += v[2] * s[k]; 98 x[3 + idt] += v[3] * s[k]; 99 x[4 + idt] += v[4] * s[k]; 100 x[5 + idt] += v[5] * s[k]; 101 x[6 + idt] += v[6] * s[k]; 102 x[7 + idt] += v[7] * s[k]; 103 x[8 + idt] += v[8] * s[k]; 104 x[9 + idt] += v[9] * s[k]; 105 x[10 + idt] += v[10] * s[k]; 106 v += 11; 107 } 108 } 109 PetscCall(VecRestoreArrayRead(bb, &b)); 110 PetscCall(VecRestoreArray(xx, &x)); 111 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114