1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* 5 Special case where the matrix was ILU(0) factored in the natural 6 ordering. This eliminates the need for the column and row permutation. 7 */ 8 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 10 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j; 11 const PetscInt *diag = a->diag, *vi; 12 const MatScalar *aa = a->a, *v; 13 PetscScalar *x, s1, s2, s3, x1, x2, x3; 14 const PetscScalar *b; 15 PetscInt jdx, idt, idx, nz, i; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb, &b)); 19 PetscCall(VecGetArray(xx, &x)); 20 21 /* forward solve the lower triangular */ 22 idx = 0; 23 x[0] = b[0]; 24 x[1] = b[1]; 25 x[2] = b[2]; 26 for (i = 1; i < n; i++) { 27 v = aa + 9 * ai[i]; 28 vi = aj + ai[i]; 29 nz = diag[i] - ai[i]; 30 idx += 3; 31 s1 = b[idx]; 32 s2 = b[1 + idx]; 33 s3 = b[2 + idx]; 34 while (nz--) { 35 jdx = 3 * (*vi++); 36 x1 = x[jdx]; 37 x2 = x[1 + jdx]; 38 x3 = x[2 + jdx]; 39 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 40 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 41 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 42 v += 9; 43 } 44 x[idx] = s1; 45 x[1 + idx] = s2; 46 x[2 + idx] = s3; 47 } 48 /* backward solve the upper triangular */ 49 for (i = n - 1; i >= 0; i--) { 50 v = aa + 9 * diag[i] + 9; 51 vi = aj + diag[i] + 1; 52 nz = ai[i + 1] - diag[i] - 1; 53 idt = 3 * i; 54 s1 = x[idt]; 55 s2 = x[1 + idt]; 56 s3 = x[2 + idt]; 57 while (nz--) { 58 idx = 3 * (*vi++); 59 x1 = x[idx]; 60 x2 = x[1 + idx]; 61 x3 = x[2 + idx]; 62 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 63 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 64 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 65 v += 9; 66 } 67 v = aa + 9 * diag[i]; 68 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 69 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 70 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 71 } 72 73 PetscCall(VecRestoreArrayRead(bb, &b)); 74 PetscCall(VecRestoreArray(xx, &x)); 75 PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n)); 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 81 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 82 PetscInt i, k, nz, idx, jdx, idt; 83 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 84 const MatScalar *aa = a->a, *v; 85 PetscScalar *x; 86 const PetscScalar *b; 87 PetscScalar s1, s2, s3, x1, x2, x3; 88 89 PetscFunctionBegin; 90 PetscCall(VecGetArrayRead(bb, &b)); 91 PetscCall(VecGetArray(xx, &x)); 92 /* forward solve the lower triangular */ 93 idx = 0; 94 x[0] = b[idx]; 95 x[1] = b[1 + idx]; 96 x[2] = b[2 + idx]; 97 for (i = 1; i < n; i++) { 98 v = aa + bs2 * ai[i]; 99 vi = aj + ai[i]; 100 nz = ai[i + 1] - ai[i]; 101 idx = bs * i; 102 s1 = b[idx]; 103 s2 = b[1 + idx]; 104 s3 = b[2 + idx]; 105 for (k = 0; k < nz; k++) { 106 jdx = bs * vi[k]; 107 x1 = x[jdx]; 108 x2 = x[1 + jdx]; 109 x3 = x[2 + jdx]; 110 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 111 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 112 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 113 114 v += bs2; 115 } 116 117 x[idx] = s1; 118 x[1 + idx] = s2; 119 x[2 + idx] = s3; 120 } 121 122 /* backward solve the upper triangular */ 123 for (i = n - 1; i >= 0; i--) { 124 v = aa + bs2 * (adiag[i + 1] + 1); 125 vi = aj + adiag[i + 1] + 1; 126 nz = adiag[i] - adiag[i + 1] - 1; 127 idt = bs * i; 128 s1 = x[idt]; 129 s2 = x[1 + idt]; 130 s3 = x[2 + idt]; 131 132 for (k = 0; k < nz; k++) { 133 idx = bs * vi[k]; 134 x1 = x[idx]; 135 x2 = x[1 + idx]; 136 x3 = x[2 + idx]; 137 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 138 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 139 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 140 141 v += bs2; 142 } 143 /* x = inv_diagonal*x */ 144 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 145 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 146 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 147 } 148 149 PetscCall(VecRestoreArrayRead(bb, &b)); 150 PetscCall(VecRestoreArray(xx, &x)); 151 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 157 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 158 PetscInt i, k, nz, idx, jdx; 159 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 160 const MatScalar *aa = a->a, *v; 161 PetscScalar *x; 162 const PetscScalar *b; 163 PetscScalar s1, s2, s3, x1, x2, x3; 164 165 PetscFunctionBegin; 166 PetscCall(VecGetArrayRead(bb, &b)); 167 PetscCall(VecGetArray(xx, &x)); 168 /* forward solve the lower triangular */ 169 idx = 0; 170 x[0] = b[idx]; 171 x[1] = b[1 + idx]; 172 x[2] = b[2 + idx]; 173 for (i = 1; i < n; i++) { 174 v = aa + bs2 * ai[i]; 175 vi = aj + ai[i]; 176 nz = ai[i + 1] - ai[i]; 177 idx = bs * i; 178 s1 = b[idx]; 179 s2 = b[1 + idx]; 180 s3 = b[2 + idx]; 181 for (k = 0; k < nz; k++) { 182 jdx = bs * vi[k]; 183 x1 = x[jdx]; 184 x2 = x[1 + jdx]; 185 x3 = x[2 + jdx]; 186 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 187 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 188 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 189 190 v += bs2; 191 } 192 193 x[idx] = s1; 194 x[1 + idx] = s2; 195 x[2 + idx] = s3; 196 } 197 198 PetscCall(VecRestoreArrayRead(bb, &b)); 199 PetscCall(VecRestoreArray(xx, &x)); 200 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n)); 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) { 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 206 const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag; 207 PetscInt i, k, nz, idx, idt; 208 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 209 const MatScalar *aa = a->a, *v; 210 PetscScalar *x; 211 const PetscScalar *b; 212 PetscScalar s1, s2, s3, x1, x2, x3; 213 214 PetscFunctionBegin; 215 PetscCall(VecGetArrayRead(bb, &b)); 216 PetscCall(VecGetArray(xx, &x)); 217 218 /* backward solve the upper triangular */ 219 for (i = n - 1; i >= 0; i--) { 220 v = aa + bs2 * (adiag[i + 1] + 1); 221 vi = aj + adiag[i + 1] + 1; 222 nz = adiag[i] - adiag[i + 1] - 1; 223 idt = bs * i; 224 s1 = b[idt]; 225 s2 = b[1 + idt]; 226 s3 = b[2 + idt]; 227 228 for (k = 0; k < nz; k++) { 229 idx = bs * vi[k]; 230 x1 = x[idx]; 231 x2 = x[1 + idx]; 232 x3 = x[2 + idx]; 233 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 234 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 235 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 236 237 v += bs2; 238 } 239 /* x = inv_diagonal*x */ 240 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 241 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 242 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 243 } 244 245 PetscCall(VecRestoreArrayRead(bb, &b)); 246 PetscCall(VecRestoreArray(xx, &x)); 247 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 248 PetscFunctionReturn(0); 249 } 250