1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* 5 Special case where the matrix was ILU(0) factored in the natural 6 ordering. This eliminates the need for the column and row permutation. 7 */ 8 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 9 { 10 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 11 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j; 12 const PetscInt *diag = a->diag, *vi; 13 const MatScalar *aa = a->a, *v; 14 PetscScalar *x, s1, s2, s3, x1, x2, x3; 15 const PetscScalar *b; 16 PetscInt jdx, idt, idx, nz, i; 17 18 PetscFunctionBegin; 19 PetscCall(VecGetArrayRead(bb, &b)); 20 PetscCall(VecGetArray(xx, &x)); 21 22 /* forward solve the lower triangular */ 23 idx = 0; 24 x[0] = b[0]; 25 x[1] = b[1]; 26 x[2] = b[2]; 27 for (i = 1; i < n; i++) { 28 v = aa + 9 * ai[i]; 29 vi = aj + ai[i]; 30 nz = diag[i] - ai[i]; 31 idx += 3; 32 s1 = b[idx]; 33 s2 = b[1 + idx]; 34 s3 = b[2 + idx]; 35 while (nz--) { 36 jdx = 3 * (*vi++); 37 x1 = x[jdx]; 38 x2 = x[1 + jdx]; 39 x3 = x[2 + jdx]; 40 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 41 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 42 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 43 v += 9; 44 } 45 x[idx] = s1; 46 x[1 + idx] = s2; 47 x[2 + idx] = s3; 48 } 49 /* backward solve the upper triangular */ 50 for (i = n - 1; i >= 0; i--) { 51 v = aa + 9 * diag[i] + 9; 52 vi = aj + diag[i] + 1; 53 nz = ai[i + 1] - diag[i] - 1; 54 idt = 3 * i; 55 s1 = x[idt]; 56 s2 = x[1 + idt]; 57 s3 = x[2 + idt]; 58 while (nz--) { 59 idx = 3 * (*vi++); 60 x1 = x[idx]; 61 x2 = x[1 + idx]; 62 x3 = x[2 + idx]; 63 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 64 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 65 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 66 v += 9; 67 } 68 v = aa + 9 * diag[i]; 69 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 70 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 71 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 72 } 73 74 PetscCall(VecRestoreArrayRead(bb, &b)); 75 PetscCall(VecRestoreArray(xx, &x)); 76 PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) 81 { 82 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 83 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 84 PetscInt i, k, nz, idx, jdx, idt; 85 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 86 const MatScalar *aa = a->a, *v; 87 PetscScalar *x; 88 const PetscScalar *b; 89 PetscScalar s1, s2, s3, x1, x2, x3; 90 91 PetscFunctionBegin; 92 PetscCall(VecGetArrayRead(bb, &b)); 93 PetscCall(VecGetArray(xx, &x)); 94 /* forward solve the lower triangular */ 95 idx = 0; 96 x[0] = b[idx]; 97 x[1] = b[1 + idx]; 98 x[2] = b[2 + idx]; 99 for (i = 1; i < n; i++) { 100 v = aa + bs2 * ai[i]; 101 vi = aj + ai[i]; 102 nz = ai[i + 1] - ai[i]; 103 idx = bs * i; 104 s1 = b[idx]; 105 s2 = b[1 + idx]; 106 s3 = b[2 + idx]; 107 for (k = 0; k < nz; k++) { 108 jdx = bs * vi[k]; 109 x1 = x[jdx]; 110 x2 = x[1 + jdx]; 111 x3 = x[2 + jdx]; 112 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 113 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 114 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 115 116 v += bs2; 117 } 118 119 x[idx] = s1; 120 x[1 + idx] = s2; 121 x[2 + idx] = s3; 122 } 123 124 /* backward solve the upper triangular */ 125 for (i = n - 1; i >= 0; i--) { 126 v = aa + bs2 * (adiag[i + 1] + 1); 127 vi = aj + adiag[i + 1] + 1; 128 nz = adiag[i] - adiag[i + 1] - 1; 129 idt = bs * i; 130 s1 = x[idt]; 131 s2 = x[1 + idt]; 132 s3 = x[2 + idt]; 133 134 for (k = 0; k < nz; k++) { 135 idx = bs * vi[k]; 136 x1 = x[idx]; 137 x2 = x[1 + idx]; 138 x3 = x[2 + idx]; 139 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 140 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 141 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 142 143 v += bs2; 144 } 145 /* x = inv_diagonal*x */ 146 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 147 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 148 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 149 } 150 151 PetscCall(VecRestoreArrayRead(bb, &b)); 152 PetscCall(VecRestoreArray(xx, &x)); 153 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 154 PetscFunctionReturn(PETSC_SUCCESS); 155 } 156 157 PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) 158 { 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 160 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 161 PetscInt i, k, nz, idx, jdx; 162 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 163 const MatScalar *aa = a->a, *v; 164 PetscScalar *x; 165 const PetscScalar *b; 166 PetscScalar s1, s2, s3, x1, x2, x3; 167 168 PetscFunctionBegin; 169 PetscCall(VecGetArrayRead(bb, &b)); 170 PetscCall(VecGetArray(xx, &x)); 171 /* forward solve the lower triangular */ 172 idx = 0; 173 x[0] = b[idx]; 174 x[1] = b[1 + idx]; 175 x[2] = b[2 + idx]; 176 for (i = 1; i < n; i++) { 177 v = aa + bs2 * ai[i]; 178 vi = aj + ai[i]; 179 nz = ai[i + 1] - ai[i]; 180 idx = bs * i; 181 s1 = b[idx]; 182 s2 = b[1 + idx]; 183 s3 = b[2 + idx]; 184 for (k = 0; k < nz; k++) { 185 jdx = bs * vi[k]; 186 x1 = x[jdx]; 187 x2 = x[1 + jdx]; 188 x3 = x[2 + jdx]; 189 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 190 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 191 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 192 193 v += bs2; 194 } 195 196 x[idx] = s1; 197 x[1 + idx] = s2; 198 x[2 + idx] = s3; 199 } 200 201 PetscCall(VecRestoreArrayRead(bb, &b)); 202 PetscCall(VecRestoreArray(xx, &x)); 203 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n)); 204 PetscFunctionReturn(PETSC_SUCCESS); 205 } 206 207 PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx) 208 { 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 210 const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag; 211 PetscInt i, k, nz, idx, idt; 212 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 213 const MatScalar *aa = a->a, *v; 214 PetscScalar *x; 215 const PetscScalar *b; 216 PetscScalar s1, s2, s3, x1, x2, x3; 217 218 PetscFunctionBegin; 219 PetscCall(VecGetArrayRead(bb, &b)); 220 PetscCall(VecGetArray(xx, &x)); 221 222 /* backward solve the upper triangular */ 223 for (i = n - 1; i >= 0; i--) { 224 v = aa + bs2 * (adiag[i + 1] + 1); 225 vi = aj + adiag[i + 1] + 1; 226 nz = adiag[i] - adiag[i + 1] - 1; 227 idt = bs * i; 228 s1 = b[idt]; 229 s2 = b[1 + idt]; 230 s3 = b[2 + idt]; 231 232 for (k = 0; k < nz; k++) { 233 idx = bs * vi[k]; 234 x1 = x[idx]; 235 x2 = x[1 + idx]; 236 x3 = x[2 + idx]; 237 s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 238 s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 239 s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 240 241 v += bs2; 242 } 243 /* x = inv_diagonal*x */ 244 x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 245 x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 246 x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 247 } 248 249 PetscCall(VecRestoreArrayRead(bb, &b)); 250 PetscCall(VecRestoreArray(xx, &x)); 251 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254