1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* Block operations are done by accessing one column at a time */ 5 /* Default MatSolve for block size 14 */ 6 7 PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx) 8 { 9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 10 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 11 PetscInt i, k, nz, idx, idt, m; 12 const MatScalar *aa = a->a, *v; 13 PetscScalar s[14]; 14 PetscScalar *x, xv; 15 const PetscScalar *b; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb, &b)); 19 PetscCall(VecGetArray(xx, &x)); 20 21 /* forward solve the lower triangular */ 22 for (i = 0; i < n; i++) { 23 v = aa + bs2 * ai[i]; 24 vi = aj + ai[i]; 25 nz = ai[i + 1] - ai[i]; 26 idt = bs * i; 27 x[idt] = b[idt]; 28 x[1 + idt] = b[1 + idt]; 29 x[2 + idt] = b[2 + idt]; 30 x[3 + idt] = b[3 + idt]; 31 x[4 + idt] = b[4 + idt]; 32 x[5 + idt] = b[5 + idt]; 33 x[6 + idt] = b[6 + idt]; 34 x[7 + idt] = b[7 + idt]; 35 x[8 + idt] = b[8 + idt]; 36 x[9 + idt] = b[9 + idt]; 37 x[10 + idt] = b[10 + idt]; 38 x[11 + idt] = b[11 + idt]; 39 x[12 + idt] = b[12 + idt]; 40 x[13 + idt] = b[13 + idt]; 41 for (m = 0; m < nz; m++) { 42 idx = bs * vi[m]; 43 for (k = 0; k < bs; k++) { 44 xv = x[k + idx]; 45 x[idt] -= v[0] * xv; 46 x[1 + idt] -= v[1] * xv; 47 x[2 + idt] -= v[2] * xv; 48 x[3 + idt] -= v[3] * xv; 49 x[4 + idt] -= v[4] * xv; 50 x[5 + idt] -= v[5] * xv; 51 x[6 + idt] -= v[6] * xv; 52 x[7 + idt] -= v[7] * xv; 53 x[8 + idt] -= v[8] * xv; 54 x[9 + idt] -= v[9] * xv; 55 x[10 + idt] -= v[10] * xv; 56 x[11 + idt] -= v[11] * xv; 57 x[12 + idt] -= v[12] * xv; 58 x[13 + idt] -= v[13] * xv; 59 v += 14; 60 } 61 } 62 } 63 /* backward solve the upper triangular */ 64 for (i = n - 1; i >= 0; i--) { 65 v = aa + bs2 * (adiag[i + 1] + 1); 66 vi = aj + adiag[i + 1] + 1; 67 nz = adiag[i] - adiag[i + 1] - 1; 68 idt = bs * i; 69 s[0] = x[idt]; 70 s[1] = x[1 + idt]; 71 s[2] = x[2 + idt]; 72 s[3] = x[3 + idt]; 73 s[4] = x[4 + idt]; 74 s[5] = x[5 + idt]; 75 s[6] = x[6 + idt]; 76 s[7] = x[7 + idt]; 77 s[8] = x[8 + idt]; 78 s[9] = x[9 + idt]; 79 s[10] = x[10 + idt]; 80 s[11] = x[11 + idt]; 81 s[12] = x[12 + idt]; 82 s[13] = x[13 + idt]; 83 84 for (m = 0; m < nz; m++) { 85 idx = bs * vi[m]; 86 for (k = 0; k < bs; k++) { 87 xv = x[k + idx]; 88 s[0] -= v[0] * xv; 89 s[1] -= v[1] * xv; 90 s[2] -= v[2] * xv; 91 s[3] -= v[3] * xv; 92 s[4] -= v[4] * xv; 93 s[5] -= v[5] * xv; 94 s[6] -= v[6] * xv; 95 s[7] -= v[7] * xv; 96 s[8] -= v[8] * xv; 97 s[9] -= v[9] * xv; 98 s[10] -= v[10] * xv; 99 s[11] -= v[11] * xv; 100 s[12] -= v[12] * xv; 101 s[13] -= v[13] * xv; 102 v += 14; 103 } 104 } 105 PetscCall(PetscArrayzero(x + idt, bs)); 106 for (k = 0; k < bs; k++) { 107 x[idt] += v[0] * s[k]; 108 x[1 + idt] += v[1] * s[k]; 109 x[2 + idt] += v[2] * s[k]; 110 x[3 + idt] += v[3] * s[k]; 111 x[4 + idt] += v[4] * s[k]; 112 x[5 + idt] += v[5] * s[k]; 113 x[6 + idt] += v[6] * s[k]; 114 x[7 + idt] += v[7] * s[k]; 115 x[8 + idt] += v[8] * s[k]; 116 x[9 + idt] += v[9] * s[k]; 117 x[10 + idt] += v[10] * s[k]; 118 x[11 + idt] += v[11] * s[k]; 119 x[12 + idt] += v[12] * s[k]; 120 x[13 + idt] += v[13] * s[k]; 121 v += 14; 122 } 123 } 124 PetscCall(VecRestoreArrayRead(bb, &b)); 125 PetscCall(VecRestoreArray(xx, &x)); 126 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 /* Block operations are done by accessing one column at a time */ 131 /* Default MatSolve for block size 13 */ 132 133 PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx) 134 { 135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 136 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 137 PetscInt i, k, nz, idx, idt, m; 138 const MatScalar *aa = a->a, *v; 139 PetscScalar s[13]; 140 PetscScalar *x, xv; 141 const PetscScalar *b; 142 143 PetscFunctionBegin; 144 PetscCall(VecGetArrayRead(bb, &b)); 145 PetscCall(VecGetArray(xx, &x)); 146 147 /* forward solve the lower triangular */ 148 for (i = 0; i < n; i++) { 149 v = aa + bs2 * ai[i]; 150 vi = aj + ai[i]; 151 nz = ai[i + 1] - ai[i]; 152 idt = bs * i; 153 x[idt] = b[idt]; 154 x[1 + idt] = b[1 + idt]; 155 x[2 + idt] = b[2 + idt]; 156 x[3 + idt] = b[3 + idt]; 157 x[4 + idt] = b[4 + idt]; 158 x[5 + idt] = b[5 + idt]; 159 x[6 + idt] = b[6 + idt]; 160 x[7 + idt] = b[7 + idt]; 161 x[8 + idt] = b[8 + idt]; 162 x[9 + idt] = b[9 + idt]; 163 x[10 + idt] = b[10 + idt]; 164 x[11 + idt] = b[11 + idt]; 165 x[12 + idt] = b[12 + idt]; 166 for (m = 0; m < nz; m++) { 167 idx = bs * vi[m]; 168 for (k = 0; k < bs; k++) { 169 xv = x[k + idx]; 170 x[idt] -= v[0] * xv; 171 x[1 + idt] -= v[1] * xv; 172 x[2 + idt] -= v[2] * xv; 173 x[3 + idt] -= v[3] * xv; 174 x[4 + idt] -= v[4] * xv; 175 x[5 + idt] -= v[5] * xv; 176 x[6 + idt] -= v[6] * xv; 177 x[7 + idt] -= v[7] * xv; 178 x[8 + idt] -= v[8] * xv; 179 x[9 + idt] -= v[9] * xv; 180 x[10 + idt] -= v[10] * xv; 181 x[11 + idt] -= v[11] * xv; 182 x[12 + idt] -= v[12] * xv; 183 v += 13; 184 } 185 } 186 } 187 /* backward solve the upper triangular */ 188 for (i = n - 1; i >= 0; i--) { 189 v = aa + bs2 * (adiag[i + 1] + 1); 190 vi = aj + adiag[i + 1] + 1; 191 nz = adiag[i] - adiag[i + 1] - 1; 192 idt = bs * i; 193 s[0] = x[idt]; 194 s[1] = x[1 + idt]; 195 s[2] = x[2 + idt]; 196 s[3] = x[3 + idt]; 197 s[4] = x[4 + idt]; 198 s[5] = x[5 + idt]; 199 s[6] = x[6 + idt]; 200 s[7] = x[7 + idt]; 201 s[8] = x[8 + idt]; 202 s[9] = x[9 + idt]; 203 s[10] = x[10 + idt]; 204 s[11] = x[11 + idt]; 205 s[12] = x[12 + idt]; 206 207 for (m = 0; m < nz; m++) { 208 idx = bs * vi[m]; 209 for (k = 0; k < bs; k++) { 210 xv = x[k + idx]; 211 s[0] -= v[0] * xv; 212 s[1] -= v[1] * xv; 213 s[2] -= v[2] * xv; 214 s[3] -= v[3] * xv; 215 s[4] -= v[4] * xv; 216 s[5] -= v[5] * xv; 217 s[6] -= v[6] * xv; 218 s[7] -= v[7] * xv; 219 s[8] -= v[8] * xv; 220 s[9] -= v[9] * xv; 221 s[10] -= v[10] * xv; 222 s[11] -= v[11] * xv; 223 s[12] -= v[12] * xv; 224 v += 13; 225 } 226 } 227 PetscCall(PetscArrayzero(x + idt, bs)); 228 for (k = 0; k < bs; k++) { 229 x[idt] += v[0] * s[k]; 230 x[1 + idt] += v[1] * s[k]; 231 x[2 + idt] += v[2] * s[k]; 232 x[3 + idt] += v[3] * s[k]; 233 x[4 + idt] += v[4] * s[k]; 234 x[5 + idt] += v[5] * s[k]; 235 x[6 + idt] += v[6] * s[k]; 236 x[7 + idt] += v[7] * s[k]; 237 x[8 + idt] += v[8] * s[k]; 238 x[9 + idt] += v[9] * s[k]; 239 x[10 + idt] += v[10] * s[k]; 240 x[11 + idt] += v[11] * s[k]; 241 x[12 + idt] += v[12] * s[k]; 242 v += 13; 243 } 244 } 245 PetscCall(VecRestoreArrayRead(bb, &b)); 246 PetscCall(VecRestoreArray(xx, &x)); 247 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 /* Block operations are done by accessing one column at a time */ 252 /* Default MatSolve for block size 12 */ 253 254 PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx) 255 { 256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 257 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 258 PetscInt i, k, nz, idx, idt, m; 259 const MatScalar *aa = a->a, *v; 260 PetscScalar s[12]; 261 PetscScalar *x, xv; 262 const PetscScalar *b; 263 264 PetscFunctionBegin; 265 PetscCall(VecGetArrayRead(bb, &b)); 266 PetscCall(VecGetArray(xx, &x)); 267 268 /* forward solve the lower triangular */ 269 for (i = 0; i < n; i++) { 270 v = aa + bs2 * ai[i]; 271 vi = aj + ai[i]; 272 nz = ai[i + 1] - ai[i]; 273 idt = bs * i; 274 x[idt] = b[idt]; 275 x[1 + idt] = b[1 + idt]; 276 x[2 + idt] = b[2 + idt]; 277 x[3 + idt] = b[3 + idt]; 278 x[4 + idt] = b[4 + idt]; 279 x[5 + idt] = b[5 + idt]; 280 x[6 + idt] = b[6 + idt]; 281 x[7 + idt] = b[7 + idt]; 282 x[8 + idt] = b[8 + idt]; 283 x[9 + idt] = b[9 + idt]; 284 x[10 + idt] = b[10 + idt]; 285 x[11 + idt] = b[11 + idt]; 286 for (m = 0; m < nz; m++) { 287 idx = bs * vi[m]; 288 for (k = 0; k < bs; k++) { 289 xv = x[k + idx]; 290 x[idt] -= v[0] * xv; 291 x[1 + idt] -= v[1] * xv; 292 x[2 + idt] -= v[2] * xv; 293 x[3 + idt] -= v[3] * xv; 294 x[4 + idt] -= v[4] * xv; 295 x[5 + idt] -= v[5] * xv; 296 x[6 + idt] -= v[6] * xv; 297 x[7 + idt] -= v[7] * xv; 298 x[8 + idt] -= v[8] * xv; 299 x[9 + idt] -= v[9] * xv; 300 x[10 + idt] -= v[10] * xv; 301 x[11 + idt] -= v[11] * xv; 302 v += 12; 303 } 304 } 305 } 306 /* backward solve the upper triangular */ 307 for (i = n - 1; i >= 0; i--) { 308 v = aa + bs2 * (adiag[i + 1] + 1); 309 vi = aj + adiag[i + 1] + 1; 310 nz = adiag[i] - adiag[i + 1] - 1; 311 idt = bs * i; 312 s[0] = x[idt]; 313 s[1] = x[1 + idt]; 314 s[2] = x[2 + idt]; 315 s[3] = x[3 + idt]; 316 s[4] = x[4 + idt]; 317 s[5] = x[5 + idt]; 318 s[6] = x[6 + idt]; 319 s[7] = x[7 + idt]; 320 s[8] = x[8 + idt]; 321 s[9] = x[9 + idt]; 322 s[10] = x[10 + idt]; 323 s[11] = x[11 + idt]; 324 325 for (m = 0; m < nz; m++) { 326 idx = bs * vi[m]; 327 for (k = 0; k < bs; k++) { 328 xv = x[k + idx]; 329 s[0] -= v[0] * xv; 330 s[1] -= v[1] * xv; 331 s[2] -= v[2] * xv; 332 s[3] -= v[3] * xv; 333 s[4] -= v[4] * xv; 334 s[5] -= v[5] * xv; 335 s[6] -= v[6] * xv; 336 s[7] -= v[7] * xv; 337 s[8] -= v[8] * xv; 338 s[9] -= v[9] * xv; 339 s[10] -= v[10] * xv; 340 s[11] -= v[11] * xv; 341 v += 12; 342 } 343 } 344 PetscCall(PetscArrayzero(x + idt, bs)); 345 for (k = 0; k < bs; k++) { 346 x[idt] += v[0] * s[k]; 347 x[1 + idt] += v[1] * s[k]; 348 x[2 + idt] += v[2] * s[k]; 349 x[3 + idt] += v[3] * s[k]; 350 x[4 + idt] += v[4] * s[k]; 351 x[5 + idt] += v[5] * s[k]; 352 x[6 + idt] += v[6] * s[k]; 353 x[7 + idt] += v[7] * s[k]; 354 x[8 + idt] += v[8] * s[k]; 355 x[9 + idt] += v[9] * s[k]; 356 x[10 + idt] += v[10] * s[k]; 357 x[11 + idt] += v[11] * s[k]; 358 v += 12; 359 } 360 } 361 PetscCall(VecRestoreArrayRead(bb, &b)); 362 PetscCall(VecRestoreArray(xx, &x)); 363 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366