1 /* 2 Kernels used in sparse ILU (and LU) and in the resulting triangular 3 solves. These are for block algorithms where the block sizes are on 4 the order of 2-6+. 5 6 There are TWO versions of the macros below. 7 1) standard for MatScalar == PetscScalar use the standard BLAS for 8 block size larger than 7 and 9 2) handcoded Fortran single precision for the matrices, since BLAS 10 does not have some arguments in single and some in double. 11 12 */ 13 #pragma once 14 #include <petscblaslapack.h> 15 16 /* 17 These are C kernels,they are contained in 18 src/mat/impls/baij/seq 19 */ 20 21 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscLINPACKgefa(MatScalar *, PetscInt, PetscInt *, PetscBool, PetscBool *); 22 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscLINPACKgedi(MatScalar *, PetscInt, PetscInt *, MatScalar *); 23 24 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *, PetscReal, PetscBool, PetscBool *); 25 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_3(MatScalar *, PetscReal, PetscBool, PetscBool *); 26 27 #define PetscKernel_A_gets_inverse_A_4_nopivot(mat) \ 28 PetscMacroReturnStandard(MatScalar d, di = mat[0]; mat[0] = d = 1.0 / di; mat[4] *= -d; mat[8] *= -d; mat[12] *= -d; mat[1] *= d; mat[2] *= d; mat[3] *= d; mat[5] += mat[4] * mat[1] * di; mat[6] += mat[4] * mat[2] * di; mat[7] += mat[4] * mat[3] * di; mat[9] += mat[8] * mat[1] * di; mat[10] += mat[8] * mat[2] * di; mat[11] += mat[8] * mat[3] * di; mat[13] += mat[12] * mat[1] * di; mat[14] += mat[12] * mat[2] * di; mat[15] += mat[12] * mat[3] * di; di = mat[5]; mat[5] = d = 1.0 / di; mat[1] *= -d; mat[9] *= -d; mat[13] *= -d; mat[4] *= d; mat[6] *= d; mat[7] *= d; mat[0] += mat[1] * mat[4] * di; mat[2] += mat[1] * mat[6] * di; mat[3] += mat[1] * mat[7] * di; mat[8] += mat[9] * mat[4] * di; mat[10] += mat[9] * mat[6] * di; mat[11] += mat[9] * mat[7] * di; mat[12] += mat[13] * mat[4] * di; mat[14] += mat[13] * mat[6] * di; mat[15] += mat[13] * mat[7] * di; di = mat[10]; mat[10] = d = 1.0 / di; mat[2] *= -d; mat[6] *= -d; mat[14] *= -d; mat[8] *= d; mat[9] *= d; mat[11] *= d; mat[0] += mat[2] * mat[8] * di; mat[1] += mat[2] * mat[9] * di; mat[3] += mat[2] * mat[11] * di; mat[4] += mat[6] * mat[8] * di; mat[5] += mat[6] * mat[9] * di; mat[7] += mat[6] * mat[11] * di; mat[12] += mat[14] * mat[8] * di; mat[13] += mat[14] * mat[9] * di; mat[15] += mat[14] * mat[11] * di; di = mat[15]; mat[15] = d = 1.0 / di; mat[3] *= -d; mat[7] *= -d; mat[11] *= -d; mat[12] *= d; mat[13] *= d; mat[14] *= d; mat[0] += mat[3] * mat[12] * di; mat[1] += mat[3] * mat[13] * di; mat[2] += mat[3] * mat[14] * di; mat[4] += mat[7] * mat[12] * di; mat[5] += mat[7] * mat[13] * di; mat[6] += mat[7] * mat[14] * di; mat[8] += mat[11] * mat[12] * di; mat[9] += mat[11] * mat[13] * di; mat[10] += mat[11] * mat[14] * di;) 29 30 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_4(MatScalar *, PetscReal, PetscBool, PetscBool *); 31 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_5(MatScalar *, PetscInt *, MatScalar *, PetscReal, PetscBool, PetscBool *); 32 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_6(MatScalar *, PetscReal, PetscBool, PetscBool *); 33 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_7(MatScalar *, PetscReal, PetscBool, PetscBool *); 34 PETSC_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *, PetscReal, PetscBool, PetscBool *); 35 PETSC_INTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *, PetscInt *, MatScalar *, PetscReal, PetscBool, PetscBool *); 36 37 /* 38 A = inv(A) A_gets_inverse_A 39 40 A - square bs by bs array stored in column major order 41 pivots - integer work array of length bs 42 W - bs by 1 work array 43 */ 44 #define PetscKernel_A_gets_inverse_A(bs, A, pivots, W, allowzeropivot, zeropivotdetected) ((PetscErrorCode)(PetscLINPACKgefa((A), (bs), (pivots), (allowzeropivot), (zeropivotdetected)) || PetscLINPACKgedi((A), (bs), (pivots), (W)))) 45 46 /* -----------------------------------------------------------------------*/ 47 48 #if !defined(PETSC_USE_REAL_MAT_SINGLE) 49 /* 50 Version that calls the BLAS directly 51 */ 52 /* 53 A = A * B A_gets_A_times_B 54 55 A, B - square bs by bs arrays stored in column major order 56 W - square bs by bs work array 57 58 */ 59 #define PetscKernel_A_gets_A_times_B(bs, A, B, W) \ 60 do { \ 61 PetscBLASInt _bbs; \ 62 PetscScalar _one = 1.0, _zero = 0.0; \ 63 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 64 PetscCall(PetscArraycpy((W), (A), (bs) * (bs))); \ 65 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 2)); \ 66 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(B, 3)); \ 67 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(W, 4)); \ 68 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &(_bbs), &(_bbs), &(_bbs), &_one, (W), &(_bbs), (B), &(_bbs), &_zero, (A), &(_bbs))); \ 69 } while (0) 70 71 /* 72 73 A = A - B * C A_gets_A_minus_B_times_C 74 75 A, B, C - square bs by bs arrays stored in column major order 76 */ 77 #define PetscKernel_A_gets_A_minus_B_times_C(bs, A, B, C) \ 78 do { \ 79 PetscScalar _mone = -1.0, _one = 1.0; \ 80 PetscBLASInt _bbs; \ 81 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 82 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 2)); \ 83 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(B, 3)); \ 84 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(C, 4)); \ 85 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &(_bbs), &(_bbs), &(_bbs), &_mone, (B), &(_bbs), (C), &(_bbs), &_one, (A), &(_bbs))); \ 86 } while (0) 87 88 /* 89 90 A = A + B^T * C A_gets_A_plus_Btranspose_times_C 91 92 A, B, C - square bs by bs arrays stored in column major order 93 */ 94 #define PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, A, B, C) \ 95 do { \ 96 PetscScalar _one = 1.0; \ 97 PetscBLASInt _bbs; \ 98 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 99 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 2)); \ 100 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(B, 3)); \ 101 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(C, 4)); \ 102 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &(_bbs), &(_bbs), &(_bbs), &_one, (B), &(_bbs), (C), &(_bbs), &_one, (A), &(_bbs))); \ 103 } while (0) 104 105 /* 106 v = v + A^T w v_gets_v_plus_Atranspose_times_w 107 108 v - array of length bs 109 A - square bs by bs array 110 w - array of length bs 111 */ 112 #define PetscKernel_v_gets_v_plus_Atranspose_times_w(bs, v, A, w) \ 113 do { \ 114 PetscScalar _one = 1.0; \ 115 PetscBLASInt _ione = 1, _bbs; \ 116 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 117 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 118 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 119 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 120 PetscCallBLAS("BLASgemv", BLASgemv_("T", &(_bbs), &(_bbs), &_one, A, &(_bbs), w, &_ione, &_one, v, &_ione)); \ 121 } while (0) 122 123 /* 124 v = v - A w v_gets_v_minus_A_times_w 125 126 v - array of length bs 127 A - square bs by bs array 128 w - array of length bs 129 */ 130 #define PetscKernel_v_gets_v_minus_A_times_w(bs, v, A, w) \ 131 do { \ 132 PetscScalar _mone = -1.0, _one = 1.0; \ 133 PetscBLASInt _ione = 1, _bbs; \ 134 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 135 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 136 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 137 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 138 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &(_bbs), &_mone, A, &(_bbs), w, &_ione, &_one, v, &_ione)); \ 139 } while (0) 140 141 /* 142 v = v - A w v_gets_v_minus_transA_times_w 143 144 v - array of length bs 145 A - square bs by bs array 146 w - array of length bs 147 */ 148 #define PetscKernel_v_gets_v_minus_transA_times_w(bs, v, A, w) \ 149 do { \ 150 PetscScalar _mone = -1.0, _one = 1.0; \ 151 PetscBLASInt _ione = 1, _bbs; \ 152 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 153 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 154 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 155 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 156 PetscCallBLAS("BLASgemv", BLASgemv_("T", &(_bbs), &(_bbs), &_mone, A, &(_bbs), w, &_ione, &_one, v, &_ione)); \ 157 } while (0) 158 159 /* 160 v = v + A w v_gets_v_plus_A_times_w 161 162 v - array of length bs 163 A - square bs by bs array 164 w - array of length bs 165 */ 166 #define PetscKernel_v_gets_v_plus_A_times_w(bs, v, A, w) \ 167 do { \ 168 PetscScalar _one = 1.0; \ 169 PetscBLASInt _ione = 1, _bbs; \ 170 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 171 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 172 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 173 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 174 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &(_bbs), &_one, A, &(_bbs), w, &_ione, &_one, v, &_ione)); \ 175 } while (0) 176 177 /* 178 v = v + A w v_gets_v_plus_Ar_times_w 179 180 v - array of length bs 181 A - square bs by bs array 182 w - array of length bs 183 */ 184 #define PetscKernel_w_gets_w_plus_Ar_times_v(bs, ncols, v, A, w) \ 185 do { \ 186 PetscScalar _one = 1.0; \ 187 PetscBLASInt _ione = 1, _bbs, _bncols; \ 188 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 189 PetscCall(PetscBLASIntCast(ncols, &_bncols)); \ 190 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 3)); \ 191 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 4)); \ 192 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 5)); \ 193 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &(_bncols), &_one, A, &(_bbs), v, &_ione, &_one, w, &_ione)); \ 194 } while (0) 195 196 /* 197 w <- w - A v w_gets_w_minus_Ar_times_v 198 199 v - array of length ncol 200 A(bs,ncols) 201 w - array of length bs 202 */ 203 #define PetscKernel_w_gets_w_minus_Ar_times_v(bs, ncols, w, A, v) \ 204 do { \ 205 PetscScalar _one = 1.0, _mone = -1.0; \ 206 PetscBLASInt _ione = 1, _bbs, _bncols; \ 207 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 208 PetscCall(PetscBLASIntCast(ncols, &_bncols)); \ 209 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 3)); \ 210 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 4)); \ 211 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 5)); \ 212 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &(_bncols), &_mone, A, &(_bbs), v, &_ione, &_one, w, &_ione)); \ 213 } while (0) 214 215 /* 216 w = A v w_gets_A_times_v 217 218 v - array of length bs 219 A - square bs by bs array 220 w - array of length bs 221 */ 222 #define PetscKernel_w_gets_A_times_v(bs, v, A, w) \ 223 do { \ 224 PetscScalar _zero = 0.0, _one = 1.0; \ 225 PetscBLASInt _ione = 1, _bbs; \ 226 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 227 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 228 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 229 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 230 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &(_bbs), &_one, A, &(_bbs), v, &_ione, &_zero, w, &_ione)); \ 231 } while (0) 232 233 /* 234 w = A' v w_gets_transA_times_v 235 236 v - array of length bs 237 A - square bs by bs array 238 w - array of length bs 239 */ 240 #define PetscKernel_w_gets_transA_times_v(bs, v, A, w) \ 241 do { \ 242 PetscScalar _zero = 0.0, _one = 1.0; \ 243 PetscBLASInt _ione = 1, _bbs; \ 244 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 245 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(v, 2)); \ 246 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 3)); \ 247 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(w, 4)); \ 248 PetscCallBLAS("BLASgemv", BLASgemv_("T", &(_bbs), &(_bbs), &_one, A, &(_bbs), v, &_ione, &_zero, w, &_ione)); \ 249 } while (0) 250 251 /* 252 z = A*x 253 */ 254 #define PetscKernel_w_gets_Ar_times_v(bs, ncols, x, A, z) \ 255 do { \ 256 PetscScalar _one = 1.0, _zero = 0.0; \ 257 PetscBLASInt _ione = 1, _bbs, _bncols; \ 258 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 259 PetscCall(PetscBLASIntCast(ncols, &_bncols)); \ 260 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(x, 3)); \ 261 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 4)); \ 262 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(z, 5)); \ 263 PetscCallBLAS("BLASgemv", BLASgemv_("N", &(_bbs), &_bncols, &_one, A, &(_bbs), x, &_ione, &_zero, z, &_ione)); \ 264 } while (0) 265 266 /* 267 z = trans(A)*x 268 */ 269 #define PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs, ncols, x, A, z) \ 270 do { \ 271 PetscScalar _one = 1.0; \ 272 PetscBLASInt _ione = 1, _bbs, _bncols; \ 273 PetscCall(PetscBLASIntCast(bs, &_bbs)); \ 274 PetscCall(PetscBLASIntCast(ncols, &_bncols)); \ 275 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(x, 3)); \ 276 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(A, 4)); \ 277 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscAssertPointer(z, 5)); \ 278 PetscCallBLAS("BLASgemv", BLASgemv_("T", &_bbs, &_bncols, &_one, A, &_bbs, x, &_ione, &_one, z, &_ione)); \ 279 } while (0) 280 281 #else /* !defined(PETSC_USE_REAL_MAT_SINGLE) */ 282 /* 283 Version that calls Fortran routines; can handle different precision 284 of matrix (array) and vectors 285 */ 286 /* 287 These are Fortran kernels: They replace certain BLAS routines but 288 have some arguments that may be single precision,rather than double 289 These routines are provided in src/fortran/kernels/sgemv.F 290 They are pretty pitiful but get the job done. The intention is 291 that for important block sizes (currently 1,2,3,4,5,6,7) custom inlined 292 code is used. 293 */ 294 295 #if defined(PETSC_HAVE_FORTRAN_CAPS) 296 #define msgemv_ MSGEMV 297 #define msgemvp_ MSGEMVP 298 #define msgemvm_ MSGEMVM 299 #define msgemvt_ MSGEMVT 300 #define msgemmi_ MSGEMMI 301 #define msgemm_ MSGEMM 302 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 303 #define msgemv_ msgemv 304 #define msgemvp_ msgemvp 305 #define msgemvm_ msgemvm 306 #define msgemvt_ msgemvt 307 #define msgemmi_ msgemmi 308 #define msgemm_ msgemm 309 #endif 310 311 PETSC_INTERN void msgemv_(PetscInt *, PetscInt *, MatScalar *, PetscScalar *, PetscScalar *); 312 PETSC_INTERN void msgemvp_(PetscInt *, PetscInt *, MatScalar *, PetscScalar *, PetscScalar *); 313 PETSC_INTERN void msgemvm_(PetscInt *, PetscInt *, MatScalar *, PetscScalar *, PetscScalar *); 314 PETSC_INTERN void msgemvt_(PetscInt *, PetscInt *, MatScalar *, PetscScalar *, PetscScalar *); 315 PETSC_INTERN void msgemmi_(PetscInt *, MatScalar *, MatScalar *, MatScalar *); 316 PETSC_INTERN void msgemm_(PetscInt *, MatScalar *, MatScalar *, MatScalar *); 317 318 /* 319 A = A * B A_gets_A_times_B 320 321 A, B - square bs by bs arrays stored in column major order 322 W - square bs by bs work array 323 324 */ 325 #define PetscKernel_A_gets_A_times_B(bs, A, B, W) \ 326 do { \ 327 PetscCall(PetscArraycpy((W), (A), (bs) * (bs))); \ 328 msgemmi_(&bs, A, B, W); \ 329 } while (0) 330 331 /* 332 333 A = A - B * C A_gets_A_minus_B_times_C 334 335 A, B, C - square bs by bs arrays stored in column major order 336 */ 337 #define PetscKernel_A_gets_A_minus_B_times_C(bs, A, B, C) msgemm_(&(bs), (A), (B), (C)) 338 339 /* 340 v = v - A w v_gets_v_minus_A_times_w 341 342 v - array of length bs 343 A - square bs by bs array 344 w - array of length bs 345 */ 346 #define PetscKernel_v_gets_v_minus_A_times_w(bs, v, A, w) msgemvm_(&(bs), &(bs), (A), (w), (v)) 347 348 /* 349 v = v + A w v_gets_v_plus_A_times_w 350 351 v - array of length bs 352 A - square bs by bs array 353 w - array of length bs 354 */ 355 #define PetscKernel_v_gets_v_plus_A_times_w(bs, v, A, w) msgemvp_(&(bs), &(bs), (A), (w), (v)) 356 357 /* 358 v = v + A w v_gets_v_plus_Ar_times_w 359 360 v - array of length bs 361 A - square bs by bs array 362 w - array of length bs 363 */ 364 #define PetscKernel_w_gets_w_plus_Ar_times_v(bs, ncol, v, A, w) msgemvp_(&(bs), &(ncol), (A), (v), (w)) 365 366 /* 367 w = A v w_gets_A_times_v 368 369 v - array of length bs 370 A - square bs by bs array 371 w - array of length bs 372 */ 373 #define PetscKernel_w_gets_A_times_v(bs, v, A, w) msgemv_(&(bs), &(bs), (A), (v), (w)) 374 375 /* 376 z = A*x 377 */ 378 #define PetscKernel_w_gets_Ar_times_v(bs, ncols, x, A, z) msgemv_(&(bs), &(ncols), (A), (x), (z)) 379 380 /* 381 z = trans(A)*x 382 */ 383 #define PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs, ncols, x, A, z) msgemvt_(&(bs), &(ncols), (A), (x), (z)) 384 385 /* These do not work yet */ 386 #define PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, A, B, C) 387 #define PetscKernel_v_gets_v_plus_Atranspose_times_w(bs, v, A, w) 388 389 #endif /* !defined(PETSC_USE_REAL_MAT_SINGLE) */ 390