| /libCEED/include/ceed/jit-source/magma/ |
| H A D | magma-basis-grad-3d.h | 24 static __device__ __inline__ void op(T &rV, const T &rTmp) { rV += rTmp; } 29 static __device__ __inline__ void op(T &rV, const T &rTmp) { rV = rTmp; } 42 … T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, T rTmp, T *swork) { 104 magma_grad_3d_device_accumulate<T, ADD>::op(rV[i_DIM_V][comp][j], rTmp); 124 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_Q] = {0.0}; // here DIM_V = 1, but might be different for … 150 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 0, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, 153 write_V_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV + (0 * dstrdV), cstrdV, rV, tx); 157 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 1, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, 160 write_V_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV + (1 * dstrdV), cstrdV, rV, tx); 164 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 2, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, [all …]
|
| H A D | magma-basis-grad-2d.h | 23 static __device__ __inline__ void op(T &rV, const T &rTmp) { rV += rTmp; } 28 static __device__ __inline__ void op(T &rV, const T &rTmp) { rV = rTmp; } 41 … T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, T rTmp, T *swork) { 82 magma_grad_2d_device_accumulate<T, ADD>::op(rV[i_DIM_V][comp][j], rTmp); 102 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_Q] = {0.0}; // here DIM_V = 1, but might be different for … 127 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 0, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, 130 write_V_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV + (0 * dstrdV), cstrdV, rV, tx); 134 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 1, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, 137 write_V_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV + (1 * dstrdV), cstrdV, rV, tx); 153 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // here DIM_V = 1, but might be different for … [all …]
|
| H A D | magma-basis-weight-2d.h | 15 static __device__ __inline__ void magma_weight_2d_device(const T *sTweight, T rV[DIM][NUM_COMP][Q],… in magma_weight_2d_device() 27 rV[i_DIM][i_COMP][j] = sTweight[j] * sTweight[tx]; in magma_weight_2d_device() 43 …CeedScalar rV[1][1][BASIS_Q]; // allocate with BASIS_DIM=BASIS_NUM_COMP=1, but sizes may differ f… in __launch_bounds__() local 56 magma_weight_2d_device<CeedScalar, 1, 1, BASIS_Q, 0, 0>(sTweight, rV, tx); in __launch_bounds__() 61 dV[j * BASIS_Q + tx] = rV[0][0][j]; in __launch_bounds__()
|
| H A D | magma-basis-interp-2d.h | 19 …void magma_interp_2d_device(const T *sT, T rU[DIM_U][NUM_COMP][rU_SIZE], T rV[DIM_V][NUM_COMP][rV_… in magma_interp_2d_device() 58 rV[0][comp][j] += rTmp; in magma_interp_2d_device() 78 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_Q] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 99 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() 103 write_V_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV, cstrdV, rV, tx); in __launch_bounds__() 119 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 140 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() 144 write_V_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV, cstrdV, rV, tx); in __launch_bounds__() 160 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 181 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() [all …]
|
| H A D | magma-basis-weight-3d.h | 15 static __device__ __inline__ void magma_weight_3d_device(const T *sTweight, T rV[DIM][NUM_COMP][Q],… in magma_weight_3d_device() 28 rV[i_DIM][i_COMP][j] = sTweight[j] * sTweight[tx % Q] * sTweight[tx / Q]; in magma_weight_3d_device() 44 …CeedScalar rV[1][1][BASIS_Q]; // allocate with BASIS_DIM=BASIS_NUM_COMP=1, but sizes may differ f… in __launch_bounds__() local 57 magma_weight_3d_device<CeedScalar, 1, 1, BASIS_Q, 0, 0>(sTweight, rV, tx); in __launch_bounds__() 62 dV[j * (BASIS_Q * BASIS_Q) + tx] = rV[0][0][j]; in __launch_bounds__()
|
| H A D | magma-common-tensor.h | 95 static __device__ __inline__ void read_V_2d(const T *dV, const int compstride, T rV[DIM_V][NUM_COMP… in read_V_2d() 99 rV[i_DIM][comp][j] = dV[comp * compstride + j * Q + tx]; in read_V_2d() 112 static __device__ __inline__ void write_V_2d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_… in write_V_2d() 116 dV[comp * compstride + j * Q + tx] = rV[i_DIM][comp][j]; in write_V_2d() 129 static __device__ __inline__ void sum_V_2d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_SI… in sum_V_2d() 133 dV[comp * compstride + j * Q + tx] += rV[i_DIM][comp][j]; in sum_V_2d() 183 static __device__ __inline__ void read_V_3d(const T *dV, const int compstride, T rV[DIM_V][NUM_COMP… in read_V_3d() 187 rV[i_DIM][comp][j] = dV[comp * compstride + j * (Q * Q) + tx]; in read_V_3d() 200 static __device__ __inline__ void write_V_3d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_… in write_V_3d() 204 dV[comp * compstride + j * (Q * Q) + tx] = rV[i_DIM][comp][j]; in write_V_3d() [all …]
|
| H A D | magma-basis-interp-3d.h | 19 …void magma_interp_3d_device(const T *sT, T rU[DIM_U][NUM_COMP][rU_SIZE], T rV[DIM_V][NUM_COMP][rV_… in magma_interp_3d_device() 86 rV[0][comp][j] += rTmp[0]; in magma_interp_3d_device() 106 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_Q] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 127 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() 131 write_V_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV, cstrdV, rV, tx); in __launch_bounds__() 147 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 168 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() 172 write_V_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV, cstrdV, rV, tx); in __launch_bounds__() 188 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() local 209 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() [all …]
|