| /libCEED/include/ceed/jit-source/magma/ |
| H A D | magma-basis-grad-3d.h | 123 …CeedScalar rU[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // here DIM_U = 1, but might be different for … 133 CeedScalar *sTgrad = sTinterp + BASIS_P * BASIS_Q; 134 CeedScalar *sTmp = sTgrad + BASIS_P * BASIS_Q; 135 …sTmp += ty * (max(BASIS_P * BASIS_P * BASIS_P, (BASIS_P * BASIS_P * BASIS_Q) + (BASIS_P * BASIS_Q … 139 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dinterp1d, sTinterp); 140 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dgrad1d, sTgrad); 146 …read_U_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU + (0 * dstrdU), cstrdU, rU, sTmp,… 150 …magma_grad_3d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 0, 0, 0… 157 …magma_grad_3d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 1, 0, 0… 164 …magma_grad_3d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 2, 0, 0… [all …]
|
| H A D | magma-basis-interp-3d.h | 105 …CeedScalar rU[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() 115 CeedScalar *sTmp = sT + BASIS_P * BASIS_Q; in __launch_bounds__() 116 sTmp += ty * (max(BASIS_P * BASIS_P * BASIS_MAX_P_Q, BASIS_P * BASIS_Q * BASIS_Q)); in __launch_bounds__() 120 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__() 124 read_U_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__() 127 …magma_interp_3d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, r… in __launch_bounds__() 147 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() 148 CeedScalar rTmp[BASIS_P] = {0.0}; in __launch_bounds__() 156 CeedScalar *sTmp = sT + BASIS_Q * BASIS_P; in __launch_bounds__() 157 sTmp += ty * (max(BASIS_Q * BASIS_Q * BASIS_MAX_P_Q, BASIS_Q * BASIS_P * BASIS_P)); in __launch_bounds__() [all …]
|
| H A D | magma-basis-grad-2d.h | 101 …CeedScalar rU[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // here DIM_U = 1, but might be different for … 111 CeedScalar *sTgrad = sTinterp + BASIS_P * BASIS_Q; 112 CeedScalar *sTmp = sTgrad + BASIS_P * BASIS_Q; 113 sTmp += ty * (BASIS_P * BASIS_MAX_P_Q); 117 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dinterp1d, sTinterp); 118 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dgrad1d, sTgrad); 123 …read_U_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU + (0 * dstrdU), cstrdU, rU, sTmp,… 127 …magma_grad_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 0, 0, 0… 134 …magma_grad_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 1, 0, 0… 153 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // here DIM_V = 1, but might be different for … [all …]
|
| H A D | magma-basis-interp-2d.h | 77 …CeedScalar rU[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() 87 CeedScalar *sTmp = sT + BASIS_P * BASIS_Q; in __launch_bounds__() 88 sTmp += ty * (BASIS_P * BASIS_MAX_P_Q); in __launch_bounds__() 92 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__() 96 read_U_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__() 99 …magma_interp_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, r… in __launch_bounds__() 119 …CeedScalar rV[1][BASIS_NUM_COMP][BASIS_P] = {0.0}; // for a non-fused operator BASIS_DIM is alway… in __launch_bounds__() 128 CeedScalar *sTmp = sT + BASIS_Q * BASIS_P; in __launch_bounds__() 133 read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dT, sT); in __launch_bounds__() 140 …magma_interp_2d_device<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, r… in __launch_bounds__() [all …]
|
| H A D | magma-basis-grad-1d.h | 60 CeedScalar *sW = sT + BASIS_P * BASIS_Q; in __launch_bounds__() 61 sU[0] = sW + ty * BASIS_NUM_COMP * (BASIS_P + BASIS_Q); in __launch_bounds__() 62 sV[0] = sU[0] + (BASIS_NUM_COMP * 1 * BASIS_P); in __launch_bounds__() 64 sU[comp] = sU[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 70 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dTgrad, sT); in __launch_bounds__() 74 read_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(dU, cstrdU, sU, tx); in __launch_bounds__() 77 magma_grad_1d_device<CeedScalar, BASIS_DIM, BASIS_NUM_COMP, BASIS_P, BASIS_Q>(sT, sU, sV, tx); in __launch_bounds__() 105 CeedScalar *sW = sT + BASIS_Q * BASIS_P; in __launch_bounds__() 106 sU[0] = sW + ty * BASIS_NUM_COMP * (BASIS_Q + BASIS_P); in __launch_bounds__() 110 sV[comp] = sV[comp - 1] + (1 * BASIS_P); in __launch_bounds__() [all …]
|
| H A D | magma-basis-interp-1d.h | 60 CeedScalar *sW = sT + BASIS_P * BASIS_Q; in __launch_bounds__() 61 sU[0] = sW + ty * BASIS_NUM_COMP * (BASIS_P + BASIS_Q); in __launch_bounds__() 62 sV[0] = sU[0] + (BASIS_NUM_COMP * 1 * BASIS_P); in __launch_bounds__() 64 sU[comp] = sU[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 70 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__() 74 read_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(dU, cstrdU, sU, tx); in __launch_bounds__() 77 magma_interp_1d_device<CeedScalar, BASIS_DIM, BASIS_NUM_COMP, BASIS_P, BASIS_Q>(sT, sU, sV, tx); in __launch_bounds__() 105 CeedScalar *sW = sT + BASIS_Q * BASIS_P; in __launch_bounds__() 106 sU[0] = sW + ty * BASIS_NUM_COMP * (BASIS_Q + BASIS_P); in __launch_bounds__() 110 sV[comp] = sV[comp - 1] + (1 * BASIS_P); in __launch_bounds__() [all …]
|
| H A D | magma-basis-interp-deriv-nontensor.h | 261 …magma_basis_nontensor_device_n1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_N>(n, dA, dB, dC, (C… in __launch_bounds__() 263 …magma_basis_nontensor_device_n<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP_… in __launch_bounds__() 268 extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_P, MAGMA_MAXTHREADS_1D)) __global__ in __launch_bounds__() argument 273 …magma_basis_nontensor_device_t1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(n, dA, dB, dC, (C… in __launch_bounds__() 275 …magma_basis_nontensor_device_t<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP_… in __launch_bounds__() 280 extern "C" __launch_bounds__(MAGMA_BASIS_BOUNDS(BASIS_P, MAGMA_MAXTHREADS_1D)) __global__ in __launch_bounds__() argument 285 …magma_basis_nontensor_device_ta1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(n, dA, dB, dC, (… in __launch_bounds__() 287 …magma_basis_nontensor_device_ta<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP… in __launch_bounds__() 297 …magma_basis_nontensor_device_n1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_DERIV_N>(n, dA, dB, dC, (Ce… in __launch_bounds__() 299 …magma_basis_nontensor_device_n<CeedScalar, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q, BASIS_NB_DERIV_N>… in __launch_bounds__() [all …]
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-nontensor.h | 32 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in Interp() 33 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in Interp() 38 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in Interp() 39 InterpNonTensor<BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_B, r_V); in Interp() 59 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in InterpTranspose() 60 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in InterpTranspose() 66 InterpTransposeNonTensor<BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_B, r_V); in InterpTranspose() 67 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in InterpTranspose() 86 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in InterpTransposeAdd() 87 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in InterpTransposeAdd() [all …]
|
| H A D | cuda-ref-basis-nontensor.h | 24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp() 32 …pose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_… in InterpTranspose() 43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv() 51 …spose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_… in DerivTranspose()
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-nontensor.h | 33 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in __launch_bounds__() 34 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in __launch_bounds__() 39 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in __launch_bounds__() 40 InterpNonTensor<BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_B, r_V); in __launch_bounds__() 60 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in __launch_bounds__() 61 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in __launch_bounds__() 67 InterpTransposeNonTensor<BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_B, r_V); in __launch_bounds__() 68 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in __launch_bounds__() 87 __shared__ CeedScalar s_B[BASIS_P * BASIS_Q]; in __launch_bounds__() 88 LoadMatrix<BASIS_P, BASIS_Q>(data, c_B, s_B); in __launch_bounds__() [all …]
|
| H A D | hip-ref-basis-nontensor.h | 24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp() 32 …pose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_… in InterpTranspose() 43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv() 51 …spose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_… in DerivTranspose()
|