| /libCEED/include/ceed/jit-source/magma/ |
| H A D | magma-common-tensor.h | 19 …_inline__ void read_1d(const T *devptr, const int compstride, T *sBuffer[NUM_COMP], const int tx) { in read_1d() argument 20 if (tx < LENGTH) { in read_1d() 22 sBuffer[comp][tx] = devptr[comp * compstride + tx]; in read_1d() 31 …e__ __inline__ void write_1d(T *sBuffer[NUM_COMP], T *devptr, const int compstride, const int tx) { in write_1d() argument 32 if (tx < LENGTH) { in write_1d() 34 devptr[comp * compstride + tx] = sBuffer[comp][tx]; in write_1d() 43 …ice__ __inline__ void sum_1d(T *sBuffer[NUM_COMP], T *devptr, const int compstride, const int tx) { in sum_1d() argument 44 if (tx < LENGTH) { in sum_1d() 46 devptr[comp * compstride + tx] += sBuffer[comp][tx]; in sum_1d() 59 …ad_U_2d(const T *dU, const int compstride, T rU[DIM_U][NUM_COMP][rU_SIZE], T *sTmp, const int tx) { in read_U_2d() argument [all …]
|
| H A D | magma-basis-weight-1d.h | 15 static __device__ __inline__ void magma_weight_1d_device(const T *sTweight, T *sV, const int tx) { in magma_weight_1d_device() argument 19 if (tx < Q) { in magma_weight_1d_device() 20 sV[tx] = sTweight[tx]; in magma_weight_1d_device() 29 const int tx = threadIdx.x; in __launch_bounds__() 44 if (ty == 0 && tx < BASIS_Q) { in __launch_bounds__() 45 sTweight[tx] = dqweight1d[tx]; in __launch_bounds__() 49 magma_weight_1d_device<CeedScalar, BASIS_Q>(sTweight, sV, tx); in __launch_bounds__() 53 dV[tx] = sV[tx]; in __launch_bounds__()
|
| H A D | magma-basis-grad-3d.h | 42 … T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, T rTmp, T *swork) { 59 if (tx < (P * P)) { 60 const int batchid = tx; 75 if (tx < (P * Q)) { 76 const int batchid = tx / Q; 77 const int tx_ = tx % Q; 93 if (tx < (Q * Q)) { 102 rTmp += sTmp(tx, i, sld) * sT(i, j); 117 const int tx = threadIdx.x; 139 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dinterp1d, sTinterp); [all …]
|
| H A D | magma-basis-weight-3d.h | 15 …_ __inline__ void magma_weight_3d_device(const T *sTweight, T rV[DIM][NUM_COMP][Q], const int tx) { in magma_weight_3d_device() argument 23 if (tx < Q * Q) { in magma_weight_3d_device() 28 rV[i_DIM][i_COMP][j] = sTweight[j] * sTweight[tx % Q] * sTweight[tx / Q]; in magma_weight_3d_device() 38 const int tx = threadIdx.x; in __launch_bounds__() 52 if (tx < BASIS_Q) { in __launch_bounds__() 53 sTweight[tx] = dqweight1d[tx]; in __launch_bounds__() 57 magma_weight_3d_device<CeedScalar, 1, 1, BASIS_Q, 0, 0>(sTweight, rV, tx); in __launch_bounds__() 60 if (tx < (BASIS_Q * BASIS_Q)) { in __launch_bounds__() 62 dV[j * (BASIS_Q * BASIS_Q) + tx] = rV[0][0][j]; in __launch_bounds__()
|
| H A D | magma-basis-weight-nontensor.h | 17 const int tx = threadIdx.x; in __launch_bounds__() local 32 if (ty == 0 && tx < BASIS_Q) { in __launch_bounds__() 33 sqweight[tx] = dqweight[tx]; in __launch_bounds__() 37 if (tx < BASIS_Q) { in __launch_bounds__() 38 sV[tx] = sqweight[tx]; in __launch_bounds__() 42 dV[tx] = sV[tx]; in __launch_bounds__()
|
| H A D | magma-basis-weight-2d.h | 15 …_ __inline__ void magma_weight_2d_device(const T *sTweight, T rV[DIM][NUM_COMP][Q], const int tx) { in magma_weight_2d_device() argument 23 if (tx < Q) { in magma_weight_2d_device() 27 rV[i_DIM][i_COMP][j] = sTweight[j] * sTweight[tx]; in magma_weight_2d_device() 37 const int tx = threadIdx.x; in __launch_bounds__() 51 if (ty == 0 && tx < BASIS_Q) { in __launch_bounds__() 52 sTweight[tx] = dqweight1d[tx]; in __launch_bounds__() 56 magma_weight_2d_device<CeedScalar, 1, 1, BASIS_Q, 0, 0>(sTweight, rV, tx); in __launch_bounds__() 59 if (tx < BASIS_Q) { in __launch_bounds__() 61 dV[j * BASIS_Q + tx] = rV[0][0][j]; in __launch_bounds__()
|
| H A D | magma-common-nontensor.h | 20 static __device__ __inline__ void read_A_notrans_g2r_1D_nosync(const int tx, const int ty, const T … in read_A_notrans_g2r_1D_nosync() argument 21 const int tid = ty * P + tx; in read_A_notrans_g2r_1D_nosync() 35 rA[j] = sA[j * P + tx]; in read_A_notrans_g2r_1D_nosync() 45 static __device__ __inline__ void read_A_trans_g2r_1D_nosync(const int tx, const int ty, const T *d… in read_A_trans_g2r_1D_nosync() argument 46 const int tid = ty * P + tx; in read_A_trans_g2r_1D_nosync() 60 rA[j] = sA[tx * Q + j]; in read_A_trans_g2r_1D_nosync() 70 static __device__ __inline__ void read_B_g2s_1D_nosync(const int tx, const int n, const T *dB, T *s… in read_B_g2s_1D_nosync() argument 75 sB[i + tx] = dB[i + tx]; in read_B_g2s_1D_nosync() 80 sB[i + tx] = dB[i + tx]; in read_B_g2s_1D_nosync() 83 if (i + tx < Q * n) { in read_B_g2s_1D_nosync() [all …]
|
| H A D | magma-basis-interp-3d.h | 19 …d_device(const T *sT, T rU[DIM_U][NUM_COMP][rU_SIZE], T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, in magma_interp_3d_device() argument 34 if (tx < (P * P)) { in magma_interp_3d_device() 35 const int batchid = tx; in magma_interp_3d_device() 49 if (tx < (P * Q)) { in magma_interp_3d_device() 50 const int batchid = tx / Q; in magma_interp_3d_device() 51 const int tx_ = tx % Q; in magma_interp_3d_device() 64 if (tx < (P * Q)) { in magma_interp_3d_device() 65 const int batchid = tx / Q; in magma_interp_3d_device() 66 const int tx_ = tx % Q; in magma_interp_3d_device() 76 if (tx < (Q * Q)) { in magma_interp_3d_device() [all …]
|
| H A D | magma-basis-grad-2d.h | 41 … T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, T rTmp, T *swork) { 56 if (tx < P) { 57 const int batchid = tx; 72 if (tx < Q) { 80 rTmp += sTmp(tx, i, sld) * sT(i, j); 95 const int tx = threadIdx.x; 117 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dinterp1d, sTinterp); 118 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dgrad1d, sTgrad); 123 …_U_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU + (0 * dstrdU), cstrdU, rU, sTmp, tx); 127 …S_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q, 0, 0, 0, false>(sTinterp, sTgrad, rU, rV, tx, rTmp, [all …]
|
| H A D | magma-basis-grad-1d.h | 18 …__inline__ void magma_grad_1d_device(const T *sT, T *sU[NUM_COMP], T *sV[NUM_COMP], const int tx) { in magma_grad_1d_device() argument 28 if (tx < Q) { in magma_grad_1d_device() 32 rv += sU[comp][i] * sT(i, tx); in magma_grad_1d_device() 34 sV[comp][tx] = rv; in magma_grad_1d_device() 45 const int tx = threadIdx.x; in __launch_bounds__() 70 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dTgrad, sT); in __launch_bounds__() 74 read_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(dU, cstrdU, sU, tx); in __launch_bounds__() 77 magma_grad_1d_device<CeedScalar, BASIS_DIM, BASIS_NUM_COMP, BASIS_P, BASIS_Q>(sT, sU, sV, tx); in __launch_bounds__() 81 write_1d<CeedScalar, BASIS_Q, BASIS_NUM_COMP>(sV, dV, cstrdV, tx); in __launch_bounds__() 90 const int tx = threadIdx.x; in __launch_bounds__() [all …]
|
| H A D | magma-basis-interp-1d.h | 18 …inline__ void magma_interp_1d_device(const T *sT, T *sU[NUM_COMP], T *sV[NUM_COMP], const int tx) { in magma_interp_1d_device() argument 28 if (tx < Q) { in magma_interp_1d_device() 32 rv += sU[comp][i] * sT(i, tx); // sT[tx * P + i]; in magma_interp_1d_device() 34 sV[comp][tx] = rv; in magma_interp_1d_device() 45 const int tx = threadIdx.x; in __launch_bounds__() 70 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__() 74 read_1d<CeedScalar, BASIS_P, BASIS_NUM_COMP>(dU, cstrdU, sU, tx); in __launch_bounds__() 77 magma_interp_1d_device<CeedScalar, BASIS_DIM, BASIS_NUM_COMP, BASIS_P, BASIS_Q>(sT, sU, sV, tx); in __launch_bounds__() 81 write_1d<CeedScalar, BASIS_Q, BASIS_NUM_COMP>(sV, dV, cstrdV, tx); in __launch_bounds__() 90 const int tx = threadIdx.x; in __launch_bounds__() [all …]
|
| H A D | magma-basis-interp-2d.h | 19 …d_device(const T *sT, T rU[DIM_U][NUM_COMP][rU_SIZE], T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, in magma_interp_2d_device() argument 34 if (tx < P) { in magma_interp_2d_device() 35 const int batchid = tx; in magma_interp_2d_device() 49 if (tx < Q) { in magma_interp_2d_device() 56 rTmp += sTmp(tx, i, sld) * sT(i, j); in magma_interp_2d_device() 71 const int tx = threadIdx.x; in __launch_bounds__() 92 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__() 96 read_U_2d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__() 99 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__() 103 write_V_2d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV, cstrdV, rV, tx); in __launch_bounds__() [all …]
|
| H A D | magma-basis-interp-deriv-nontensor.h | 16 const int tx = threadIdx.x; in magma_basis_nontensor_device_n() local 31 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n() 38 …read_A_trans_g2r_1D_nosync<CeedScalar, Q, P, MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D)>(tx, ty, dA… in magma_basis_nontensor_device_n() 45 write_C_r2g_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_n() 59 const int tx = threadIdx.x; in magma_basis_nontensor_device_t() local 78 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_t() 83 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_t() 97 write_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_t() 105 const int tx = threadIdx.x; in magma_basis_nontensor_device_ta() local 124 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_ta() [all …]
|
| /libCEED/examples/solids/ |
| H A D | README.md | 48 …to set traction boundary conditions with the traction vector `-bc_traction_[facenumber] [tx,ty,tz]`
|