Searched refs:NB (Results 1 – 4 of 4) sorted by relevance
| /libCEED/include/ceed/jit-source/magma/ |
| H A D | magma-basis-interp-deriv-nontensor.h | 13 template <typename T, int Q_COMP, int P, int Q, int NB> 19 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_n() 20 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_n() 22 dB += id * P * NB; in magma_basis_nontensor_device_n() 23 dC += id * Q * NB; in magma_basis_nontensor_device_n() 26 CeedScalar *sB = shared_data + ty * P * NB; in magma_basis_nontensor_device_n() 27 CeedScalar *sA = shared_data + blockDim.y * P * NB; in magma_basis_nontensor_device_n() 31 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n() 40 CeedScalar rC[NB]; in magma_basis_nontensor_device_n() 41 mul_rAsBrC_1D_nosync<CeedScalar, Q, P, NB>(rA, sB, rC); in magma_basis_nontensor_device_n() [all …]
|
| H A D | magma-common-nontensor.h | 69 template <typename T, int P, int Q, int NB> 73 if (n != NB) { in read_B_g2s_1D_nosync() 79 for (i = 0; i < Q * NB - P; i += P) { in read_B_g2s_1D_nosync() 93 template <typename T, int P, int Q, int NB> 94 static __device__ __inline__ void write_C_r2g_1D_nosync(const int tx, const int n, T rC[NB], T *dC)… in write_C_r2g_1D_nosync() argument 95 if (n != NB) { in write_C_r2g_1D_nosync() 101 for (int i = 0; i < NB; i++) { in write_C_r2g_1D_nosync() 112 template <typename T, int P, int Q, int NB> 113 static __device__ __inline__ void sum_C_r2g_1D_nosync(const int tx, const int n, T rC[NB], T *dC) { in sum_C_r2g_1D_nosync() argument 114 if (n != NB) { in sum_C_r2g_1D_nosync() [all …]
|
| /libCEED/backends/magma/tuning/ |
| H A D | README.md | 6 blocking factor parameter, `NB`, which varies with `P` and `Q` as well as the 10 generate the optimal `NB` selections for a new target architecture. 15 `NB` from 1 to 32 and saved to `a100_rtc.h`, is:
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-basis.c | 382 CeedInt q_comp, NB, M, K; in CeedBasisApplyNonTensorCore_Magma() local 411 NB = impl->NB_interp_t[iN]; in CeedBasisApplyNonTensorCore_Magma() 414 NB = impl->NB_interp[iN]; in CeedBasisApplyNonTensorCore_Magma() 419 NB = impl->NB_deriv_t[iN]; in CeedBasisApplyNonTensorCore_Magma() 422 NB = impl->NB_deriv[iN]; in CeedBasisApplyNonTensorCore_Magma() 426 CeedInt grid = CeedDivUpInt(N, num_t_col * NB); in CeedBasisApplyNonTensorCore_Magma() 428 CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Magma()
|