Home
last modified time | relevance | path

Searched refs:d_B (Results 1 – 13 of 13) sorted by relevance

/libCEED/backends/magma/
H A Dceed-magma-gemm-nontensor.cpp25 …const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,… in magmablas_gemm() argument
28 …, trans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, (const float *)d_B, lddb, (float)beta,… in magmablas_gemm()
31 …rans_B, m, n, k, (double)alpha, (const double *)d_A, ldda, (const double *)d_B, lddb, (double)beta… in magmablas_gemm()
40 … const CeedScalar *d_B, magma_int_t lddb, magma_int_t strideB, CeedScalar beta, CeedScalar *d_C, in magmablas_gemm_batched_strided() argument
43 …ans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, strideA, (const float *)d_B, lddb, strideB, in magmablas_gemm_batched_strided()
46 …_B, m, n, k, (double)alpha, (const double *)d_A, ldda, strideA, (const double *)d_B, lddb, strideB, in magmablas_gemm_batched_strided()
54 …const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,… in devblas_gemm() argument
57 …, trans_B, m, n, k, (float)alpha, (const float *)d_A, ldda, (const float *)d_B, lddb, (float)beta,… in devblas_gemm()
59 …rans_B, m, n, k, (double)alpha, (const double *)d_A, ldda, (const double *)d_B, lddb, (double)beta… in devblas_gemm()
67 …dScalar alpha, const CeedScalar *d_A, magma_int_t ldda, magma_int_t strideA, const CeedScalar *d_B, in devblas_gemm_batched_strided() argument
[all …]
H A Dceed-magma-gemm-nontensor.h13 … const CeedScalar *d_A, magma_int_t ldda, const CeedScalar *d_B, magma_int_t lddb, CeedScalar beta,
/libCEED/include/ceed/jit-source/hip/
H A Dhip-ref-basis-nontensor.h21 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const… in Interp() argument
25 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Interp()
29 …oid InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *… in InterpTranspose() argument
33 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in InterpTranspose()
40 extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const … in Deriv() argument
44 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Deriv()
48 …void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *… in DerivTranspose() argument
52 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in DerivTranspose()
H A Dhip-ref-basis-nontensor-templates.h17 … const CeedInt strides_comp_V, const CeedInt strides_q_comp_V, const CeedScalar *__restrict__ d_B, in Contract() argument
31 for (CeedInt d = 0; d < Q_COMP; d++) r_V[d] += d_B[i + t_id * P + d * P * Q] * val; in Contract()
44 … const CeedInt strides_comp_V, const CeedInt strides_q_comp_U, const CeedScalar *__restrict__ d_B, in ContractTranspose() argument
56 for (CeedInt i = 0; i < Q; i++) r_V += d_B[t_id + i * P + d * P * Q] * U[i]; in ContractTranspose()
H A Dhip-shared-basis-read-write-templates.h16 inline __device__ void LoadMatrix(SharedData_Hip &data, const CeedScalar *__restrict__ d_B, CeedSca… in LoadMatrix() argument
17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
H A Dhip-gen-templates.h16 inline __device__ void LoadMatrix(SharedData_Hip &data, const CeedScalar *__restrict__ d_B, CeedSca… in LoadMatrix() argument
17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
/libCEED/include/ceed/jit-source/cuda/
H A Dcuda-ref-basis-nontensor.h21 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const… in Interp() argument
25 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Interp()
29 …oid InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *… in InterpTranspose() argument
33 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in InterpTranspose()
40 extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const … in Deriv() argument
44 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Deriv()
48 …void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *… in DerivTranspose() argument
52 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in DerivTranspose()
H A Dcuda-ref-basis-nontensor-templates.h17 … const CeedInt strides_comp_V, const CeedInt strides_q_comp_V, const CeedScalar *__restrict__ d_B, in Contract() argument
31 for (CeedInt d = 0; d < Q_COMP; d++) r_V[d] += d_B[i + t_id * P + d * P * Q] * val; in Contract()
44 … const CeedInt strides_comp_V, const CeedInt strides_q_comp_U, const CeedScalar *__restrict__ d_B, in ContractTranspose() argument
56 for (CeedInt i = 0; i < Q; i++) r_V += d_B[t_id + i * P + d * P * Q] * U[i]; in ContractTranspose()
H A Dcuda-shared-basis-read-write-templates.h16 inline __device__ void LoadMatrix(SharedData_Cuda &data, const CeedScalar *__restrict__ d_B, CeedSc… in LoadMatrix() argument
17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
H A Dcuda-gen-templates.h16 inline __device__ void LoadMatrix(SharedData_Cuda &data, const CeedScalar *__restrict__ d_B, CeedSc… in LoadMatrix() argument
17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
/libCEED/include/ceed/jit-source/sycl/
H A Dsycl-shared-basis-read-write-templates.h15 inline void loadMatrix(const CeedInt N, const CeedScalar *restrict d_B, CeedScalar *restrict B) { in loadMatrix() argument
18 for (CeedInt i = item_id; i < N; i += group_size) B[i] = d_B[i]; in loadMatrix()
H A Dsycl-gen-templates.h20 inline void loadMatrix(const CeedInt N, const CeedScalar *restrict d_B, CeedScalar *restrict B) { in loadMatrix() argument
23 for (CeedInt i = item_id; i < N; i += group_size) B[i] = d_B[i]; in loadMatrix()
/libCEED/backends/sycl-ref/
H A Dceed-sycl-ref-basis.sycl.cpp351 const CeedScalar *d_B = impl->d_interp; in CeedBasisApplyNonTensorInterp_Sycl() local
368 V += d_B[i * stride_0 + j * stride_1] * U[j]; in CeedBasisApplyNonTensorInterp_Sycl()