Home
last modified time | relevance | path

Searched refs:block_size_x (Results 1 – 14 of 14) sorted by relevance

/libCEED/backends/hip/
H A Dceed-hip-compile.h22 …dRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size_x, int block_size…
25 …ed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_x, int block_size…
27 …ed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_x, int block_size…
H A Dceed-hip-compile.cpp220 …m_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDim_Hip() argument
222 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block… in CeedRunKernelDim_Hip()
229 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedRunKernelDimSharedCore_Hip() argument
232 …hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, blo… in CeedRunKernelDimSharedCore_Hip()
253 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedRunKernelDimShared_Hip() argument
257 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedRunKernelDimShared_Hip()
262 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedTryRunKernelDimShared_Hip() argument
264 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedTryRunKernelDimShared_Hip()
/libCEED/backends/cuda/
H A Dceed-cuda-compile.h24 CEED_INTERN int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size_x
26 …uda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_x, int block_size…
28 …uda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_x, int block_size…
H A Dceed-cuda-compile.cpp462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x in CeedRunKernelDim_Cuda() argument
464 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_siz… in CeedRunKernelDim_Cuda()
471 …re_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, in CeedRunKernelDimSharedCore_Cuda() argument
477 …CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z… in CeedRunKernelDimSharedCore_Cuda()
488 … max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda()
493 … max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda()
502 …d, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDimShared_Cuda() argument
506 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedRunKernelDimShared_Cuda()
511 …d, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int bloc… in CeedTryRunKernelDimShared_Cuda() argument
513 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedTryRunKernelDimShared_Cuda()
/libCEED/backends/hip-ref/
H A Dceed-hip-ref-basis.c65 const int block_size_x = Q_1d; in CeedBasisApplyCore_Hip() local
68 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, … in CeedBasisApplyCore_Hip()
285 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local
288 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Hip()
290 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, i… in CeedBasisApplyNonTensorCore_Hip()
295 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local
298 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip()
300 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, gr… in CeedBasisApplyNonTensorCore_Hip()
305 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local
308 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip()
[all …]
H A Dceed-hip-ref.h135 CeedInt block_size_x, block_size_y, elems_per_block; member
H A Dceed-hip-ref-operator.c1604 asmb->block_size_x = elem_size_in; in CeedOperatorAssembleSingleSetup_Hip()
1608 …bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > hip_data->device… in CeedOperatorAssembleSingleSetup_Hip()
1623 …asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_… in CeedOperatorAssembleSingleSetup_Hip()
1785 …dRunKernelDimShared_Hip(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_size_x, asmb->block_si… in CeedOperatorAssembleSingle_Hip()
/libCEED/backends/cuda-ref/
H A Dceed-cuda-ref-basis.c65 const int block_size_x = Q_1d; in CeedBasisApplyCore_Cuda() local
68 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1,… in CeedBasisApplyCore_Cuda()
286 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local
289 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_pe… in CeedBasisApplyNonTensorCore_Cuda()
291 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, … in CeedBasisApplyNonTensorCore_Cuda()
296 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local
299 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda()
301 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, g… in CeedBasisApplyNonTensorCore_Cuda()
306 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local
309 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda()
[all …]
H A Dceed-cuda-ref.h130 CeedInt block_size_x, block_size_y, elems_per_block; member
H A Dceed-cuda-ref-operator.c1607 asmb->block_size_x = elem_size_in; in CeedOperatorAssembleSingleSetup_Cuda()
1611 …bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > cuda_data->devic… in CeedOperatorAssembleSingleSetup_Cuda()
1626 …asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_… in CeedOperatorAssembleSingleSetup_Cuda()
1788 …RunKernelDimShared_Cuda(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_size_x, asmb->block_si… in CeedOperatorAssembleSingle_Cuda()
/libCEED/backends/sycl/
H A Dceed-sycl-compile.sycl.cpp169 …dSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDimSharedSycl() argument
171 sycl::range<3> local_range(block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl()
172 sycl::range<3> global_range(grid_size * block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl()
H A Dceed-sycl-compile.hpp21 …dSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int block_size_x, const int bloc…
/libCEED/backends/sycl-ref/
H A Dceed-sycl-ref.hpp97 CeedInt num_elem, block_size_x, block_size_y, elems_per_block; member
H A Dceed-sycl-ref-operator.sycl.cpp1113 asmb->block_size_x = elem_size; in CeedOperatorAssembleSingleSetup_Sycl()
1216 const CeedInt block_size_x = asmb->block_size_x; in CeedOperatorLinearAssemble_Sycl() local
1219 sycl::range<3> kernel_range(num_elem, block_size_y, block_size_x); in CeedOperatorLinearAssemble_Sycl()