| /libCEED/backends/hip/ |
| H A D | ceed-hip-compile.h | 22 …dRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size_x, int block_size… 25 …ed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_x, int block_size… 27 …ed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_x, int block_size…
|
| H A D | ceed-hip-compile.cpp | 220 …m_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDim_Hip() argument 222 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block… in CeedRunKernelDim_Hip() 229 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedRunKernelDimSharedCore_Hip() argument 232 …hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, blo… in CeedRunKernelDimSharedCore_Hip() 253 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedRunKernelDimShared_Hip() argument 257 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedRunKernelDimShared_Hip() 262 …p(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x, in CeedTryRunKernelDimShared_Hip() argument 264 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedTryRunKernelDimShared_Hip()
|
| /libCEED/backends/cuda/ |
| H A D | ceed-cuda-compile.h | 24 CEED_INTERN int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size_x… 26 …uda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_x, int block_size… 28 …uda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_x, int block_size…
|
| H A D | ceed-cuda-compile.cpp | 462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x… in CeedRunKernelDim_Cuda() argument 464 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_siz… in CeedRunKernelDim_Cuda() 471 …re_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, in CeedRunKernelDimSharedCore_Cuda() argument 477 …CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z… in CeedRunKernelDimSharedCore_Cuda() 488 … max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda() 493 … max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda() 502 …d, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDimShared_Cuda() argument 506 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedRunKernelDimShared_Cuda() 511 …d, CUfunction kernel, CUstream stream, const int grid_size, const int block_size_x, const int bloc… in CeedTryRunKernelDimShared_Cuda() argument 513 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedTryRunKernelDimShared_Cuda()
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-basis.c | 65 const int block_size_x = Q_1d; in CeedBasisApplyCore_Hip() local 68 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, … in CeedBasisApplyCore_Hip() 285 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local 288 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Hip() 290 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, i… in CeedBasisApplyNonTensorCore_Hip() 295 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local 298 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip() 300 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, gr… in CeedBasisApplyNonTensorCore_Hip() 305 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Hip() local 308 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_… in CeedBasisApplyNonTensorCore_Hip() [all …]
|
| H A D | ceed-hip-ref.h | 135 CeedInt block_size_x, block_size_y, elems_per_block; member
|
| H A D | ceed-hip-ref-operator.c | 1604 asmb->block_size_x = elem_size_in; in CeedOperatorAssembleSingleSetup_Hip() 1608 …bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > hip_data->device… in CeedOperatorAssembleSingleSetup_Hip() 1623 …asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_… in CeedOperatorAssembleSingleSetup_Hip() 1785 …dRunKernelDimShared_Hip(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_size_x, asmb->block_si… in CeedOperatorAssembleSingle_Hip()
|
| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-basis.c | 65 const int block_size_x = Q_1d; in CeedBasisApplyCore_Cuda() local 68 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1,… in CeedBasisApplyCore_Cuda() 286 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local 289 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_pe… in CeedBasisApplyNonTensorCore_Cuda() 291 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, … in CeedBasisApplyNonTensorCore_Cuda() 296 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local 299 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda() 301 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, g… in CeedBasisApplyNonTensorCore_Cuda() 306 const int block_size_x = is_transpose ? num_nodes : num_qpts; in CeedBasisApplyNonTensorCore_Cuda() local 309 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per… in CeedBasisApplyNonTensorCore_Cuda() [all …]
|
| H A D | ceed-cuda-ref.h | 130 CeedInt block_size_x, block_size_y, elems_per_block; member
|
| H A D | ceed-cuda-ref-operator.c | 1607 asmb->block_size_x = elem_size_in; in CeedOperatorAssembleSingleSetup_Cuda() 1611 …bool fallback = asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block > cuda_data->devic… in CeedOperatorAssembleSingleSetup_Cuda() 1626 …asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_… in CeedOperatorAssembleSingleSetup_Cuda() 1788 …RunKernelDimShared_Cuda(ceed, asmb->LinearAssemble, NULL, grid, asmb->block_size_x, asmb->block_si… in CeedOperatorAssembleSingle_Cuda()
|
| /libCEED/backends/sycl/ |
| H A D | ceed-sycl-compile.sycl.cpp | 169 …dSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int block_size_x, const int bloc… in CeedRunKernelDimSharedSycl() argument 171 sycl::range<3> local_range(block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl() 172 sycl::range<3> global_range(grid_size * block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl()
|
| H A D | ceed-sycl-compile.hpp | 21 …dSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int block_size_x, const int bloc…
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref.hpp | 97 CeedInt num_elem, block_size_x, block_size_y, elems_per_block; member
|
| H A D | ceed-sycl-ref-operator.sycl.cpp | 1113 asmb->block_size_x = elem_size; in CeedOperatorAssembleSingleSetup_Sycl() 1216 const CeedInt block_size_x = asmb->block_size_x; in CeedOperatorLinearAssemble_Sycl() local 1219 sycl::range<3> kernel_range(num_elem, block_size_y, block_size_x); in CeedOperatorLinearAssemble_Sycl()
|