| /libCEED/backends/cuda-ref/kernels/ |
| H A D | cuda-ref-vector.cu | 28 int grid_size = copy_size / block_size; in CeedDeviceCopyStrided_Cuda() local 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Cuda() local 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Cuda() local 98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Cuda() local 120 int grid_size = vec_size / block_size; in CeedDeviceScale_Cuda() local 142 int grid_size = vec_size / block_size; in CeedDeviceAXPY_Cuda() local 167 int grid_size = vec_size / block_size; in CeedDeviceAXPBY_Cuda() local 189 int grid_size = vec_size / block_size; in CeedDevicePointwiseMult_Cuda() local
|
| /libCEED/backends/hip-ref/kernels/ |
| H A D | hip-ref-vector.hip.cpp | 28 int grid_size = vec_size / block_size; in CeedDeviceCopyStrided_Hip() local 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Hip() local 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Hip() local 98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Hip() local 120 int grid_size = vec_size / block_size; in CeedDeviceScale_Hip() local 142 int grid_size = vec_size / block_size; in CeedDeviceAXPY_Hip() local 167 int grid_size = vec_size / block_size; in CeedDeviceAXPBY_Hip() local 189 int grid_size = vec_size / block_size; in CeedDevicePointwiseMult_Hip() local
|
| /libCEED/backends/sycl-ref/kernels/ |
| H A D | sycl-ref-vector.cpp | 28 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Sycl() local 51 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Sycl() local 74 int grid_size = vec_size / block_size; in CeedDeviceScale_Sycl() local 96 int grid_size = vec_size / block_size; in CeedDeviceAXPY_Sycl() local 119 int grid_size = vec_size / block_size; in CeedDevicePointwiseMult_Sycl() local
|
| /libCEED/backends/hip/ |
| H A D | ceed-hip-compile.cpp | 212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, v… in CeedRunKernel_Hip() 220 int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size… in CeedRunKernelDim_Hip() 229 …haredCore_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int … in CeedRunKernelDimSharedCore_Hip() 253 …DimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int … in CeedRunKernelDimShared_Hip() 262 …DimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int … in CeedTryRunKernelDimShared_Hip()
|
| /libCEED/backends/cuda/ |
| H A D | ceed-cuda-compile.cpp | 454 int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, voi… in CeedRunKernel_Cuda() 462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x… in CeedRunKernelDim_Cuda() 471 …lDimSharedCore_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int … in CeedRunKernelDimSharedCore_Cuda() 502 int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size,… in CeedRunKernelDimShared_Cuda() 511 …ernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int … in CeedTryRunKernelDimShared_Cuda()
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 227 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 233 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 239 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 595 const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyNonTensorCore_Hip_shared() local
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 161 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 167 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 173 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
|
| /libCEED/backends/sycl/ |
| H A D | ceed-sycl-compile.sycl.cpp | 169 int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int bloc… in CeedRunKernelDimSharedSycl()
|