| /libCEED/backends/cuda-ref/kernels/ |
| H A D | cuda-ref-vector.cu | 28 int grid_size = copy_size / block_size; in CeedDeviceCopyStrided_Cuda() local 30 if (block_size * grid_size < copy_size) grid_size += 1; in CeedDeviceCopyStrided_Cuda() 31 copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array); in CeedDeviceCopyStrided_Cuda() 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Cuda() local 52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Cuda() 53 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Cuda() 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Cuda() local 76 if (block_size * grid_size < set_size) grid_size += 1; in CeedDeviceSetValueStrided_Cuda() 77 setValueStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, val); in CeedDeviceSetValueStrided_Cuda() 98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Cuda() local [all …]
|
| /libCEED/backends/hip-ref/kernels/ |
| H A D | hip-ref-vector.hip.cpp | 28 int grid_size = vec_size / block_size; in CeedDeviceCopyStrided_Hip() local 30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceCopyStrided_Hip() 31 …hipLaunchKernelGGL(copyStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, le… in CeedDeviceCopyStrided_Hip() 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Hip() local 52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Hip() 53 hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); in CeedDeviceSetValue_Hip() 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Hip() local 76 if (block_size * grid_size < set_size) grid_size += 1; in CeedDeviceSetValueStrided_Hip() 77 …hipLaunchKernelGGL(setValueStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, stop… in CeedDeviceSetValueStrided_Hip() 98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Hip() local [all …]
|
| /libCEED/backends/sycl-ref/kernels/ |
| H A D | sycl-ref-vector.cpp | 28 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Sycl() local 30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Sycl() 31 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Sycl() 51 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Sycl() local 53 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceReciprocal_Sycl() 54 rcpValueK<<<grid_size, block_size>>>(d_array, length); in CeedDeviceReciprocal_Sycl() 74 int grid_size = vec_size / block_size; in CeedDeviceScale_Sycl() local 76 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceScale_Sycl() 77 scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length); in CeedDeviceScale_Sycl() 96 int grid_size = vec_size / block_size; in CeedDeviceAXPY_Sycl() local [all …]
|
| /libCEED/backends/hip/ |
| H A D | ceed-hip-compile.h | 20 CEED_INTERN int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size, v… 22 CEED_INTERN int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size… 25 …lDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_… 27 …lDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_…
|
| H A D | ceed-hip-compile.cpp | 212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, v… in CeedRunKernel_Hip() argument 213 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, … in CeedRunKernel_Hip() 220 int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size… in CeedRunKernelDim_Hip() argument 222 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block… in CeedRunKernelDim_Hip() 229 …Core_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedRunKernelDimSharedCore_Hip() argument 232 …hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, blo… in CeedRunKernelDimSharedCore_Hip() 253 …ared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedRunKernelDimShared_Hip() argument 257 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedRunKernelDimShared_Hip() 262 …ared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedTryRunKernelDimShared_Hip() argument 264 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedTryRunKernelDimShared_Hip()
|
| /libCEED/backends/cuda/ |
| H A D | ceed-cuda-compile.h | 20 CEED_INTERN int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size, voi… 24 CEED_INTERN int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size_x… 26 …KernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_… 28 …KernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_…
|
| H A D | ceed-cuda-compile.cpp | 454 int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, voi… in CeedRunKernel_Cuda() argument 455 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size, 1, 1, 0, ar… in CeedRunKernel_Cuda() 462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x… in CeedRunKernelDim_Cuda() argument 464 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_siz… in CeedRunKernelDim_Cuda() 471 …haredCore_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block… in CeedRunKernelDimSharedCore_Cuda() argument 477 …CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z… in CeedRunKernelDimSharedCore_Cuda() 502 int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size,… in CeedRunKernelDimShared_Cuda() argument 506 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedRunKernelDimShared_Cuda() 511 …DimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block… in CeedTryRunKernelDimShared_Cuda() argument 513 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedTryRunKernelDimShared_Cuda()
|
| /libCEED/backends/sycl/ |
| H A D | ceed-sycl-compile.hpp | 21 CEED_INTERN int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, co…
|
| H A D | ceed-sycl-compile.sycl.cpp | 169 int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int bloc… in CeedRunKernelDimSharedSycl() argument 172 sycl::range<3> global_range(grid_size * block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl()
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 227 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 229 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, weig… in CeedBasisApplyTensorCore_Hip_shared() 233 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 235 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, w… in CeedBasisApplyTensorCore_Hip_shared() 239 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 241 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, w… in CeedBasisApplyTensorCore_Hip_shared() 595 const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyNonTensorCore_Hip_shared() local 597 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, thread, elems_per_block, 1, we… in CeedBasisApplyNonTensorCore_Hip_shared()
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 161 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 163 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, wei… in CeedBasisApplyTensorCore_Cuda_shared() 167 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 169 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared() 173 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 175 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared()
|