Home
last modified time | relevance | path

Searched refs:grid_size (Results 1 – 11 of 11) sorted by relevance

/libCEED/backends/cuda-ref/kernels/
H A Dcuda-ref-vector.cu28 int grid_size = copy_size / block_size; in CeedDeviceCopyStrided_Cuda() local
30 if (block_size * grid_size < copy_size) grid_size += 1; in CeedDeviceCopyStrided_Cuda()
31 copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array); in CeedDeviceCopyStrided_Cuda()
50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Cuda() local
52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Cuda()
53 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Cuda()
74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Cuda() local
76 if (block_size * grid_size < set_size) grid_size += 1; in CeedDeviceSetValueStrided_Cuda()
77 setValueStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, val); in CeedDeviceSetValueStrided_Cuda()
98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Cuda() local
[all …]
/libCEED/backends/hip-ref/kernels/
H A Dhip-ref-vector.hip.cpp28 int grid_size = vec_size / block_size; in CeedDeviceCopyStrided_Hip() local
30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceCopyStrided_Hip()
31 …hipLaunchKernelGGL(copyStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, le… in CeedDeviceCopyStrided_Hip()
50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Hip() local
52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Hip()
53 hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); in CeedDeviceSetValue_Hip()
74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Hip() local
76 if (block_size * grid_size < set_size) grid_size += 1; in CeedDeviceSetValueStrided_Hip()
77 …hipLaunchKernelGGL(setValueStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, stop… in CeedDeviceSetValueStrided_Hip()
98 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Hip() local
[all …]
/libCEED/backends/sycl-ref/kernels/
H A Dsycl-ref-vector.cpp28 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Sycl() local
30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Sycl()
31 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Sycl()
51 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Sycl() local
53 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceReciprocal_Sycl()
54 rcpValueK<<<grid_size, block_size>>>(d_array, length); in CeedDeviceReciprocal_Sycl()
74 int grid_size = vec_size / block_size; in CeedDeviceScale_Sycl() local
76 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceScale_Sycl()
77 scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length); in CeedDeviceScale_Sycl()
96 int grid_size = vec_size / block_size; in CeedDeviceAXPY_Sycl() local
[all …]
/libCEED/backends/hip/
H A Dceed-hip-compile.h20 CEED_INTERN int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size, v…
22 CEED_INTERN int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size…
25 …lDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_…
27 …lDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, int grid_size, int block_size_…
H A Dceed-hip-compile.cpp212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, v… in CeedRunKernel_Hip() argument
213 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, … in CeedRunKernel_Hip()
220 int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size… in CeedRunKernelDim_Hip() argument
222 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block… in CeedRunKernelDim_Hip()
229 …Core_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedRunKernelDimSharedCore_Hip() argument
232 …hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, blo… in CeedRunKernelDimSharedCore_Hip()
253 …ared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedRunKernelDimShared_Hip() argument
257 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedRunKernelDimShared_Hip()
262 …ared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block… in CeedTryRunKernelDimShared_Hip() argument
264 …CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, bloc… in CeedTryRunKernelDimShared_Hip()
/libCEED/backends/cuda/
H A Dceed-cuda-compile.h20 CEED_INTERN int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size, voi…
24 CEED_INTERN int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size_x…
26 …KernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_…
28 …KernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, int grid_size, int block_size_…
H A Dceed-cuda-compile.cpp454 int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, voi… in CeedRunKernel_Cuda() argument
455 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size, 1, 1, 0, ar… in CeedRunKernel_Cuda()
462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x… in CeedRunKernelDim_Cuda() argument
464 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_siz… in CeedRunKernelDim_Cuda()
471 …haredCore_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block… in CeedRunKernelDimSharedCore_Cuda() argument
477 …CUresult result = cuLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z… in CeedRunKernelDimSharedCore_Cuda()
502 int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size,… in CeedRunKernelDimShared_Cuda() argument
506 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedRunKernelDimShared_Cuda()
511 …DimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size, const int block… in CeedTryRunKernelDimShared_Cuda() argument
513 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedTryRunKernelDimShared_Cuda()
/libCEED/backends/sycl/
H A Dceed-sycl-compile.hpp21 CEED_INTERN int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, co…
H A Dceed-sycl-compile.sycl.cpp169 int CeedRunKernelDimSharedSycl(Ceed ceed, sycl::kernel *kernel, const int grid_size, const int bloc… in CeedRunKernelDimSharedSycl() argument
172 sycl::range<3> global_range(grid_size * block_size_z, block_size_y, block_size_x); in CeedRunKernelDimSharedSycl()
/libCEED/backends/hip-shared/
H A Dceed-hip-shared-basis.c227 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
229 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, weig… in CeedBasisApplyTensorCore_Hip_shared()
233 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
235 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, w… in CeedBasisApplyTensorCore_Hip_shared()
239 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local
241 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, w… in CeedBasisApplyTensorCore_Hip_shared()
595 const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyNonTensorCore_Hip_shared() local
597 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, thread, elems_per_block, 1, we… in CeedBasisApplyNonTensorCore_Hip_shared()
/libCEED/backends/cuda-shared/
H A Dceed-cuda-shared-basis.c161 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
163 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, wei… in CeedBasisApplyTensorCore_Cuda_shared()
167 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
169 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared()
173 … const CeedInt grid_size = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local
175 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, … in CeedBasisApplyTensorCore_Cuda_shared()