elems_per_block (definition) in projects: libCEED

Searched defs:elems_per_block (Results 1 – 14 of 14) sorted by relevance

/libCEED/backends/cuda-shared/
H A D	ceed-cuda-shared-basis.c	62 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyTensorCore_Cuda_shared() local 75 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() local 87 CeedInt elems_per_block = 1; in CeedBasisApplyTensorCore_Cuda_shared() local 116 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyTensorCore_Cuda_shared() local 129 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() local 140 CeedInt elems_per_block = 1; in CeedBasisApplyTensorCore_Cuda_shared() local 160 const CeedInt elems_per_block = block_size / Q_1d; in CeedBasisApplyTensorCore_Cuda_shared() local 166 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() local 172 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() local 326 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyAtPointsCore_Cuda_shared() local [all …]
/libCEED/backends/hip-shared/
H A D	ceed-hip-shared-basis.c	128 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared() local 141 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local 152 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local 180 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared() local 193 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local 204 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local 226 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local 232 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local 238 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local 391 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyAtPointsCore_Hip_shared() local [all …]
/libCEED/backends/sycl-gen/
H A D	ceed-sycl-gen-operator-build.sycl.cpp	`34 CeedInt elems_per_block = 64 * thread1d > 256 ? 256 / thread1d : 64; in BlockGridCalculate_Sycl_gen() local 41 const CeedInt elems_per_block = thread1d < 4 ? 16 : 2; in BlockGridCalculate_Sycl_gen() local 47 const CeedInt elems_per_block = thread1d < 6 ? 4 : (thread1d < 8 ? 2 : 1); in BlockGridCalculate_Sycl_gen() local`
/libCEED/backends/cuda-gen/
H A D	ceed-cuda-gen-operator.c	`40 static int Waste(int threads_per_sm, int warp_size, int threads_per_elem, int elems_per_block) { in Waste() 79 int elems_per_block = 1; in BlockGridCalculate() local 215 …CeedInt elems_per_block = CeedIntMin(cuda_data->device_prop.maxThreadsDim[2], CeedIntMax(512 / dat… in CeedOperatorApplyAddCore_Cuda_gen() local 470 …CeedInt elems_per_block = CeedIntMin(cuda_data->device_prop.maxThreadsDim[2], CeedIntMax(512 / dat… in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local`
/libCEED/backends/hip-gen/
H A D	ceed-hip-gen-operator.c	`157 CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64; in CeedOperatorApplyAddCore_Hip_gen() local 435 CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64; in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local`
H A D	ceed-hip-gen-operator-build.cpp	`37 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in BlockGridCalculate_Hip_gen() local 44 const CeedInt elems_per_block = thread_1d < 4 ? 16 : 2; in BlockGridCalculate_Hip_gen() local 50 const CeedInt elems_per_block = thread_1d < 6 ? 4 : (thread_1d < 8 ? 2 : 1); in BlockGridCalculate_Hip_gen() local`
/libCEED/backends/cuda-ref/
H A D	ceed-cuda-ref-basis.c	`260 const int elems_per_block = 1; in CeedBasisApplyNonTensorCore_Cuda() local`
H A D	ceed-cuda-ref-operator.c	`1386 CeedInt elems_per_block = 1; in CeedOperatorAssembleDiagonalSetupCompile_Cuda() local 1465 CeedInt elems_per_block = 1; in CeedOperatorAssembleDiagonalCore_Cuda() local`
H A D	ceed-cuda-ref.h	`130 CeedInt block_size_x, block_size_y, elems_per_block; member`
/libCEED/backends/sycl-ref/
H A D	ceed-sycl-ref.hpp	`97 CeedInt num_elem, block_size_x, block_size_y, elems_per_block; member`
H A D	ceed-sycl-ref-operator.sycl.cpp	`1111 int elems_per_block = 1; in CeedOperatorAssembleSingleSetup_Sycl() local`
/libCEED/backends/hip-ref/
H A D	ceed-hip-ref-basis.c	`259 const int elems_per_block = 1; in CeedBasisApplyNonTensorCore_Hip() local`
H A D	ceed-hip-ref-operator.c	`1383 CeedInt elems_per_block = 1; in CeedOperatorAssembleDiagonalSetupCompile_Hip() local 1462 CeedInt elems_per_block = 1; in CeedOperatorAssembleDiagonalCore_Hip() local`
H A D	ceed-hip-ref.h	`135 CeedInt block_size_x, block_size_y, elems_per_block; member`

Project(s)

Full Search
Definition
Symbol
File Path
History
Type