Searched refs:opt_elems (Results 1 – 2 of 2) sorted by relevance
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 73 const CeedInt opt_elems[7] = {0, 32, 8, 6, 4, 2, 8}; in CeedBasisApplyTensorCore_Cuda_shared() local 75 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() 127 const CeedInt opt_elems[7] = {0, 32, 8, 6, 4, 2, 8}; in CeedBasisApplyTensorCore_Cuda_shared() local 129 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() 165 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared() local 166 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() 171 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared() local 172 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() 338 const CeedInt opt_elems[7] = {0, 32, 8, 6, 4, 2, 8}; in CeedBasisApplyAtPointsCore_Cuda_shared() local 340 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyAtPointsCore_Cuda_shared() [all …]
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 225 const CeedInt opt_elems = block_size / Q_1d; in CeedBasisApplyTensorCore_Hip_shared() local 226 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() 231 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared() local 232 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() 237 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared() local 238 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared()
|