ceed-cuda-shared-basis.c - OpenGrok cross reference for /libCEED/backends/cuda-shared/ceed-cuda-shared-basis.c

Lines Matching defs:elems_per_block
62 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyTensorCore_Cuda_shared()  local
75 …      CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1);  in CeedBasisApplyTensorCore_Cuda_shared()  local
87         CeedInt elems_per_block = 1;  in CeedBasisApplyTensorCore_Cuda_shared()  local
116 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyTensorCore_Cuda_shared()  local
129 …      CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1);  in CeedBasisApplyTensorCore_Cuda_shared()  local
140         CeedInt elems_per_block = 1;  in CeedBasisApplyTensorCore_Cuda_shared()  local
160         const CeedInt elems_per_block = block_size / Q_1d;  in CeedBasisApplyTensorCore_Cuda_shared()  local
166         const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1;  in CeedBasisApplyTensorCore_Cuda_shared()  local
172         const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1;  in CeedBasisApplyTensorCore_Cuda_shared()  local
326 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
340 …      CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1);  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
352         CeedInt elems_per_block = 1;  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
376 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
389 …      CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1);  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
401         CeedInt elems_per_block = 1;  in CeedBasisApplyAtPointsCore_Cuda_shared()  local
485 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyNonTensorCore_Cuda_shared()  local
509 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyNonTensorCore_Cuda_shared()  local
533 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr…  in CeedBasisApplyNonTensorCore_Cuda_shared()  local