Lines Matching defs:elems_per_block
62 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyTensorCore_Cuda_shared() local
75 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() local
87 CeedInt elems_per_block = 1; in CeedBasisApplyTensorCore_Cuda_shared() local
116 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyTensorCore_Cuda_shared() local
129 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyTensorCore_Cuda_shared() local
140 CeedInt elems_per_block = 1; in CeedBasisApplyTensorCore_Cuda_shared() local
160 const CeedInt elems_per_block = block_size / Q_1d; in CeedBasisApplyTensorCore_Cuda_shared() local
166 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() local
172 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Cuda_shared() local
326 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyAtPointsCore_Cuda_shared() local
340 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyAtPointsCore_Cuda_shared() local
352 CeedInt elems_per_block = 1; in CeedBasisApplyAtPointsCore_Cuda_shared() local
376 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyAtPointsCore_Cuda_shared() local
389 … CeedInt elems_per_block = CeedIntMax(thread_1d < 7 ? opt_elems[thread_1d] / num_comp : 1, 1); in CeedBasisApplyAtPointsCore_Cuda_shared() local
401 CeedInt elems_per_block = 1; in CeedBasisApplyAtPointsCore_Cuda_shared() local
485 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyNonTensorCore_Cuda_shared() local
509 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyNonTensorCore_Cuda_shared() local
533 …CeedInt elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thr… in CeedBasisApplyNonTensorCore_Cuda_shared() local