Lines Matching refs:elems_per_block
40 static int Waste(int threads_per_sm, int warp_size, int threads_per_elem, int elems_per_block) { in Waste() argument
41 int useful_threads_per_block = threads_per_elem * elems_per_block; in Waste()
79 int elems_per_block = 1; in BlockGridCalculate() local
88 elems_per_block = i; in BlockGridCalculate()
94 block[2] = CeedIntMin(elems_per_block, max_threads_z); in BlockGridCalculate()
95 *grid = CeedDivUpInt(num_elem, elems_per_block); in BlockGridCalculate()
215 …CeedInt elems_per_block = CeedIntMin(cuda_data->device_prop.maxThreadsDim[2], CeedIntMax(512 / dat… in CeedOperatorApplyAddCore_Cuda_gen() local
217 grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedOperatorApplyAddCore_Cuda_gen()
218 block[2] = elems_per_block; in CeedOperatorApplyAddCore_Cuda_gen()
470 …CeedInt elems_per_block = CeedIntMin(cuda_data->device_prop.maxThreadsDim[2], CeedIntMax(512 / dat… in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local
472 grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
473 block[2] = elems_per_block; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()