Home
last modified time | relevance | path

Searched refs:max_threads_per_block (Results 1 – 2 of 2) sorted by relevance

/libCEED/backends/cuda-gen/
H A D ceed-cuda-gen-operator.c 75 static int BlockGridCalculate(CeedInt num_elem, int blocks_per_sm, int max_threads_per_block, int m… in BlockGridCalculate() argument
77 const int threads_per_sm = blocks_per_sm * max_threads_per_block; in BlockGridCalculate()
82 for (int i = 2; i <= CeedIntMin(max_threads_per_block / threads_per_elem, num_elem); i++) { in BlockGridCalculate()
205 int max_threads_per_block, min_grid_size, grid; in CeedOperatorApplyAddCore_Cuda_gen() local
208 …CeedCallCuda(ceed, cuOccupancyMaxPotentialBlockSize(&min_grid_size, &max_threads_per_block, data->… in CeedOperatorApplyAddCore_Cuda_gen()
212 …culate(num_elem, min_grid_size / cuda_data->device_prop.multiProcessorCount, max_threads_per_block, in CeedOperatorApplyAddCore_Cuda_gen()
460 int max_threads_per_block, min_grid_size, grid; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local
463 …CeedCallCuda(ceed, cuOccupancyMaxPotentialBlockSize(&min_grid_size, &max_threads_per_block, data->… in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
467 …culate(num_elem, min_grid_size / cuda_data->device_prop.multiProcessorCount, max_threads_per_block, in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
636 int max_threads_per_block, min_grid_size, grid; in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local
[all …]
/libCEED/backends/cuda/
H A D ceed-cuda-compile.cpp 480 int max_threads_per_block, shared_size_bytes, num_regs; in CeedRunKernelDimSharedCore_Cuda() local
482 cuFuncGetAttribute(&max_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel); in CeedRunKernelDimSharedCore_Cuda()
488 max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda()
493 max_threads_per_block, block_size_x, block_size_y, block_size_z, shared_size_bytes, num_regs); in CeedRunKernelDimSharedCore_Cuda()