Home
last modified time | relevance | path

Searched refs:threads_per_elem (Results 1 – 1 of 1) sorted by relevance

/libCEED/backends/cuda-gen/
H A Dceed-cuda-gen-operator.c40 static int Waste(int threads_per_sm, int warp_size, int threads_per_elem, int elems_per_block) { in Waste() argument
41 int useful_threads_per_block = threads_per_elem * elems_per_block; in Waste()
78 const int threads_per_elem = block[0] * block[1]; in BlockGridCalculate() local
80 int waste = Waste(threads_per_sm, warp_size, threads_per_elem, 1); in BlockGridCalculate()
82 for (int i = 2; i <= CeedIntMin(max_threads_per_block / threads_per_elem, num_elem); i++) { in BlockGridCalculate()
83 int i_waste = Waste(threads_per_sm, warp_size, threads_per_elem, i); in BlockGridCalculate()
87 if (i_waste < waste || (i_waste == waste && threads_per_elem * i <= 128)) { in BlockGridCalculate()