Searched refs:threads_per_elem (Results 1 – 1 of 1) sorted by relevance
| /libCEED/backends/cuda-gen/ |
| H A D | ceed-cuda-gen-operator.c | 40 static int Waste(int threads_per_sm, int warp_size, int threads_per_elem, int elems_per_block) { in Waste() argument 41 int useful_threads_per_block = threads_per_elem * elems_per_block; in Waste() 78 const int threads_per_elem = block[0] * block[1]; in BlockGridCalculate() local 80 int waste = Waste(threads_per_sm, warp_size, threads_per_elem, 1); in BlockGridCalculate() 82 for (int i = 2; i <= CeedIntMin(max_threads_per_block / threads_per_elem, num_elem); i++) { in BlockGridCalculate() 83 int i_waste = Waste(threads_per_sm, warp_size, threads_per_elem, i); in BlockGridCalculate() 87 if (i_waste < waste || (i_waste == waste && threads_per_elem * i <= 128)) { in BlockGridCalculate()
|