Searched refs:WARP_SIZE (Results 1 – 1 of 1) sorted by relevance
8 #define WARP_SIZE 64 macro
114 __shared__ MatScalar shared[WARP_SIZE][BLOCKY]; in matmult_seqsell_tiled_kernel9()
116 int tid = threadIdx.x + threadIdx.y * WARP_SIZE; in matmult_seqsell_tiled_kernel9()
124 …for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += … in matmult_seqsell_tiled_kernel9()
127 for (int offset = WARP_SIZE / 2; offset >= sliceheight; offset /= 2) t += __shfl_down(t, offset); in matmult_seqsell_tiled_kernel9()
143 __shared__ MatScalar shared[WARP_SIZE][BLOCKY]; in matmultadd_seqsell_tiled_kernel9()
145 int tid = threadIdx.x + threadIdx.y * WARP_SIZE; in matmultadd_seqsell_tiled_kernel9()
153 …for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += … in matmultadd_seqsell_tiled_kernel9()
156 for (int offset = WARP_SIZE / 2; offset >= sliceheight; offset /= 2) t += __shfl_down(t, offset); in matmultadd_seqsell_tiled_kernel9()
174 int halfwarpid = threadIdx.y * 2 + threadIdx.x / (WARP_SIZE / 2); in segment_scan()
[all …]