Lines Matching refs:slice_id
77 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_basic_kernel() local
82 slice_id = row / sliceheight; in matmult_seqsell_basic_kernel()
85 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmult_seqsell_basic_kernel()
92 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_basic_kernel() local
97 slice_id = row / sliceheight; in matmultadd_seqsell_basic_kernel()
100 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmultadd_seqsell_basic_kernel()
111 PetscInt i, row, slice_id = blockIdx.x; in matmult_seqsell_tiled_kernel9() local
118 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel9()
120 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmult_seqsell_tiled_kernel9()
140 PetscInt i, row, slice_id = blockIdx.x; in matmultadd_seqsell_tiled_kernel9() local
147 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel9()
149 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmultadd_seqsell_tiled_kernel9()
204 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmult_seqsell_tiled_kernel8() local
206 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmult_seqsell_tiled_kernel8()
207 if (threadIdx.x % 16 == 0) flag[threadIdx.y * 2 + threadIdx.x / 16] = slice_id; in matmult_seqsell_tiled_kernel8()
208 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel8()
259 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmultadd_seqsell_tiled_kernel8() local
261 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmultadd_seqsell_tiled_kernel8()
262 if (threadIdx.x % 16 == 0) flag[threadIdx.y * 2 + threadIdx.x / 16] = slice_id; in matmultadd_seqsell_tiled_kernel8()
263 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel8()
296 PetscInt i, row, slice_id; in matmult_seqsell_tiled_kernel7() local
297 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmult_seqsell_tiled_kernel7()
298 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel7()
301 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmult_seqsell_tiled_kernel7()
311 PetscInt i, row, slice_id; in matmultadd_seqsell_tiled_kernel7() local
312 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmultadd_seqsell_tiled_kernel7()
313 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel7()
316 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmultadd_seqsell_tiled_kernel7()
329 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel6() local
333 slice_id = row / SLICE_HEIGHT; in matmult_seqsell_tiled_kernel6()
337 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel6()
357 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel5() local
361 slice_id = row / SLICE_HEIGHT; in matmult_seqsell_tiled_kernel5()
365 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel5()
383 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel4() local
387 slice_id = row / SLICE_HEIGHT; in matmult_seqsell_tiled_kernel4()
391 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel4()
407 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel3() local
411 slice_id = row / SLICE_HEIGHT; in matmult_seqsell_tiled_kernel3()
415 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel3()
429 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel2() local
433 slice_id = row / SLICE_HEIGHT; in matmult_seqsell_tiled_kernel2()
437 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel2()
449 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel6() local
453 slice_id = row / SLICE_HEIGHT; in matmultadd_seqsell_tiled_kernel6()
457 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel6()
477 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel5() local
481 slice_id = row / SLICE_HEIGHT; in matmultadd_seqsell_tiled_kernel5()
485 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel5()
503 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel4() local
507 slice_id = row / SLICE_HEIGHT; in matmultadd_seqsell_tiled_kernel4()
511 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel4()
527 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel3() local
531 slice_id = row / SLICE_HEIGHT; in matmultadd_seqsell_tiled_kernel3()
535 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel3()
549 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel2() local
553 slice_id = row / SLICE_HEIGHT; in matmultadd_seqsell_tiled_kernel2()
557 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel2()