Lines Matching refs:slice_id
80 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_basic_kernel() local
85 slice_id = row / sliceheight; in matmult_seqsell_basic_kernel()
88 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmult_seqsell_basic_kernel()
95 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_basic_kernel() local
100 slice_id = row / sliceheight; in matmultadd_seqsell_basic_kernel()
103 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmultadd_seqsell_basic_kernel()
115 PetscInt i, row, slice_id = blockIdx.x; in matmult_seqsell_tiled_kernel9() local
122 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel9()
124 …for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += … in matmult_seqsell_tiled_kernel9()
144 PetscInt i, row, slice_id = blockIdx.x; in matmultadd_seqsell_tiled_kernel9() local
151 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel9()
153 …for (i = sliidx[slice_id] + threadIdx.x + WARP_SIZE * threadIdx.y; i < sliidx[slice_id + 1]; i += … in matmultadd_seqsell_tiled_kernel9()
208 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmult_seqsell_tiled_kernel8() local
210 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmult_seqsell_tiled_kernel8()
211 …readIdx.x % (WARP_SIZE / 2) == 0) flag[threadIdx.y * 2 + threadIdx.x / (WARP_SIZE / 2)] = slice_id; in matmult_seqsell_tiled_kernel8()
212 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel8()
263 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmultadd_seqsell_tiled_kernel8() local
265 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmultadd_seqsell_tiled_kernel8()
266 …readIdx.x % (WARP_SIZE / 2) == 0) flag[threadIdx.y * 2 + threadIdx.x / (WARP_SIZE / 2)] = slice_id; in matmultadd_seqsell_tiled_kernel8()
267 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel8()
300 PetscInt i, row, slice_id; in matmult_seqsell_tiled_kernel7() local
301 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmult_seqsell_tiled_kernel7()
302 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmult_seqsell_tiled_kernel7()
305 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += WARP_SIZE) t += aval[i] * … in matmult_seqsell_tiled_kernel7()
315 PetscInt i, row, slice_id; in matmultadd_seqsell_tiled_kernel7() local
316 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmultadd_seqsell_tiled_kernel7()
317 row = slice_id * sliceheight + threadIdx.x % sliceheight; in matmultadd_seqsell_tiled_kernel7()
320 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += WARP_SIZE) t += aval[i] * … in matmultadd_seqsell_tiled_kernel7()
334 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel6() local
338 slice_id = row / sliceheight; in matmult_seqsell_tiled_kernel6()
342 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmult_seqsell_tiled_kernel6()
362 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel5() local
366 slice_id = row / sliceheight; in matmult_seqsell_tiled_kernel5()
370 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmult_seqsell_tiled_kernel5()
388 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel4() local
392 slice_id = row / sliceheight; in matmult_seqsell_tiled_kernel4()
396 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmult_seqsell_tiled_kernel4()
412 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel3() local
416 slice_id = row / sliceheight; in matmult_seqsell_tiled_kernel3()
420 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmult_seqsell_tiled_kernel3()
434 PetscInt i, row, slice_id, row_in_slice; in matmult_seqsell_tiled_kernel2() local
438 slice_id = row / sliceheight; in matmult_seqsell_tiled_kernel2()
442 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmult_seqsell_tiled_kernel2()
454 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel6() local
458 slice_id = row / sliceheight; in matmultadd_seqsell_tiled_kernel6()
462 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmultadd_seqsell_tiled_kernel6()
482 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel5() local
486 slice_id = row / sliceheight; in matmultadd_seqsell_tiled_kernel5()
490 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmultadd_seqsell_tiled_kernel5()
508 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel4() local
512 slice_id = row / sliceheight; in matmultadd_seqsell_tiled_kernel4()
516 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmultadd_seqsell_tiled_kernel4()
532 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel3() local
536 slice_id = row / sliceheight; in matmultadd_seqsell_tiled_kernel3()
540 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmultadd_seqsell_tiled_kernel3()
554 PetscInt i, row, slice_id, row_in_slice; in matmultadd_seqsell_tiled_kernel2() local
558 slice_id = row / sliceheight; in matmultadd_seqsell_tiled_kernel2()
562 …for (i = sliidx[slice_id] + row_in_slice + sliceheight * threadIdx.y; i < sliidx[slice_id + 1]; i … in matmultadd_seqsell_tiled_kernel2()