Lines Matching refs:aval
75 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_basic_kernel() argument
85 …ice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[i] * x[acolidx[i]]; in matmult_seqsell_basic_kernel()
90 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_basic_kernel() argument
100 …ice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_basic_kernel()
108 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel9() argument
120 …Idx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BLOCKY) t += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel9()
137 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel9() argument
149 …Idx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BLOCKY) t += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel9()
185 …onst PetscInt *chunk_slice_map, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel8() argument
209 if (row < nrows && gid < totalentries) t = aval[gid] * x[acolidx[gid]]; in matmult_seqsell_tiled_kernel8()
216 if (row < nrows) t += aval[gid] * x[acolidx[gid]]; in matmult_seqsell_tiled_kernel8()
240 …onst PetscInt *chunk_slice_map, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel8() argument
264 if (row < nrows && gid < totalentries) t = aval[gid] * x[acolidx[gid]]; in matmultadd_seqsell_tiled_kernel8()
271 if (row < nrows) t += aval[gid] * x[acolidx[gid]]; in matmultadd_seqsell_tiled_kernel8()
294 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel7() argument
301 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmult_seqsell_tiled_kernel7()
309 …nt nrows, PetscInt sliceheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel7() argument
316 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmultadd_seqsell_tiled_kernel7()
326 …l_tiled_kernel6(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel6() argument
337 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel6()
354 …l_tiled_kernel5(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel5() argument
365 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel5()
380 …l_tiled_kernel4(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel4() argument
391 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel4()
404 …l_tiled_kernel3(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel3() argument
415 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel3()
426 …l_tiled_kernel2(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmult_seqsell_tiled_kernel2() argument
437 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmult_seqsell_tiled_kernel2()
446 …l_tiled_kernel6(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel6() argument
457 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel6()
474 …l_tiled_kernel5(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel5() argument
485 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel5()
500 …l_tiled_kernel4(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel4() argument
511 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel4()
524 …l_tiled_kernel3(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel3() argument
535 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel3()
546 …l_tiled_kernel2(PetscInt nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sl… in matmultadd_seqsell_tiled_kernel2() argument
557 …ICE_HEIGHT * blockDim.y) shared[threadIdx.y * blockDim.x + threadIdx.x] += aval[i] * x[acolidx[i]]; in matmultadd_seqsell_tiled_kernel2()
573 MatScalar *aval; in MatMult_SeqSELLCUDA() local
588 aval = cudastruct->val; in MatMult_SeqSELLCUDA()
601 …matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, slii… in MatMult_SeqSELLCUDA()
603 …matmult_seqsell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, slii… in MatMult_SeqSELLCUDA()
605 …matmult_seqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, slii… in MatMult_SeqSELLCUDA()
607 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
609 …sell_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
611 …matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, slii… in MatMult_SeqSELLCUDA()
617 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMult_SeqSELLCUDA()
619 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMult_SeqSELLCUDA()
621 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMult_SeqSELLCUDA()
623 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx… in MatMult_SeqSELLCUDA()
625 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx… in MatMult_SeqSELLCUDA()
627 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMult_SeqSELLCUDA()
633 matmult_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
637 matmult_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
641 matmult_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
645 matmult_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
649 matmult_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
653 …matmult_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x,… in MatMult_SeqSELLCUDA()
666 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
668 …, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
670 …, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
672 … 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
674 … 32)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
676 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
683 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMult_SeqSELLCUDA()
686 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
690 …eqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
693 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
715 MatScalar *aval = cudastruct->val; in MatMultAdd_SeqSELLCUDA() local
741 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
743 …ell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
745 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
747 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
749 …l_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
751 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
762 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
764 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
766 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
768 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
770 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
772 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
778 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
780 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
782 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
784 …qsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
786 …qsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
788 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
794 matmultadd_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
798 matmultadd_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
802 matmultadd_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
806 matmultadd_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
810 matmultadd_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
814 …matmultadd_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMultAdd_SeqSELLCUDA()
827 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
829 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
831 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
833 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
835 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
837 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
844 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
847 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
851 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
854 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()