Lines Matching refs:nrows

78 static __global__ void matmult_seqsell_basic_kernel(PetscInt nrows, PetscInt sliceheight, const Pet…  in matmult_seqsell_basic_kernel()  argument
84 if (row < nrows) { in matmult_seqsell_basic_kernel()
93 static __global__ void matmultadd_seqsell_basic_kernel(PetscInt nrows, PetscInt sliceheight, const … in matmultadd_seqsell_basic_kernel() argument
99 if (row < nrows) { in matmultadd_seqsell_basic_kernel()
112 __global__ void matmult_seqsell_tiled_kernel9(PetscInt nrows, PetscInt sliceheight, const PetscInt … in matmult_seqsell_tiled_kernel9() argument
123 if (row < nrows) { in matmult_seqsell_tiled_kernel9()
136 if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) y[row] = shared[0][threadIdx.x]; in matmult_seqsell_tiled_kernel9()
141 __global__ void matmultadd_seqsell_tiled_kernel9(PetscInt nrows, PetscInt sliceheight, const PetscI… in matmultadd_seqsell_tiled_kernel9() argument
152 if (row < nrows) { in matmultadd_seqsell_tiled_kernel9()
165 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) z[row] = y[row] + shared[0][thre… in matmultadd_seqsell_tiled_kernel9()
189 __global__ void matmult_seqsell_tiled_kernel8(PetscInt nrows, PetscInt sliceheight, PetscInt chunks… in matmult_seqsell_tiled_kernel8() argument
196 for (int iter = 0; iter < 1 + (nrows - 1) / (gridDim.x * WARP_SIZE * BLOCKY); iter++) { in matmult_seqsell_tiled_kernel8()
198 if (gid < nrows) y[gid] = 0.0; in matmult_seqsell_tiled_kernel8()
208 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmult_seqsell_tiled_kernel8()
213 if (row < nrows && gid < totalentries) t = aval[gid] * x[acolidx[gid]]; in matmult_seqsell_tiled_kernel8()
216 if (row < nrows && gid < totalentries && write) atomAdd(y[row], t); in matmult_seqsell_tiled_kernel8()
220 if (row < nrows) t += aval[gid] * x[acolidx[gid]]; in matmult_seqsell_tiled_kernel8()
234 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) atomAdd(y[row], shared[threadIdx… in matmult_seqsell_tiled_kernel8()
244 __global__ void matmultadd_seqsell_tiled_kernel8(PetscInt nrows, PetscInt sliceheight, PetscInt chu… in matmultadd_seqsell_tiled_kernel8() argument
251 for (int iter = 0; iter < 1 + (nrows - 1) / (gridDim.x * WARP_SIZE * BLOCKY); iter++) { in matmultadd_seqsell_tiled_kernel8()
253 if (gid < nrows) z[gid] = y[gid]; in matmultadd_seqsell_tiled_kernel8()
263 …PetscInt slice_id = start_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), t… in matmultadd_seqsell_tiled_kernel8()
268 if (row < nrows && gid < totalentries) t = aval[gid] * x[acolidx[gid]]; in matmultadd_seqsell_tiled_kernel8()
271 if (row < nrows && gid < totalentries && write) atomAdd(z[row], t); in matmultadd_seqsell_tiled_kernel8()
275 if (row < nrows) t += aval[gid] * x[acolidx[gid]]; in matmultadd_seqsell_tiled_kernel8()
289 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) atomAdd(z[row], shared[threadIdx… in matmultadd_seqsell_tiled_kernel8()
298 static __global__ void matmult_seqsell_tiled_kernel7(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel7() argument
304 if (row < nrows) { in matmult_seqsell_tiled_kernel7()
309 if (row < nrows && threadIdx.x < sliceheight) y[row] = t; in matmult_seqsell_tiled_kernel7()
313 static __global__ void matmultadd_seqsell_tiled_kernel7(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel7() argument
319 if (row < nrows) { in matmultadd_seqsell_tiled_kernel7()
324 if (row < nrows && threadIdx.x < sliceheight) z[row] = y[row] + t; in matmultadd_seqsell_tiled_kernel7()
331 static __global__ void matmult_seqsell_tiled_kernel6(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel6() argument
337 if (row < nrows) { in matmult_seqsell_tiled_kernel6()
359 static __global__ void matmult_seqsell_tiled_kernel5(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel5() argument
365 if (row < nrows) { in matmult_seqsell_tiled_kernel5()
385 static __global__ void matmult_seqsell_tiled_kernel4(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel4() argument
391 if (row < nrows) { in matmult_seqsell_tiled_kernel4()
409 static __global__ void matmult_seqsell_tiled_kernel3(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel3() argument
415 if (row < nrows) { in matmult_seqsell_tiled_kernel3()
431 static __global__ void matmult_seqsell_tiled_kernel2(PetscInt nrows, PetscInt sliceheight, const Pe… in matmult_seqsell_tiled_kernel2() argument
437 if (row < nrows) { in matmult_seqsell_tiled_kernel2()
451 static __global__ void matmultadd_seqsell_tiled_kernel6(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel6() argument
457 if (row < nrows) { in matmultadd_seqsell_tiled_kernel6()
479 static __global__ void matmultadd_seqsell_tiled_kernel5(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel5() argument
485 if (row < nrows) { in matmultadd_seqsell_tiled_kernel5()
505 static __global__ void matmultadd_seqsell_tiled_kernel4(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel4() argument
511 if (row < nrows) { in matmultadd_seqsell_tiled_kernel4()
529 static __global__ void matmultadd_seqsell_tiled_kernel3(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel3() argument
535 if (row < nrows) { in matmultadd_seqsell_tiled_kernel3()
551 static __global__ void matmultadd_seqsell_tiled_kernel2(PetscInt nrows, PetscInt sliceheight, const… in matmultadd_seqsell_tiled_kernel2() argument
557 if (row < nrows) { in matmultadd_seqsell_tiled_kernel2()
577 PetscInt nrows = A->rmap->n, sliceheight = a->sliceheight; in MatMult_SeqSELLHIP() local
604 nblocks = 1 + (nrows - 1) / sliceheight; in MatMult_SeqSELLHIP()
606 …matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
608 …matmult_seqsell_tiled_kernel9<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
610 …matmult_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
612 …matmult_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, a… in MatMult_SeqSELLHIP()
614 …matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
618 nblocks = 1 + (nrows - 1) / (hipstruct->blocky * sliceheight); in MatMult_SeqSELLHIP()
620 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, … in MatMult_SeqSELLHIP()
622 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, aval, … in MatMult_SeqSELLHIP()
624 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, … in MatMult_SeqSELLHIP()
626 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval,… in MatMult_SeqSELLHIP()
628 nblocks = 1 + (nrows - 1) / (2 * sliceheight); in MatMult_SeqSELLHIP()
629 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, … in MatMult_SeqSELLHIP()
634 nblocks = 1 + (nrows - 1) / (blocksize / 32); /* 1 slice per block if sliceheight=32 */ in MatMult_SeqSELLHIP()
635 …matmult_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, sliceheight, acolidx, aval, sliidx, x, … in MatMult_SeqSELLHIP()
638 nblocks = 1 + (nrows - 1) / (blocksize / 16); /* 2 slices per block if sliceheight=32*/ in MatMult_SeqSELLHIP()
639 …matmult_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, sliceheight, acolidx, aval, sliidx, x, … in MatMult_SeqSELLHIP()
642 nblocks = 1 + (nrows - 1) / (blocksize / 8); /* 4 slices per block if sliceheight=32 */ in MatMult_SeqSELLHIP()
643 …matmult_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y… in MatMult_SeqSELLHIP()
646 nblocks = 1 + (nrows - 1) / (blocksize / 4); /* 8 slices per block if sliceheight=32 */ in MatMult_SeqSELLHIP()
647 …matmult_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y… in MatMult_SeqSELLHIP()
650 nblocks = 1 + (nrows - 1) / (blocksize / 2); in MatMult_SeqSELLHIP()
651 …matmult_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y… in MatMult_SeqSELLHIP()
654 nblocks = 1 + (nrows - 1) / blocksize; in MatMult_SeqSELLHIP()
655 …matmult_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x,… in MatMult_SeqSELLHIP()
660 if (maxoveravg > 12.0 && maxoveravg / nrows > 0.001) { /* important threshold */ in MatMult_SeqSELLHIP()
668 …matmult_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblo… in MatMult_SeqSELLHIP()
670 …matmult_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksperblo… in MatMult_SeqSELLHIP()
672 …matmult_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksperblo… in MatMult_SeqSELLHIP()
674 …matmult_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksperb… in MatMult_SeqSELLHIP()
676 …matmult_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblo… in MatMult_SeqSELLHIP()
681 if (sliceheight * a->maxslicewidth < 2048 && nrows > 100000) { in MatMult_SeqSELLHIP()
682 nblocks = 1 + (nrows - 1) / (2 * sliceheight); /* two slices per block */ in MatMult_SeqSELLHIP()
683 …matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, … in MatMult_SeqSELLHIP()
685 nblocks = 1 + (nrows - 1) / sliceheight; in MatMult_SeqSELLHIP()
686 …matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
689 nblocks = 1 + (nrows - 1) / sliceheight; in MatMult_SeqSELLHIP()
690 …matmult_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, ava… in MatMult_SeqSELLHIP()
692 nblocks = 1 + (nrows - 1) / sliceheight; in MatMult_SeqSELLHIP()
693 …matmult_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, a… in MatMult_SeqSELLHIP()
714 PetscInt nrows = A->rmap->n, sliceheight = a->sliceheight; in MatMultAdd_SeqSELLHIP() local
739 nblocks = 1 + (nrows - 1) / sliceheight; in MatMultAdd_SeqSELLHIP()
741 …matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
743 …matmultadd_seqsell_tiled_kernel9<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
745 …matmultadd_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
747 …matmultadd_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx… in MatMultAdd_SeqSELLHIP()
749 …matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
760 …matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
762 …matmultadd_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
764 …matmultadd_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
766 …matmultadd_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksp… in MatMultAdd_SeqSELLHIP()
768 …matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
772 nblocks = 1 + (nrows - 1) / (blocky * sliceheight); in MatMultAdd_SeqSELLHIP()
774 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMultAdd_SeqSELLHIP()
776 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, ava… in MatMultAdd_SeqSELLHIP()
778 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, ava… in MatMultAdd_SeqSELLHIP()
780 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, av… in MatMultAdd_SeqSELLHIP()
782 nblocks = 1 + (nrows - 1) / (2 * sliceheight); in MatMultAdd_SeqSELLHIP()
783 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMultAdd_SeqSELLHIP()
788 nblocks = 1 + (nrows - 1) / (blocksize / 32); in MatMultAdd_SeqSELLHIP()
789 …matmultadd_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, sliceheight, acolidx, aval, sliidx, … in MatMultAdd_SeqSELLHIP()
792 nblocks = 1 + (nrows - 1) / (blocksize / 16); in MatMultAdd_SeqSELLHIP()
793 …matmultadd_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, sliceheight, acolidx, aval, sliidx, … in MatMultAdd_SeqSELLHIP()
796 nblocks = 1 + (nrows - 1) / (blocksize / 8); in MatMultAdd_SeqSELLHIP()
797 …matmultadd_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, sliceheight, acolidx, aval, sliidx, x… in MatMultAdd_SeqSELLHIP()
800 nblocks = 1 + (nrows - 1) / (blocksize / 4); in MatMultAdd_SeqSELLHIP()
801 …matmultadd_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, sliceheight, acolidx, aval, sliidx, x… in MatMultAdd_SeqSELLHIP()
804 nblocks = 1 + (nrows - 1) / (blocksize / 2); in MatMultAdd_SeqSELLHIP()
805 …matmultadd_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, sliceheight, acolidx, aval, sliidx, x… in MatMultAdd_SeqSELLHIP()
808 nblocks = 1 + (nrows - 1) / blocksize; in MatMultAdd_SeqSELLHIP()
809 …matmultadd_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx,… in MatMultAdd_SeqSELLHIP()
814 if (maxoveravg > 12.0 && maxoveravg / nrows > 0.001) { /* important threshold */ in MatMultAdd_SeqSELLHIP()
822 …matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
824 …matmultadd_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
826 …matmultadd_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
828 …matmultadd_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksp… in MatMultAdd_SeqSELLHIP()
830 …matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksper… in MatMultAdd_SeqSELLHIP()
835 if (sliceheight * a->maxslicewidth < 2048 && nrows > 100000) { in MatMultAdd_SeqSELLHIP()
836 nblocks = 1 + (nrows - 1) / (2 * sliceheight); /* two slices per block */ in MatMultAdd_SeqSELLHIP()
837 …matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, ava… in MatMultAdd_SeqSELLHIP()
839 nblocks = 1 + (nrows - 1) / sliceheight; in MatMultAdd_SeqSELLHIP()
840 …matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
843 nblocks = 1 + (nrows - 1) / sliceheight; in MatMultAdd_SeqSELLHIP()
844 …matmultadd_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, … in MatMultAdd_SeqSELLHIP()
846 nblocks = 1 + (nrows - 1) / sliceheight; in MatMultAdd_SeqSELLHIP()
847 …matmultadd_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx… in MatMultAdd_SeqSELLHIP()