Lines Matching refs:nblocks

581   PetscInt           nblocks, blocksize = 512; /* blocksize is fixed to be 512 */
604 nblocks = 1 + (nrows - 1) / sliceheight;
606 matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
608 matmult_seqsell_tiled_kernel9<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
610 matmult_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
612 matmult_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
614 matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
618 nblocks = 1 + (nrows - 1) / (hipstruct->blocky * sliceheight);
620 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
622 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
624 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
626 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
628 nblocks = 1 + (nrows - 1) / (2 * sliceheight);
629 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
634 nblocks = 1 + (nrows - 1) / (blocksize / 32); /* 1 slice per block if sliceheight=32 */
635 matmult_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
638 nblocks = 1 + (nrows - 1) / (blocksize / 16); /* 2 slices per block if sliceheight=32*/
639 matmult_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
642 nblocks = 1 + (nrows - 1) / (blocksize / 8); /* 4 slices per block if sliceheight=32 */
643 matmult_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
646 nblocks = 1 + (nrows - 1) / (blocksize / 4); /* 8 slices per block if sliceheight=32 */
647 matmult_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
650 nblocks = 1 + (nrows - 1) / (blocksize / 2);
651 matmult_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
654 nblocks = 1 + (nrows - 1) / blocksize;
655 matmult_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
665 nblocks = 1 + (nchunks - 1) / chunksperblock;
668 matmult_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y);
670 matmult_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y);
672 matmult_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y);
674 matmult_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y);
676 matmult_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y);
682 nblocks = 1 + (nrows - 1) / (2 * sliceheight); /* two slices per block */
683 matmult_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
685 nblocks = 1 + (nrows - 1) / sliceheight;
686 matmult_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
689 nblocks = 1 + (nrows - 1) / sliceheight;
690 matmult_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
692 nblocks = 1 + (nrows - 1) / sliceheight;
693 matmult_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y);
729 PetscInt nblocks, blocksize = 512;
739 nblocks = 1 + (nrows - 1) / sliceheight;
741 matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
743 matmultadd_seqsell_tiled_kernel9<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
745 matmultadd_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
747 matmultadd_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
749 matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
757 nblocks = 1 + (nchunks - 1) / chunksperblock;
760 matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
762 matmultadd_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
764 matmultadd_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
766 matmultadd_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
768 matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
772 nblocks = 1 + (nrows - 1) / (blocky * sliceheight);
774 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
776 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
778 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
780 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
782 nblocks = 1 + (nrows - 1) / (2 * sliceheight);
783 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
788 nblocks = 1 + (nrows - 1) / (blocksize / 32);
789 matmultadd_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
792 nblocks = 1 + (nrows - 1) / (blocksize / 16);
793 matmultadd_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
796 nblocks = 1 + (nrows - 1) / (blocksize / 8);
797 matmultadd_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
800 nblocks = 1 + (nrows - 1) / (blocksize / 4);
801 matmultadd_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
804 nblocks = 1 + (nrows - 1) / (blocksize / 2);
805 matmultadd_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
808 nblocks = 1 + (nrows - 1) / blocksize;
809 matmultadd_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
819 nblocks = 1 + (nchunks - 1) / chunksperblock;
822 matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
824 matmultadd_seqsell_tiled_kernel8<4><<<nblocks, dim3(WARP_SIZE, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
826 matmultadd_seqsell_tiled_kernel8<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
828 matmultadd_seqsell_tiled_kernel8<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
830 matmultadd_seqsell_tiled_kernel8<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z);
836 nblocks = 1 + (nrows - 1) / (2 * sliceheight); /* two slices per block */
837 matmultadd_seqsell_tiled_kernel7<<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
839 nblocks = 1 + (nrows - 1) / sliceheight;
840 matmultadd_seqsell_tiled_kernel9<2><<<nblocks, dim3(WARP_SIZE, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
843 nblocks = 1 + (nrows - 1) / sliceheight;
844 matmultadd_seqsell_tiled_kernel9<8><<<nblocks, dim3(WARP_SIZE, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);
846 nblocks = 1 + (nrows - 1) / sliceheight;
847 matmultadd_seqsell_tiled_kernel9<16><<<nblocks, dim3(WARP_SIZE, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z);