Lines Matching refs:sliidx

13   PetscInt  *sliidx; /* slice index array, device pointer */  member
28 if ((*cudastruct)->sliidx) PetscCallCUDA(cudaFree((*cudastruct)->sliidx)); in MatSeqSELLCUDA_Destroy()
45 …PetscCallCUDA(cudaMemcpy(cudastruct->val, a->val, a->sliidx[a->totalslices] * sizeof(MatScalar), c… in MatSeqSELLCUDACopyToGPU()
46 PetscCall(PetscLogCpuToGpu(a->sliidx[a->totalslices] * (sizeof(MatScalar)))); in MatSeqSELLCUDACopyToGPU()
50 if (cudastruct->sliidx) PetscCallCUDA(cudaFree(cudastruct->sliidx)); in MatSeqSELLCUDACopyToGPU()
53 cudastruct->totalentries = a->sliidx[a->totalslices]; in MatSeqSELLCUDACopyToGPU()
59 …PetscCallCUDA(cudaMemcpy(cudastruct->colidx, a->colidx, a->sliidx[a->totalslices] * sizeof(*a->col… in MatSeqSELLCUDACopyToGPU()
60 …PetscCallCUDA(cudaMemcpy(cudastruct->val, a->val, a->sliidx[a->totalslices] * sizeof(*a->val), cud… in MatSeqSELLCUDACopyToGPU()
62 …PetscCallCUDA(cudaMalloc((void **)&cudastruct->sliidx, (a->totalslices + 1) * sizeof(*cudastruct-> in MatSeqSELLCUDACopyToGPU()
63 …PetscCallCUDA(cudaMemcpy(cudastruct->sliidx, a->sliidx, (a->totalslices + 1) * sizeof(*a->sliidx),… in MatSeqSELLCUDACopyToGPU()
66 …PetscCall(PetscLogCpuToGpu(a->sliidx[a->totalslices] * (sizeof(MatScalar) + sizeof(PetscInt)) + (a… in MatSeqSELLCUDACopyToGPU()
75 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_basic_kernel() argument
85 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmult_seqsell_basic_kernel()
90 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_basic_kernel() argument
100 …for (i = sliidx[slice_id] + row_in_slice; i < sliidx[slice_id + 1]; i += sliceheight) sum += aval[… in matmultadd_seqsell_basic_kernel()
108 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel9() argument
120 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmult_seqsell_tiled_kernel9()
137 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel9() argument
149 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmultadd_seqsell_tiled_kernel9()
185 …ice_map, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel8() argument
201 …if ((cid + 1) * BLOCKY * 32 > sliidx[start_slice + 1]) { /* this iteration covers more than one sl… in matmult_seqsell_tiled_kernel8()
204 …art_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), totalentries = sliidx[totalslices]; in matmult_seqsell_tiled_kernel8()
206 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmult_seqsell_tiled_kernel8()
217 …if (iter == chunksperblock - 1 || (cid + 2) * BLOCKY * 32 > sliidx[start_slice + 1]) { /* last ite… in matmult_seqsell_tiled_kernel8()
240 …ice_map, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel8() argument
256 …if ((cid + 1) * BLOCKY * 32 > sliidx[start_slice + 1]) { /* this iteration covers more than one sl… in matmultadd_seqsell_tiled_kernel8()
259 …art_slice, totalslices = PetscCeilIntMacro(nrows, sliceheight), totalentries = sliidx[totalslices]; in matmultadd_seqsell_tiled_kernel8()
261 while (gid < totalentries && gid >= sliidx[slice_id + 1]) slice_id++; in matmultadd_seqsell_tiled_kernel8()
272 …if (iter == chunksperblock - 1 || (cid + 2) * BLOCKY * 32 > sliidx[start_slice + 1]) { /* last ite… in matmultadd_seqsell_tiled_kernel8()
294 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel7() argument
301 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmult_seqsell_tiled_kernel7()
309 …eheight, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel7() argument
316 …for (i = sliidx[slice_id] + threadIdx.x; i < sliidx[slice_id + 1]; i += 32) t += aval[i] * x[acoli… in matmultadd_seqsell_tiled_kernel7()
326 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel6() argument
337 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel6()
354 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel5() argument
365 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel5()
380 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel4() argument
391 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel4()
404 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel3() argument
415 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel3()
426 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmult_seqsell_tiled_kernel2() argument
437 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmult_seqsell_tiled_kernel2()
446 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel6() argument
457 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel6()
474 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel5() argument
485 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel5()
500 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel4() argument
511 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel4()
524 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel3() argument
535 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel3()
546 …t nrows, const PetscInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar… in matmultadd_seqsell_tiled_kernel2() argument
557 …for (i = sliidx[slice_id] + row_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i… in matmultadd_seqsell_tiled_kernel2()
575 PetscInt *sliidx; in MatMult_SeqSELLCUDA() local
590 sliidx = cudastruct->sliidx; in MatMult_SeqSELLCUDA()
601 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
603 …eqsell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
605 …eqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
607 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
609 …sell_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
611 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
617 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
619 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
621 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
623 …_seqsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
625 …_seqsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
627 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
633 matmult_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
637 matmult_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
641 matmult_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
645 matmult_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
649 matmult_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
653 …matmult_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x,… in MatMult_SeqSELLCUDA()
666 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
668 …, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
670 …, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
672 … 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
674 … 32)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
676 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
683 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
686 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
690 …eqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
693 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
717 PetscInt *sliidx = cudastruct->sliidx; in MatMultAdd_SeqSELLCUDA() local
741 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
743 …ell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
745 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
747 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
749 …l_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
751 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
762 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
764 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
766 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
768 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
770 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
772 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
778 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
780 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
782 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
784 …qsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
786 …qsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
788 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
794 matmultadd_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
798 matmultadd_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
802 matmultadd_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
806 matmultadd_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
810 matmultadd_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
814 …d_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
827 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
829 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
831 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
833 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
835 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
837 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
844 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
847 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
851 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
854 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
929 …PetscCallCUDA(cudaMemset(cudastruct->val, 0, a->sliidx[a->totalslices] * sizeof(*cudastruct->val))… in MatZeroEntries_SeqSELLCUDA()
932 PetscCall(PetscArrayzero(a->val, a->sliidx[a->totalslices])); in MatZeroEntries_SeqSELLCUDA()