Lines Matching refs:y
75 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_basic_kernel() argument
86 y[row] = sum; in matmult_seqsell_basic_kernel()
90 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_basic_kernel() argument
101 z[row] = y[row] + sum; in matmultadd_seqsell_basic_kernel()
108 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel9() argument
112 int tid = threadIdx.x + threadIdx.y * 32; in matmult_seqsell_tiled_kernel9()
120 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmult_seqsell_tiled_kernel9()
125 if (threadIdx.x < sliceheight) shared[threadIdx.x][threadIdx.y] = t; in matmult_seqsell_tiled_kernel9()
132 if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) y[row] = shared[0][threadIdx.x]; in matmult_seqsell_tiled_kernel9()
137 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel9() argument
141 int tid = threadIdx.x + threadIdx.y * 32; in matmultadd_seqsell_tiled_kernel9()
149 …for (i = sliidx[slice_id] + threadIdx.x + 32 * threadIdx.y; i < sliidx[slice_id + 1]; i += 32 * BL… in matmultadd_seqsell_tiled_kernel9()
154 if (threadIdx.x < sliceheight) shared[threadIdx.x][threadIdx.y] = t; in matmultadd_seqsell_tiled_kernel9()
161 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) z[row] = y[row] + shared[0][thre… in matmultadd_seqsell_tiled_kernel9()
170 int halfwarpid = threadIdx.y * 2 + threadIdx.x / 16; in segment_scan()
171 shared[threadIdx.x + threadIdx.y * 32] = 0; in segment_scan()
173 shared[threadIdx.x + threadIdx.y * 32] = *val; in segment_scan()
177 if (halfwarpid < BLOCKY * 2 - i) *val += shared[threadIdx.x + threadIdx.y * 32 + i * 16]; in segment_scan()
185 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel8() argument
193 … gid = gridDim.x * 32 * BLOCKY * iter + blockIdx.x * BLOCKY * 32 + threadIdx.y * 32 + threadIdx.x; in matmult_seqsell_tiled_kernel8()
194 if (gid < nrows) y[gid] = 0.0; in matmult_seqsell_tiled_kernel8()
200 gid = cid * BLOCKY * 32 + threadIdx.y * 32 + threadIdx.x; in matmult_seqsell_tiled_kernel8()
207 if (threadIdx.x % 16 == 0) flag[threadIdx.y * 2 + threadIdx.x / 16] = slice_id; in matmult_seqsell_tiled_kernel8()
212 if (row < nrows && gid < totalentries && write) atomAdd(y[row], t); in matmult_seqsell_tiled_kernel8()
218 int tid = threadIdx.x + threadIdx.y * 32, tidx = tid % BLOCKY, tidy = tid / BLOCKY; in matmult_seqsell_tiled_kernel8()
223 …if (threadIdx.x < sliceheight) shared[threadIdx.x * BLOCKY + threadIdx.y] = t; /* shared[threadIdx… in matmult_seqsell_tiled_kernel8()
230 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) atomAdd(y[row], shared[threadIdx… in matmult_seqsell_tiled_kernel8()
240 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel8() argument
248 … gid = gridDim.x * 32 * BLOCKY * iter + blockIdx.x * BLOCKY * 32 + threadIdx.y * 32 + threadIdx.x; in matmultadd_seqsell_tiled_kernel8()
249 if (gid < nrows) z[gid] = y[gid]; in matmultadd_seqsell_tiled_kernel8()
255 gid = cid * BLOCKY * 32 + threadIdx.y * 32 + threadIdx.x; in matmultadd_seqsell_tiled_kernel8()
262 if (threadIdx.x % 16 == 0) flag[threadIdx.y * 2 + threadIdx.x / 16] = slice_id; in matmultadd_seqsell_tiled_kernel8()
273 int tid = threadIdx.x + threadIdx.y * 32, tidx = tid % BLOCKY, tidy = tid / BLOCKY; in matmultadd_seqsell_tiled_kernel8()
278 …if (threadIdx.x < sliceheight) shared[threadIdx.x * BLOCKY + threadIdx.y] = t; /* shared[threadIdx… in matmultadd_seqsell_tiled_kernel8()
285 …if (row < nrows && threadIdx.y == 0 && threadIdx.x < sliceheight) atomAdd(z[row], shared[threadIdx… in matmultadd_seqsell_tiled_kernel8()
294 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel7() argument
297 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmult_seqsell_tiled_kernel7()
305 if (row < nrows && threadIdx.x < sliceheight) y[row] = t; in matmult_seqsell_tiled_kernel7()
309 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel7() argument
312 slice_id = blockIdx.x * blockDim.y + threadIdx.y; in matmultadd_seqsell_tiled_kernel7()
320 if (row < nrows && threadIdx.x < sliceheight) z[row] = y[row] + t; in matmultadd_seqsell_tiled_kernel7()
326 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel6() argument
336 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmult_seqsell_tiled_kernel6()
337 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmult_seqsell_tiled_kernel6()
339 …if (threadIdx.y < 16) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 16) … in matmult_seqsell_tiled_kernel6()
341 …if (threadIdx.y < 8) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 8) * … in matmult_seqsell_tiled_kernel6()
343 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmult_seqsell_tiled_kernel6()
345 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmult_seqsell_tiled_kernel6()
347 if (threadIdx.y < 1) { in matmult_seqsell_tiled_kernel6()
349 y[row] = shared[threadIdx.x]; in matmult_seqsell_tiled_kernel6()
354 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel5() argument
364 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmult_seqsell_tiled_kernel5()
365 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmult_seqsell_tiled_kernel5()
367 …if (threadIdx.y < 8) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 8) * … in matmult_seqsell_tiled_kernel5()
369 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmult_seqsell_tiled_kernel5()
371 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmult_seqsell_tiled_kernel5()
373 if (threadIdx.y < 1) { in matmult_seqsell_tiled_kernel5()
375 y[row] = shared[threadIdx.x]; in matmult_seqsell_tiled_kernel5()
380 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel4() argument
390 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmult_seqsell_tiled_kernel4()
391 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmult_seqsell_tiled_kernel4()
393 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmult_seqsell_tiled_kernel4()
395 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmult_seqsell_tiled_kernel4()
397 if (threadIdx.y < 1) { in matmult_seqsell_tiled_kernel4()
399 y[row] = shared[threadIdx.x]; in matmult_seqsell_tiled_kernel4()
404 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel3() argument
414 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmult_seqsell_tiled_kernel3()
415 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmult_seqsell_tiled_kernel3()
417 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmult_seqsell_tiled_kernel3()
419 if (threadIdx.y < 1) { in matmult_seqsell_tiled_kernel3()
421 y[row] = shared[threadIdx.x]; in matmult_seqsell_tiled_kernel3()
426 …cInt *acolidx, const MatScalar *aval, const PetscInt *sliidx, const PetscScalar *x, PetscScalar *y) in matmult_seqsell_tiled_kernel2() argument
436 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmult_seqsell_tiled_kernel2()
437 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmult_seqsell_tiled_kernel2()
439 if (threadIdx.y < 1) { in matmult_seqsell_tiled_kernel2()
441 y[row] = shared[threadIdx.x]; in matmult_seqsell_tiled_kernel2()
446 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel6() argument
456 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmultadd_seqsell_tiled_kernel6()
457 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmultadd_seqsell_tiled_kernel6()
459 …if (threadIdx.y < 16) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 16) … in matmultadd_seqsell_tiled_kernel6()
461 …if (threadIdx.y < 8) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 8) * … in matmultadd_seqsell_tiled_kernel6()
463 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmultadd_seqsell_tiled_kernel6()
465 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmultadd_seqsell_tiled_kernel6()
467 if (threadIdx.y < 1) { in matmultadd_seqsell_tiled_kernel6()
469 z[row] = y[row] + shared[threadIdx.x]; in matmultadd_seqsell_tiled_kernel6()
474 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel5() argument
484 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmultadd_seqsell_tiled_kernel5()
485 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmultadd_seqsell_tiled_kernel5()
487 …if (threadIdx.y < 8) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 8) * … in matmultadd_seqsell_tiled_kernel5()
489 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmultadd_seqsell_tiled_kernel5()
491 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmultadd_seqsell_tiled_kernel5()
493 if (threadIdx.y < 1) { in matmultadd_seqsell_tiled_kernel5()
495 z[row] = y[row] + shared[threadIdx.x]; in matmultadd_seqsell_tiled_kernel5()
500 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel4() argument
510 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmultadd_seqsell_tiled_kernel4()
511 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmultadd_seqsell_tiled_kernel4()
513 …if (threadIdx.y < 4) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 4) * … in matmultadd_seqsell_tiled_kernel4()
515 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmultadd_seqsell_tiled_kernel4()
517 if (threadIdx.y < 1) { in matmultadd_seqsell_tiled_kernel4()
519 z[row] = y[row] + shared[threadIdx.x]; in matmultadd_seqsell_tiled_kernel4()
524 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel3() argument
534 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmultadd_seqsell_tiled_kernel3()
535 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmultadd_seqsell_tiled_kernel3()
537 …if (threadIdx.y < 2) shared[threadIdx.y * blockDim.x + threadIdx.x] += shared[(threadIdx.y + 2) * … in matmultadd_seqsell_tiled_kernel3()
539 if (threadIdx.y < 1) { in matmultadd_seqsell_tiled_kernel3()
541 z[row] = y[row] + shared[threadIdx.x]; in matmultadd_seqsell_tiled_kernel3()
546 …atScalar *aval, const PetscInt *sliidx, const PetscScalar *x, const PetscScalar *y, PetscScalar *z) in matmultadd_seqsell_tiled_kernel2() argument
556 shared[threadIdx.y * blockDim.x + threadIdx.x] = 0.0; in matmultadd_seqsell_tiled_kernel2()
557 …_in_slice + SLICE_HEIGHT * threadIdx.y; i < sliidx[slice_id + 1]; i += SLICE_HEIGHT * blockDim.y) … in matmultadd_seqsell_tiled_kernel2()
559 if (threadIdx.y < 1) { in matmultadd_seqsell_tiled_kernel2()
561 z[row] = y[row] + shared[threadIdx.x]; in matmultadd_seqsell_tiled_kernel2()
570 PetscScalar *y; in MatMult_SeqSELLCUDA() local
593 PetscCall(VecCUDAGetArrayWrite(yy, &y)); in MatMult_SeqSELLCUDA()
601 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
603 …eqsell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
605 …eqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
607 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
609 …sell_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
611 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
617 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
619 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
621 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
623 …_seqsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
625 …_seqsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
627 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
633 matmult_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
637 matmult_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
641 matmult_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
645 matmult_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
649 matmult_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
653 …mult_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
666 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
668 …, 4)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
670 …, 8)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
672 … 16)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
674 … 32)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
676 …, 2)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
683 …t_seqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
686 …eqsell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
690 …eqsell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
693 …sell_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y); in MatMult_SeqSELLCUDA()
703 PetscCall(VecCUDARestoreArrayWrite(yy, &y)); in MatMult_SeqSELLCUDA()
713 const PetscScalar *y, *x; in MatMultAdd_SeqSELLCUDA() local
732 PetscCall(VecCUDAGetArrayRead(yy, &y)); in MatMultAdd_SeqSELLCUDA()
741 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
743 …ell_tiled_kernel9<4><<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
745 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
747 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
749 …l_tiled_kernel9<32><<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
751 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
762 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
764 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
766 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
768 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
770 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
772 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
778 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
780 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 4)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
782 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
784 …qsell_tiled_kernel7<<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
786 …qsell_tiled_kernel7<<<nblocks, dim3(32, 32)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
788 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
794 matmultadd_seqsell_tiled_kernel6<<<nblocks, block32>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
798 matmultadd_seqsell_tiled_kernel5<<<nblocks, block16>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
802 matmultadd_seqsell_tiled_kernel4<<<nblocks, block8>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
806 matmultadd_seqsell_tiled_kernel3<<<nblocks, block4>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
810 matmultadd_seqsell_tiled_kernel2<<<nblocks, block2>>>(nrows, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
814 …d_seqsell_basic_kernel<<<nblocks, blocksize>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
827 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
829 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
831 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
833 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
835 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
837 …)>>>(nrows, sliceheight, chunksperblock, nchunks, chunk_slice_map, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
844 …eqsell_tiled_kernel7<<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
847 …ell_tiled_kernel9<2><<<nblocks, dim3(32, 2)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
851 …ell_tiled_kernel9<8><<<nblocks, dim3(32, 8)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
854 …l_tiled_kernel9<16><<<nblocks, dim3(32, 16)>>>(nrows, sliceheight, acolidx, aval, sliidx, x, y, z); in MatMultAdd_SeqSELLCUDA()
864 PetscCall(VecCUDARestoreArrayRead(yy, &y)); in MatMultAdd_SeqSELLCUDA()