Lines Matching refs:bs
6 __global__ static void MatMultBatched(PetscInt bs, PetscInt mbs, const PetscScalar *A, const PetscScalar *x, PetscScalar *y, PetscBool transpose)
10 const PetscInt bs2 = bs * bs;
13 for (; row < bs * mbs; row += gridSize) {
18 k = row / bs; /* k-th block */
19 i = row % bs; /* this thread deals with i-th row of the block */
20 Ap = &A[bs2 * k + i * (transpose ? bs : 1)]; /* Ap points to the first entry of i-th row */
21 xp = &x[bs * k];
22 yp = &y[bs * k];
25 for (j = 0; j < bs; j++) {
27 Ap += (transpose ? 1 : bs); /* block is in column major order */
40 const PetscInt bs = jac->bs, mbs = jac->mbs;
51 PetscCallCUBLAS(cublasXgemvStridedBatched(handle, op, bs, bs, &alpha, A, bs, bs * bs, xx, 1, bs, &beta, yy, 1, bs, mbs));
53 PetscInt gridSize = PetscMin((bs * mbs + 255) / 256, 2147483647); /* <= 2^31-1 */
54 MatMultBatched<<<gridSize, 256>>>(bs, mbs, A, xx, yy, op == CUBLAS_OP_T ? PETSC_TRUE : PETSC_FALSE);
59 PetscCall(PetscLogGpuFlops(bs * bs * mbs * 2));
94 size = sizeof(PetscScalar) * jac->bs * jac->bs * jac->mbs;