Lines Matching refs:num_t_col
90 CeedInt num_t_col = 1; in CeedBasisApplyCore_Magma() local
97 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); in CeedBasisApplyCore_Magma()
98 shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); in CeedBasisApplyCore_Magma()
103 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); in CeedBasisApplyCore_Magma()
106 shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar)); in CeedBasisApplyCore_Magma()
110 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); in CeedBasisApplyCore_Magma()
113 shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q)); in CeedBasisApplyCore_Magma()
116 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma()
121 num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma()
123 …eedRunKernelDimSharedMagma(ceed, impl->Interp, NULL, grid, num_threads, num_t_col, 1, shared_mem, … in CeedBasisApplyCore_Magma()
167 CeedInt num_t_col = 1; in CeedBasisApplyCore_Magma() local
174 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); in CeedBasisApplyCore_Magma()
175 shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); in CeedBasisApplyCore_Magma()
180 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); in CeedBasisApplyCore_Magma()
183 shared_mem += sizeof(CeedScalar) * num_t_col * (P * max_P_Q); in CeedBasisApplyCore_Magma()
187 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); in CeedBasisApplyCore_Magma()
190 … shared_mem += sizeof(CeedScalar) * num_t_col * CeedIntMax(P * P * P, (P * P * Q) + (P * Q * Q)); in CeedBasisApplyCore_Magma()
193 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma()
199 num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma()
201 …(CeedRunKernelDimSharedMagma(ceed, impl->Grad, NULL, grid, num_threads, num_t_col, 1, shared_mem, … in CeedBasisApplyCore_Magma()
209 CeedInt num_t_col = 1; in CeedBasisApplyCore_Magma() local
215 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D); in CeedBasisApplyCore_Magma()
217 shared_mem += sizeof(CeedScalar) * num_t_col * Q; // for output in CeedBasisApplyCore_Magma()
221 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D); in CeedBasisApplyCore_Magma()
226 num_t_col = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D); in CeedBasisApplyCore_Magma()
230 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma()
233 …eedRunKernelDimSharedMagma(ceed, impl->Weight, NULL, grid, num_threads, num_t_col, 1, shared_mem, … in CeedBasisApplyCore_Magma()
425 CeedInt num_t_col = MAGMA_BASIS_NTCOL(M, MAGMA_MAXTHREADS_1D); in CeedBasisApplyNonTensorCore_Magma() local
426 CeedInt grid = CeedDivUpInt(N, num_t_col * NB); in CeedBasisApplyNonTensorCore_Magma()
428 CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Magma()
432 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, NULL, grid, M, num_t_col, 1, shared_mem,… in CeedBasisApplyNonTensorCore_Magma()
446 CeedInt num_t_col = MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D); in CeedBasisApplyNonTensorCore_Magma() local
447 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyNonTensorCore_Magma()
448 CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Magma()
451 …CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, NULL, grid, Q, num_t_col, 1, share… in CeedBasisApplyNonTensorCore_Magma()