Searched defs:shared_mem (Results 1 – 6 of 6) sorted by relevance
64 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local77 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local89 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local118 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local131 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local142 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local328 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local342 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local354 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local378 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local[all …]
131 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local143 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local154 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local183 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local195 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local206 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local394 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared() local406 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared() local418 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared() local442 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared() local[all …]
91 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local168 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local210 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local429 …CeedInt shared_mem = (t_mode != CEED_TRANSPOSE && q_comp > 1) ? (shared_mem_A + shared_mem_B) : … in CeedBasisApplyNonTensorCore_Magma() local448 CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Magma() local
220 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorApplyAddCore_Cuda_gen() local475 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local643 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local808 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorAssembleSingleAtPoints_Cuda_gen() local
1778 CeedInt shared_mem = in CeedOperatorAssembleSingle_Hip() local
1781 CeedInt shared_mem = in CeedOperatorAssembleSingle_Cuda() local