Home
last modified time | relevance | path

Searched refs:shared_mem (Results 1 – 6 of 6) sorted by relevance

/libCEED/backends/hip-shared/
H A D ceed-hip-shared-basis.c 131 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local
135 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
137 …hared_Hip(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
143 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local
147 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
149 …p(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
154 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local
158 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
160 …p(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
183 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local
[all …]
/libCEED/backends/cuda-shared/
H A D ceed-cuda-shared-basis.c 64 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
68 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
70 …ared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
77 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
81 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
83 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared()
89 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
93 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
95 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared()
118 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
[all …]
/libCEED/backends/magma/
H A D ceed-magma-basis.c 91 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local
98 shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); in CeedBasisApplyCore_Magma()
99 shared_mem += sizeof(CeedScalar) * (P * Q); in CeedBasisApplyCore_Magma()
104 shared_mem += P * Q * sizeof(CeedScalar); // for sT in CeedBasisApplyCore_Magma()
106 shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar)); in CeedBasisApplyCore_Magma()
111 shared_mem += sizeof(CeedScalar) * (P * Q); // for sT in CeedBasisApplyCore_Magma()
113 shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q)); in CeedBasisApplyCore_Magma()
121 num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma()
123 …KernelDimSharedMagma(ceed, impl->Interp, NULL, grid, num_threads, num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma()
168 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local
[all …]
/libCEED/backends/cuda-gen/
H A D ceed-cuda-gen-operator.c 220 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorApplyAddCore_Cuda_gen() local
222 …hared_Cuda(ceed, data->op, stream, grid, block[0], block[1], block[2], shared_mem, is_run_good, op… in CeedOperatorApplyAddCore_Cuda_gen()
475 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local
477 …ceed, data->assemble_qfunction, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
643 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local
645 …(ceed, data->assemble_diagonal, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen()
808 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorAssembleSingleAtPoints_Cuda_gen() local
810 …Cuda(ceed, data->assemble_full, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorAssembleSingleAtPoints_Cuda_gen()
/libCEED/backends/hip-ref/
H A D ceed-hip-ref-operator.c 1778 CeedInt shared_mem = in CeedOperatorAssembleSingle_Hip() local
1786 shared_mem, args)); in CeedOperatorAssembleSingle_Hip()
/libCEED/backends/cuda-ref/
H A D ceed-cuda-ref-operator.c 1781 CeedInt shared_mem = in CeedOperatorAssembleSingle_Cuda() local
1789 shared_mem, args)); in CeedOperatorAssembleSingle_Cuda()