| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 131 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local 135 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 137 …hared_Hip(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 143 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local 147 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 149 …p(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 154 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local 158 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 160 …p(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared() 183 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared() local [all …]
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 64 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local 68 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared() 70 …ared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared() 77 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local 81 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared() 83 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared() 89 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local 93 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared() 95 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared() 118 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local [all …]
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-basis.c | 91 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local 98 shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q)); in CeedBasisApplyCore_Magma() 99 shared_mem += sizeof(CeedScalar) * (P * Q); in CeedBasisApplyCore_Magma() 104 shared_mem += P * Q * sizeof(CeedScalar); // for sT in CeedBasisApplyCore_Magma() 106 shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar)); in CeedBasisApplyCore_Magma() 111 shared_mem += sizeof(CeedScalar) * (P * Q); // for sT in CeedBasisApplyCore_Magma() 113 shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q)); in CeedBasisApplyCore_Magma() 121 num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma() 123 …KernelDimSharedMagma(ceed, impl->Interp, NULL, grid, num_threads, num_t_col, 1, shared_mem, args)); in CeedBasisApplyCore_Magma() 168 CeedInt shared_mem = 0; in CeedBasisApplyCore_Magma() local [all …]
|
| /libCEED/backends/cuda-gen/ |
| H A D | ceed-cuda-gen-operator.c | 220 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorApplyAddCore_Cuda_gen() local 222 …hared_Cuda(ceed, data->op, stream, grid, block[0], block[1], block[2], shared_mem, is_run_good, op… in CeedOperatorApplyAddCore_Cuda_gen() 475 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local 477 …ceed, data->assemble_qfunction, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() 643 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local 645 …(ceed, data->assemble_diagonal, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() 808 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorAssembleSingleAtPoints_Cuda_gen() local 810 …Cuda(ceed, data->assemble_full, NULL, grid, block[0], block[1], block[2], shared_mem, &is_run_good, in CeedOperatorAssembleSingleAtPoints_Cuda_gen()
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-operator.c | 1778 CeedInt shared_mem = in CeedOperatorAssembleSingle_Hip() local 1786 shared_mem, args)); in CeedOperatorAssembleSingle_Hip()
|
| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-operator.c | 1781 CeedInt shared_mem = in CeedOperatorAssembleSingle_Cuda() local 1789 shared_mem, args)); in CeedOperatorAssembleSingle_Cuda()
|