Lines Matching refs:thread

543       CeedInt thread        = CeedIntMax(Q, P);  in CeedBasisApplyNonTensorCore_Hip_shared()  local
547 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared()
550 CeedInt shared_mem = elems_per_block * thread * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Hip_shared()
553 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread, 1, in CeedBasisApplyNonTensorCore_Hip_shared()
556 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread, 1, elems_per_bl… in CeedBasisApplyNonTensorCore_Hip_shared()
566 CeedInt thread = CeedIntMax(Q, P); in CeedBasisApplyNonTensorCore_Hip_shared() local
570 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared()
573 CeedInt shared_mem = elems_per_block * thread * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Hip_shared()
576 …imShared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread, 1, in CeedBasisApplyNonTensorCore_Hip_shared()
579 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread, 1, elems_per_bloc… in CeedBasisApplyNonTensorCore_Hip_shared()
589 CeedInt thread = CeedIntMax(Q, P); in CeedBasisApplyNonTensorCore_Hip_shared() local
593 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared()
597 …CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, thread, elems_per_block, 1, we… in CeedBasisApplyNonTensorCore_Hip_shared()