Lines Matching refs:thread_1d

44   const CeedInt thread_1d = CeedIntMax(P_1d, Q_1d);  in ComputeBasisThreadBlockSizes()  local
59 CeedInt required = thread_1d * thread_1d; in ComputeBasisThreadBlockSizes()
73 CeedInt required = thread_1d * thread_1d; in ComputeBasisThreadBlockSizes()
124 CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); in CeedBasisApplyTensorCore_Hip_shared() local
128 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared()
131 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
134 …d_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, 1, in CeedBasisApplyTensorCore_Hip_shared()
137 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per… in CeedBasisApplyTensorCore_Hip_shared()
141 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
143 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
146 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
147thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
149 …ackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyTensorCore_Hip_shared()
152 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
154 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
157 …ared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
158thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Hip_shared()
160 …ackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyTensorCore_Hip_shared()
171 CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); in CeedBasisApplyTensorCore_Hip_shared() local
180 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared()
183 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
186 …hared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, 1, in CeedBasisApplyTensorCore_Hip_shared()
189 …CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, 1, elems_per_b… in CeedBasisApplyTensorCore_Hip_shared()
193 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
195 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
198 …p(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
201 …lBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyTensorCore_Hip_shared()
204 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
206 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Hip_shared()
209 …p(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread_1d, thread_1d, in CeedBasisApplyTensorCore_Hip_shared()
212 …lBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyTensorCore_Hip_shared()
387 CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); in CeedBasisApplyAtPointsCore_Hip_shared() local
391 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyAtPointsCore_Hip_shared()
394 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
398thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
400 …kend(CeedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, 1, elems_per_bl… in CeedBasisApplyAtPointsCore_Hip_shared()
404 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyAtPointsCore_Hip_shared()
406 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
410thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
412 …eedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyAtPointsCore_Hip_shared()
418 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
422thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
424 …eedRunKernelDimShared_Hip(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyAtPointsCore_Hip_shared()
435 CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); in CeedBasisApplyAtPointsCore_Hip_shared() local
439 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyAtPointsCore_Hip_shared()
442 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
446thread_1d, 1, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
448 …ackend(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, 1, elems_per_bl… in CeedBasisApplyAtPointsCore_Hip_shared()
452 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyAtPointsCore_Hip_shared()
454 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
458thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
460 …(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyAtPointsCore_Hip_shared()
466 … CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Hip_shared()
470thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Hip_shared()
472 …(CeedRunKernelDimShared_Hip(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_… in CeedBasisApplyAtPointsCore_Hip_shared()