Lines Matching defs:elems_per_block

128 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared() local
141 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local
152 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local
180 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyTensorCore_Hip_shared() local
193 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local
204 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() local
226 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local
232 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local
238 const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; in CeedBasisApplyTensorCore_Hip_shared() local
391 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyAtPointsCore_Hip_shared() local
404 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyAtPointsCore_Hip_shared() local
416 const CeedInt elems_per_block = 1; in CeedBasisApplyAtPointsCore_Hip_shared() local
439 CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; in CeedBasisApplyAtPointsCore_Hip_shared() local
452 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyAtPointsCore_Hip_shared() local
464 const CeedInt elems_per_block = 1; in CeedBasisApplyAtPointsCore_Hip_shared() local
547 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared() local
570 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared() local
593 CeedInt elems_per_block = 64 * thread > 256 ? 256 / thread : 64; in CeedBasisApplyNonTensorCore_Hip_shared() local