Lines Matching refs:shared_mem
64 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
68 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
70 …ared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
77 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
81 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
83 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared()
89 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
93 … thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyTensorCore_Cuda_shared()
95 …elDimShared_Cuda(ceed, data->Interp, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyTensorCore_Cuda_shared()
118 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
122 elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
124 …imShared_Cuda(ceed, data->Grad, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
131 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
135 … thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
137 …_Cuda(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
142 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyTensorCore_Cuda_shared() local
146 … thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
148 …_Cuda(ceed, data->Grad, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyTensorCore_Cuda_shared()
328 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
332 … thread_1d, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
334 …elDimShared_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, in CeedBasisApplyAtPointsCore_Cuda_shared()
342 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
346 … thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
348 …red_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyAtPointsCore_Cuda_shared()
354 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
358 … thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
360 …red_Cuda(ceed, data->InterpAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyAtPointsCore_Cuda_shared()
378 CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
382 … thread_1d, 1, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
384 …_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, 1, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
391 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
395 … thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
397 …hared_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyAtPointsCore_Cuda_shared()
403 CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); in CeedBasisApplyAtPointsCore_Cuda_shared() local
407 … thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyAtPointsCore_Cuda_shared()
409 …hared_Cuda(ceed, data->GradAtPoints, NULL, grid, thread_1d, thread_1d, elems_per_block, shared_mem, in CeedBasisApplyAtPointsCore_Cuda_shared()
487 CeedInt shared_mem = elems_per_block * thread * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Cuda_shared() local
491 elems_per_block, shared_mem, interp_args)); in CeedBasisApplyNonTensorCore_Cuda_shared()
493 …mShared_Cuda(ceed, data->Interp, NULL, grid, thread, 1, elems_per_block, shared_mem, interp_args)); in CeedBasisApplyNonTensorCore_Cuda_shared()
511 CeedInt shared_mem = elems_per_block * thread * sizeof(CeedScalar); in CeedBasisApplyNonTensorCore_Cuda_shared() local
515 elems_per_block, shared_mem, grad_args)); in CeedBasisApplyNonTensorCore_Cuda_shared()
517 …elDimShared_Cuda(ceed, data->Grad, NULL, grid, thread, 1, elems_per_block, shared_mem, grad_args)); in CeedBasisApplyNonTensorCore_Cuda_shared()