ceed-cuda-shared-basis.c - OpenGrok cross reference for /libCEED/backends/cuda-shared/ceed-cuda-shared-basis.c

Lines Matching refs:thread
479       CeedInt thread = CeedIntMax(Q, P);  in CeedBasisApplyNonTensorCore_Cuda_shared()  local
485 …elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thread, 1));  in CeedBasisApplyNonTensorCore_Cuda_shared()
487         CeedInt shared_mem      = elems_per_block * thread * sizeof(CeedScalar);  in CeedBasisApplyNonTensorCore_Cuda_shared()
490 …red_Cuda(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, NULL, grid, thread, 1,  in CeedBasisApplyNonTensorCore_Cuda_shared()
493 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Interp, NULL, grid, thread, 1, elems_per_b…  in CeedBasisApplyNonTensorCore_Cuda_shared()
503       CeedInt thread = CeedIntMax(Q, P);  in CeedBasisApplyNonTensorCore_Cuda_shared()  local
509 …elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thread, 1));  in CeedBasisApplyNonTensorCore_Cuda_shared()
511         CeedInt shared_mem      = elems_per_block * thread * sizeof(CeedScalar);  in CeedBasisApplyNonTensorCore_Cuda_shared()
514 …mShared_Cuda(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, NULL, grid, thread, 1,  in CeedBasisApplyNonTensorCore_Cuda_shared()
517 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, data->Grad, NULL, grid, thread, 1, elems_per_blo…  in CeedBasisApplyNonTensorCore_Cuda_shared()
527       CeedInt thread = CeedIntMax(Q, P);  in CeedBasisApplyNonTensorCore_Cuda_shared()  local
533 …elems_per_block = CeedIntMin(ceed_Cuda->device_prop.maxThreadsDim[2], CeedIntMax(512 / thread, 1));  in CeedBasisApplyNonTensorCore_Cuda_shared()
536 …CeedCallBackend(CeedRunKernelDim_Cuda(ceed, data->Weight, grid, thread, elems_per_block, 1, weight…  in CeedBasisApplyNonTensorCore_Cuda_shared()