Lines Matching refs:block

75 …elem, int blocks_per_sm, int max_threads_per_block, int max_threads_z, int warp_size, int block[3],  in BlockGridCalculate()
78 const int threads_per_elem = block[0] * block[1]; in BlockGridCalculate()
94 block[2] = CeedIntMin(elems_per_block, max_threads_z); in BlockGridCalculate()
209 int block[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1}; in CeedOperatorApplyAddCore_Cuda_gen() local
213 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorApplyAddCore_Cuda_gen()
218 block[2] = elems_per_block; in CeedOperatorApplyAddCore_Cuda_gen()
220 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorApplyAddCore_Cuda_gen()
222 …nd(CeedTryRunKernelDimShared_Cuda(ceed, data->op, stream, grid, block[0], block[1], block[2], shar… in CeedOperatorApplyAddCore_Cuda_gen()
464 int block[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1}; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local
468 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
473 block[2] = elems_per_block; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
475 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
477 …ernelDimShared_Cuda(ceed, data->assemble_qfunction, NULL, grid, block[0], block[1], block[2], shar… in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen()
639 int block[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1}; in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local
642 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen()
643 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen()
645 …KernelDimShared_Cuda(ceed, data->assemble_diagonal, NULL, grid, block[0], block[1], block[2], shar… in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen()
804 int block[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1}; in CeedOperatorAssembleSingleAtPoints_Cuda_gen() local
807 … cuda_data->device_prop.maxThreadsDim[2], cuda_data->device_prop.warpSize, block, &grid)); in CeedOperatorAssembleSingleAtPoints_Cuda_gen()
808 CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar); in CeedOperatorAssembleSingleAtPoints_Cuda_gen()
810 …yRunKernelDimShared_Cuda(ceed, data->assemble_full, NULL, grid, block[0], block[1], block[2], shar… in CeedOperatorAssembleSingleAtPoints_Cuda_gen()