| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 63 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 76 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 88 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 117 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 130 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 141 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Cuda_shared() local 327 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Cuda_shared() local 341 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Cuda_shared() local 353 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Cuda_shared() local 377 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Cuda_shared() local [all …]
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 130 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 142 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 153 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 182 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 194 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 205 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyTensorCore_Hip_shared() local 393 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Hip_shared() local 405 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Hip_shared() local 417 … CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Hip_shared() local 441 CeedInt grid = num_elem / elems_per_block + (num_elem % elems_per_block > 0); in CeedBasisApplyAtPointsCore_Hip_shared() local [all …]
|
| /libCEED/backends/hip-gen/ |
| H A D | ceed-hip-gen-operator.c | 163 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local 169 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local 175 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorApplyAddCore_Hip_gen() local 441 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local 447 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local 453 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleQFunctionCore_Hip_gen() local 621 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen() local 627 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen() local 633 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen() local 803 …CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num… in CeedOperatorAssembleSingleAtPoints_Hip_gen() local [all …]
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-basis.c | 116 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local 193 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local 230 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyCore_Magma() local 426 CeedInt grid = CeedDivUpInt(N, num_t_col * NB); in CeedBasisApplyNonTensorCore_Magma() local 447 CeedInt grid = CeedDivUpInt(num_elem, num_t_col); in CeedBasisApplyNonTensorCore_Magma() local
|
| /libCEED/backends/cuda-gen/ |
| H A D | ceed-cuda-gen-operator.c | 76 int *grid) { in BlockGridCalculate() 205 int max_threads_per_block, min_grid_size, grid; in CeedOperatorApplyAddCore_Cuda_gen() local 460 int max_threads_per_block, min_grid_size, grid; in CeedOperatorLinearAssembleQFunctionCore_Cuda_gen() local 636 int max_threads_per_block, min_grid_size, grid; in CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda_gen() local 801 int max_threads_per_block, min_grid_size, grid; in CeedOperatorAssembleSingleAtPoints_Cuda_gen() local
|
| /libCEED/backends/sycl-gen/ |
| H A D | ceed-sycl-gen-operator.sycl.cpp | 126 CeedInt block_sizes[3], grid = 0; in CeedOperatorApplyAdd_Sycl_gen() local
|
| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-restriction.c | 150 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core() local 195 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core() local
|
| H A D | ceed-cuda-ref-basis.c | 261 const int grid = CeedDivUpInt(num_elem, elems_per_block); in CeedBasisApplyNonTensorCore_Cuda() local
|
| H A D | ceed-cuda-ref-operator.c | 1466 CeedInt grid = CeedDivUpInt(num_elem, elems_per_block); in CeedOperatorAssembleDiagonalCore_Cuda() local 1784 CeedInt grid = CeedDivUpInt(num_elem_in, asmb->elems_per_block); in CeedOperatorAssembleSingle_Cuda() local
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-restriction.c | 151 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core() local 196 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core() local
|
| H A D | ceed-hip-ref-basis.c | 260 const int grid = CeedDivUpInt(num_elem, elems_per_block); in CeedBasisApplyNonTensorCore_Hip() local
|
| H A D | ceed-hip-ref-operator.c | 1463 CeedInt grid = CeedDivUpInt(num_elem, elems_per_block); in CeedOperatorAssembleDiagonalCore_Hip() local 1781 CeedInt grid = CeedDivUpInt(num_elem_in, asmb->elems_per_block); in CeedOperatorAssembleSingle_Hip() local
|