| /libCEED/backends/memcheck/ |
| H A D | ceed-memcheck-restriction.c | 46 …Transpose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core() argument 59 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core() 62 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core() 63 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core() 72 …Transpose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core() argument 80 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core() 82 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core() 83 …vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_siz… in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core() 91 … const CeedInt block_size, const CeedInt comp_stride, CeedInt start, in CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core() argument 98 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core() [all …]
|
| /libCEED/backends/ref/ |
| H A D | ceed-ref-restriction.c | 19 …dedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() argument 30 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 33 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 45 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 48 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() 59 …setNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core() argument 67 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core() 69 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core() [all …]
|
| /libCEED/backends/cuda-ref/kernels/ |
| H A D | cuda-ref-vector.cu | 26 const int block_size = 512; in CeedDeviceCopyStrided_Cuda() local 28 int grid_size = copy_size / block_size; in CeedDeviceCopyStrided_Cuda() 30 if (block_size * grid_size < copy_size) grid_size += 1; in CeedDeviceCopyStrided_Cuda() 31 copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array); in CeedDeviceCopyStrided_Cuda() 48 const int block_size = 512; in CeedDeviceSetValue_Cuda() local 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Cuda() 52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Cuda() 53 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Cuda() 72 const int block_size = 512; in CeedDeviceSetValueStrided_Cuda() local 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Cuda() [all …]
|
| /libCEED/backends/hip-ref/kernels/ |
| H A D | hip-ref-vector.hip.cpp | 26 const int block_size = 512; in CeedDeviceCopyStrided_Hip() local 28 int grid_size = vec_size / block_size; in CeedDeviceCopyStrided_Hip() 30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceCopyStrided_Hip() 31 …hipLaunchKernelGGL(copyStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, le… in CeedDeviceCopyStrided_Hip() 48 const int block_size = 512; in CeedDeviceSetValue_Hip() local 50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Hip() 52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Hip() 53 hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); in CeedDeviceSetValue_Hip() 72 const int block_size = 512; in CeedDeviceSetValueStrided_Hip() local 74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Hip() [all …]
|
| /libCEED/backends/sycl-ref/kernels/ |
| H A D | sycl-ref-vector.cpp | 26 const int block_size = 512; in CeedDeviceSetValue_Sycl() local 28 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Sycl() 30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Sycl() 31 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Sycl() 49 const int block_size = 512; in CeedDeviceReciprocal_Sycl() local 51 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Sycl() 53 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceReciprocal_Sycl() 54 rcpValueK<<<grid_size, block_size>>>(d_array, length); in CeedDeviceReciprocal_Sycl() 72 const int block_size = 512; in CeedDeviceScale_Sycl() local 74 int grid_size = vec_size / block_size; in CeedDeviceScale_Sycl() [all …]
|
| /libCEED/backends/opt/ |
| H A D | ceed-opt-operator.c | 20 …const CeedInt block_size, CeedElemRestriction *block_rstr, CeedVector *e_vecs_full, CeedVector *e_… in CeedOperatorSetupFields_Opt() argument 71 …ckend(CeedElemRestrictionCreateBlocked(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt() 81 …edElemRestrictionCreateBlockedOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt() 92 …emRestrictionCreateBlockedCurlOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt() 102 …eedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_siz… in CeedOperatorSetupFields_Opt() 117 e_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt() 119 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt() 131 e_size = (CeedSize)P * num_comp * block_size; in CeedOperatorSetupFields_Opt() 133 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt() 138 q_size = (CeedSize)Q * block_size; in CeedOperatorSetupFields_Opt() [all …]
|
| H A D | ceed-opt.h | 15 CeedInt block_size; member
|
| H A D | ceed-opt-serial.c | 48 data->block_size = 1; in CeedInit_Opt_Serial()
|
| H A D | ceed-opt-blocked.c | 48 data->block_size = 8; in CeedInit_Opt_Blocked()
|
| /libCEED/interface/ |
| H A D | ceed-elemrestriction.c | 38 … CeedInt *offsets, CeedInt *block_offsets, CeedInt num_block, CeedInt num_elem, CeedInt block_size, in CeedPermutePadOffsets() argument 40 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadOffsets() 41 for (CeedInt j = 0; j < block_size; j++) { in CeedPermutePadOffsets() 43 …block_offsets[e * elem_size + k * block_size + j] = offsets[CeedIntMin(e + j, num_elem - 1) * elem… in CeedPermutePadOffsets() 64 …nts, bool *block_orients, CeedInt num_block, CeedInt num_elem, CeedInt block_size, CeedInt elem_si… in CeedPermutePadOrients() argument 65 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadOrients() 66 for (CeedInt j = 0; j < block_size; j++) { in CeedPermutePadOrients() 68 …block_orients[e * elem_size + k * block_size + j] = orients[CeedIntMin(e + j, num_elem - 1) * elem… in CeedPermutePadOrients() 89 …url_orients, CeedInt8 *block_curl_orients, CeedInt num_block, CeedInt num_elem, CeedInt block_size, in CeedPermutePadCurlOrients() argument 91 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadCurlOrients() [all …]
|
| /libCEED/backends/blocked/ |
| H A D | ceed-blocked-operator.c | 20 …bool *apply_add_basis, const CeedInt block_size, CeedElemRestriction *block_rstr, CeedVector *e_ve… in CeedOperatorSetupFields_Blocked() argument 71 …ckend(CeedElemRestrictionCreateBlocked(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked() 81 …edElemRestrictionCreateBlockedOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked() 92 …emRestrictionCreateBlockedCurlOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked() 102 …eedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_siz… in CeedOperatorSetupFields_Blocked() 117 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Blocked() 129 e_size = (CeedSize)P * num_comp * block_size; in CeedOperatorSetupFields_Blocked() 131 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Blocked() 136 q_size = (CeedSize)Q * block_size; in CeedOperatorSetupFields_Blocked() 138 …CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_… in CeedOperatorSetupFields_Blocked() [all …]
|
| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-restriction.c | 149 const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; in CeedElemRestrictionApply_Cuda_Core() local 150 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core() 156 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 162 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 168 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 172 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 179 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 183 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 187 …eedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core() 194 const CeedInt block_size = 32; in CeedElemRestrictionApply_Cuda_Core() local [all …]
|
| H A D | ceed-cuda-ref-basis.c | 52 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyCore_Cuda() local 54 CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Interp, num_elem, block_size, interp_args)); in CeedBasisApplyCore_Cuda() 58 const CeedInt block_size = max_block_size; in CeedBasisApplyCore_Cuda() local 60 CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Grad, num_elem, block_size, grad_args)); in CeedBasisApplyCore_Cuda() 210 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Cuda() local 212 …da(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, in CeedBasisApplyAtPointsCore_Cuda() 217 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Cuda() local 219 …is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args)); in CeedBasisApplyAtPointsCore_Cuda()
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-restriction.c | 150 const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; in CeedElemRestrictionApply_Hip_Core() local 151 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core() 157 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 163 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 169 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 173 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 180 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 184 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 188 …CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core() 195 const CeedInt block_size = 64; in CeedElemRestrictionApply_Hip_Core() local [all …]
|
| H A D | ceed-hip-ref-qfunction.c | 35 const int block_size = ceed_Hip->opt_block_size; in CeedQFunctionApply_Hip() local 51 …CeedCallBackend(CeedRunKernel_Hip(ceed, data->QFunction, CeedDivUpInt(Q, block_size), block_size, … in CeedQFunctionApply_Hip()
|
| H A D | ceed-hip-ref-basis.c | 52 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyCore_Hip() local 54 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); in CeedBasisApplyCore_Hip() 58 const CeedInt block_size = max_block_size; in CeedBasisApplyCore_Hip() local 60 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); in CeedBasisApplyCore_Hip() 209 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Hip() local 211 …ip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, in CeedBasisApplyAtPointsCore_Hip() 216 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Hip() local 218 …is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args)); in CeedBasisApplyAtPointsCore_Hip()
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 119 CeedInt block_size = data->block_sizes[0]; in CeedBasisApplyTensorCore_Hip_shared() local 141 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() 152 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() 166 CeedInt block_size = data->block_sizes[1]; in CeedBasisApplyTensorCore_Hip_shared() local 193 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() 204 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared() 218 CeedInt block_size = data->block_sizes[2]; in CeedBasisApplyTensorCore_Hip_shared() local 225 const CeedInt opt_elems = block_size / Q_1d; in CeedBasisApplyTensorCore_Hip_shared() 231 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared() 237 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared() [all …]
|
| /libCEED/backends/hip/ |
| H A D | ceed-hip-compile.h | 20 CEED_INTERN int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size, v…
|
| H A D | ceed-hip-compile.cpp | 212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, v… in CeedRunKernel_Hip() argument 213 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, … in CeedRunKernel_Hip()
|
| /libCEED/examples/python/ |
| H A D | tutorial-2-elemrestriction.ipynb | 237 "block_size = 5\n", 248 "r = ceed.BlockedElemRestriction(num_elem, 2, block_size, 1, 1, num_elem+1, indices,\n", 251 "y = ceed.Vector(2*block_size*2)\n", 307 "block_size = 5\n", 318 "r = ceed.BlockedElemRestriction(num_elem, 2, block_size, 1, 1, num_elem+1, indices,\n", 321 "y = ceed.Vector(block_size*2)\n",
|
| /libCEED/backends/cuda/ |
| H A D | ceed-cuda-compile.h | 20 CEED_INTERN int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size, voi…
|
| /libCEED/examples/fluids/src/ |
| H A D | mat-ceed.c | 380 PetscInt block_size, num_blocks, max_vblock_size = PETSC_INT_MAX; in MatCreateCeed() local 384 PetscCall(MatGetBlockSize(temp_mat, &block_size)); in MatCreateCeed() 395 if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size)); in MatCreateCeed() 423 if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; in MatCreateCeed() 439 if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; in MatCreateCeed() 544 PetscInt block_size; in MatCeedCopy() local 546 PetscCall(MatGetBlockSize(mat_ceed, &block_size)); in MatCeedCopy() 547 if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); in MatCeedCopy()
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref.hpp | 98 …CeedInt num_eval_mode_in, num_eval_mode_out, num_qpts, num_nodes, block_size, num_comp; // Ke… member
|
| /libCEED/include/ceed/ |
| H A D | ceed.h | 274 …nCreateBlocked(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com… 277 …lockedOriented(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com… 280 …edCurlOriented(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com… 283 …BlockedStrided(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com… 308 CEED_EXTERN int CeedElemRestrictionGetBlockSize(CeedElemRestriction rstr, CeedInt *block_size);
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 154 CeedInt block_size = 32; in CeedBasisApplyTensorCore_Cuda_shared() local 160 const CeedInt elems_per_block = block_size / Q_1d; in CeedBasisApplyTensorCore_Cuda_shared() 165 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared() 171 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared()
|