Home
last modified time | relevance | path

Searched refs:block_size (Results 1 – 25 of 30) sorted by relevance

12

/libCEED/backends/memcheck/
H A Dceed-memcheck-restriction.c46 …Transpose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core() argument
59 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core()
62 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core()
63 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core()
72 …Transpose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core() argument
80 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core()
82 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core()
83 …vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_siz… in CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core()
91 … const CeedInt block_size, const CeedInt comp_stride, CeedInt start, in CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core() argument
98 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core()
[all …]
/libCEED/backends/ref/
H A Dceed-ref-restriction.c19 …dedNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core() argument
30 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
33 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
34 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
45 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
48 CeedPragmaSIMD for (CeedSize j = 0; j < block_size; j++) { in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
49 vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = in CeedElemRestrictionApplyStridedNoTranspose_Ref_Core()
59 …setNoTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core() argument
67 for (CeedSize e = start * block_size; e < stop * block_size; e += block_size) { in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core()
69 CeedPragmaSIMD for (CeedSize i = 0; i < elem_size * block_size; i++) { in CeedElemRestrictionApplyOffsetNoTranspose_Ref_Core()
[all …]
/libCEED/backends/cuda-ref/kernels/
H A Dcuda-ref-vector.cu26 const int block_size = 512; in CeedDeviceCopyStrided_Cuda() local
28 int grid_size = copy_size / block_size; in CeedDeviceCopyStrided_Cuda()
30 if (block_size * grid_size < copy_size) grid_size += 1; in CeedDeviceCopyStrided_Cuda()
31 copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array); in CeedDeviceCopyStrided_Cuda()
48 const int block_size = 512; in CeedDeviceSetValue_Cuda() local
50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Cuda()
52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Cuda()
53 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Cuda()
72 const int block_size = 512; in CeedDeviceSetValueStrided_Cuda() local
74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Cuda()
[all …]
/libCEED/backends/hip-ref/kernels/
H A Dhip-ref-vector.hip.cpp26 const int block_size = 512; in CeedDeviceCopyStrided_Hip() local
28 int grid_size = vec_size / block_size; in CeedDeviceCopyStrided_Hip()
30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceCopyStrided_Hip()
31 …hipLaunchKernelGGL(copyStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, le… in CeedDeviceCopyStrided_Hip()
48 const int block_size = 512; in CeedDeviceSetValue_Hip() local
50 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Hip()
52 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Hip()
53 hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); in CeedDeviceSetValue_Hip()
72 const int block_size = 512; in CeedDeviceSetValueStrided_Hip() local
74 int grid_size = set_size / block_size; in CeedDeviceSetValueStrided_Hip()
[all …]
/libCEED/backends/sycl-ref/kernels/
H A Dsycl-ref-vector.cpp26 const int block_size = 512; in CeedDeviceSetValue_Sycl() local
28 int grid_size = vec_size / block_size; in CeedDeviceSetValue_Sycl()
30 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceSetValue_Sycl()
31 setValueK<<<grid_size, block_size>>>(d_array, length, val); in CeedDeviceSetValue_Sycl()
49 const int block_size = 512; in CeedDeviceReciprocal_Sycl() local
51 int grid_size = vec_size / block_size; in CeedDeviceReciprocal_Sycl()
53 if (block_size * grid_size < vec_size) grid_size += 1; in CeedDeviceReciprocal_Sycl()
54 rcpValueK<<<grid_size, block_size>>>(d_array, length); in CeedDeviceReciprocal_Sycl()
72 const int block_size = 512; in CeedDeviceScale_Sycl() local
74 int grid_size = vec_size / block_size; in CeedDeviceScale_Sycl()
[all …]
/libCEED/backends/opt/
H A Dceed-opt-operator.c20 …const CeedInt block_size, CeedElemRestriction *block_rstr, CeedVector *e_vecs_full, CeedVector *e_… in CeedOperatorSetupFields_Opt() argument
71 …ckend(CeedElemRestrictionCreateBlocked(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt()
81 …edElemRestrictionCreateBlockedOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt()
92 …emRestrictionCreateBlockedCurlOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Opt()
102 …eedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_siz… in CeedOperatorSetupFields_Opt()
117 e_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt()
119 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt()
131 e_size = (CeedSize)P * num_comp * block_size; in CeedOperatorSetupFields_Opt()
133 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Opt()
138 q_size = (CeedSize)Q * block_size; in CeedOperatorSetupFields_Opt()
[all …]
H A Dceed-opt.h15 CeedInt block_size; member
H A Dceed-opt-serial.c48 data->block_size = 1; in CeedInit_Opt_Serial()
H A Dceed-opt-blocked.c48 data->block_size = 8; in CeedInit_Opt_Blocked()
/libCEED/interface/
H A Dceed-elemrestriction.c38 … CeedInt *offsets, CeedInt *block_offsets, CeedInt num_block, CeedInt num_elem, CeedInt block_size, in CeedPermutePadOffsets() argument
40 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadOffsets()
41 for (CeedInt j = 0; j < block_size; j++) { in CeedPermutePadOffsets()
43 …block_offsets[e * elem_size + k * block_size + j] = offsets[CeedIntMin(e + j, num_elem - 1) * elem… in CeedPermutePadOffsets()
64 …nts, bool *block_orients, CeedInt num_block, CeedInt num_elem, CeedInt block_size, CeedInt elem_si… in CeedPermutePadOrients() argument
65 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadOrients()
66 for (CeedInt j = 0; j < block_size; j++) { in CeedPermutePadOrients()
68 …block_orients[e * elem_size + k * block_size + j] = orients[CeedIntMin(e + j, num_elem - 1) * elem… in CeedPermutePadOrients()
89 …url_orients, CeedInt8 *block_curl_orients, CeedInt num_block, CeedInt num_elem, CeedInt block_size, in CeedPermutePadCurlOrients() argument
91 for (CeedInt e = 0; e < num_block * block_size; e += block_size) { in CeedPermutePadCurlOrients()
[all …]
/libCEED/backends/blocked/
H A Dceed-blocked-operator.c20 …bool *apply_add_basis, const CeedInt block_size, CeedElemRestriction *block_rstr, CeedVector *e_ve… in CeedOperatorSetupFields_Blocked() argument
71 …ckend(CeedElemRestrictionCreateBlocked(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked()
81 …edElemRestrictionCreateBlockedOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked()
92 …emRestrictionCreateBlockedCurlOriented(ceed_rstr, num_elem, elem_size, block_size, num_comp, comp_… in CeedOperatorSetupFields_Blocked()
102 …eedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_siz… in CeedOperatorSetupFields_Blocked()
117 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Blocked()
129 e_size = (CeedSize)P * num_comp * block_size; in CeedOperatorSetupFields_Blocked()
131 q_size = (CeedSize)Q * size * block_size; in CeedOperatorSetupFields_Blocked()
136 q_size = (CeedSize)Q * block_size; in CeedOperatorSetupFields_Blocked()
138 …CeedCallBackend(CeedBasisApply(basis, block_size, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_… in CeedOperatorSetupFields_Blocked()
[all …]
/libCEED/backends/cuda-ref/
H A Dceed-cuda-ref-restriction.c149 const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; in CeedElemRestrictionApply_Cuda_Core() local
150 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Cuda_Core()
156 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
162 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
168 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
172 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
179 CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
183 … CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
187 …eedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Cuda_Core()
194 const CeedInt block_size = 32; in CeedElemRestrictionApply_Cuda_Core() local
[all …]
H A Dceed-cuda-ref-basis.c52 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyCore_Cuda() local
54 CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Interp, num_elem, block_size, interp_args)); in CeedBasisApplyCore_Cuda()
58 const CeedInt block_size = max_block_size; in CeedBasisApplyCore_Cuda() local
60 CeedCallBackend(CeedRunKernel_Cuda(ceed, data->Grad, num_elem, block_size, grad_args)); in CeedBasisApplyCore_Cuda()
210 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Cuda() local
212 …da(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, in CeedBasisApplyAtPointsCore_Cuda()
217 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Cuda() local
219 …is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args)); in CeedBasisApplyAtPointsCore_Cuda()
/libCEED/backends/hip-ref/
H A Dceed-hip-ref-restriction.c150 const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; in CeedElemRestrictionApply_Hip_Core() local
151 const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); in CeedElemRestrictionApply_Hip_Core()
157 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
163 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
169 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
173 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
180 CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
184 … CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
188 …CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); in CeedElemRestrictionApply_Hip_Core()
195 const CeedInt block_size = 64; in CeedElemRestrictionApply_Hip_Core() local
[all …]
H A Dceed-hip-ref-qfunction.c35 const int block_size = ceed_Hip->opt_block_size; in CeedQFunctionApply_Hip() local
51 …CeedCallBackend(CeedRunKernel_Hip(ceed, data->QFunction, CeedDivUpInt(Q, block_size), block_size, … in CeedQFunctionApply_Hip()
H A Dceed-hip-ref-basis.c52 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyCore_Hip() local
54 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); in CeedBasisApplyCore_Hip()
58 const CeedInt block_size = max_block_size; in CeedBasisApplyCore_Hip() local
60 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); in CeedBasisApplyCore_Hip()
209 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Hip() local
211 …ip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, in CeedBasisApplyAtPointsCore_Hip()
216 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); in CeedBasisApplyAtPointsCore_Hip() local
218 …is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args)); in CeedBasisApplyAtPointsCore_Hip()
/libCEED/backends/hip-shared/
H A Dceed-hip-shared-basis.c119 CeedInt block_size = data->block_sizes[0]; in CeedBasisApplyTensorCore_Hip_shared() local
141 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
152 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
166 CeedInt block_size = data->block_sizes[1]; in CeedBasisApplyTensorCore_Hip_shared() local
193 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
204 const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); in CeedBasisApplyTensorCore_Hip_shared()
218 CeedInt block_size = data->block_sizes[2]; in CeedBasisApplyTensorCore_Hip_shared() local
225 const CeedInt opt_elems = block_size / Q_1d; in CeedBasisApplyTensorCore_Hip_shared()
231 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared()
237 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Hip_shared()
[all …]
/libCEED/backends/hip/
H A Dceed-hip-compile.h20 CEED_INTERN int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, int grid_size, int block_size, v…
H A Dceed-hip-compile.cpp212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, v… in CeedRunKernel_Hip() argument
213 …CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, … in CeedRunKernel_Hip()
/libCEED/examples/python/
H A Dtutorial-2-elemrestriction.ipynb237 "block_size = 5\n",
248 "r = ceed.BlockedElemRestriction(num_elem, 2, block_size, 1, 1, num_elem+1, indices,\n",
251 "y = ceed.Vector(2*block_size*2)\n",
307 "block_size = 5\n",
318 "r = ceed.BlockedElemRestriction(num_elem, 2, block_size, 1, 1, num_elem+1, indices,\n",
321 "y = ceed.Vector(block_size*2)\n",
/libCEED/backends/cuda/
H A Dceed-cuda-compile.h20 CEED_INTERN int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, int grid_size, int block_size, voi…
/libCEED/examples/fluids/src/
H A Dmat-ceed.c380 PetscInt block_size, num_blocks, max_vblock_size = PETSC_INT_MAX; in MatCreateCeed() local
384 PetscCall(MatGetBlockSize(temp_mat, &block_size)); in MatCreateCeed()
395 if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size)); in MatCreateCeed()
423 if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; in MatCreateCeed()
439 if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE; in MatCreateCeed()
544 PetscInt block_size; in MatCeedCopy() local
546 PetscCall(MatGetBlockSize(mat_ceed, &block_size)); in MatCeedCopy()
547 if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size)); in MatCeedCopy()
/libCEED/backends/sycl-ref/
H A Dceed-sycl-ref.hpp98 …CeedInt num_eval_mode_in, num_eval_mode_out, num_qpts, num_nodes, block_size, num_comp; // Ke… member
/libCEED/include/ceed/
H A Dceed.h274 …nCreateBlocked(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com…
277 …lockedOriented(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com…
280 …edCurlOriented(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com…
283 …BlockedStrided(Ceed ceed, CeedInt num_elem, CeedInt elem_size, CeedInt block_size, CeedInt num_com…
308 CEED_EXTERN int CeedElemRestrictionGetBlockSize(CeedElemRestriction rstr, CeedInt *block_size);
/libCEED/backends/cuda-shared/
H A Dceed-cuda-shared-basis.c154 CeedInt block_size = 32; in CeedBasisApplyTensorCore_Cuda_shared() local
160 const CeedInt elems_per_block = block_size / Q_1d; in CeedBasisApplyTensorCore_Cuda_shared()
165 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared()
171 const CeedInt opt_elems = block_size / (Q_1d * Q_1d); in CeedBasisApplyTensorCore_Cuda_shared()

12