| /libCEED/backends/cuda-ref/ |
| H A D | ceed-cuda-ref-restriction.c | 120 const CeedScalar *d_u; in CeedElemRestrictionApply_Cuda_Core() local 134 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedElemRestrictionApply_Cuda_Core() 154 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 160 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 166 void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 170 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 177 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 181 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 185 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() 199 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Cuda_Core() [all …]
|
| H A D | ceed-cuda-ref-basis.c | 28 const CeedScalar *d_u; in CeedBasisApplyCore_Cuda() local 36 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyCore_Cuda() 51 … *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyCore_Cuda() 57 …s[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; in CeedBasisApplyCore_Cuda() 81 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApplyCore_Cuda() 82 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyCore_Cuda() 108 const CeedScalar *d_x, *d_u; in CeedBasisApplyAtPointsCore_Cuda() local 196 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyAtPointsCore_Cuda() 209 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Cuda() 216 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Cuda() [all …]
|
| /libCEED/backends/hip-ref/ |
| H A D | ceed-hip-ref-restriction.c | 121 const CeedScalar *d_u; in CeedElemRestrictionApply_Hip_Core() local 135 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedElemRestrictionApply_Hip_Core() 155 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 161 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 167 void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 171 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 178 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 182 void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 186 void *args[] = {&impl->d_offsets, &d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() 200 void *args[] = {&d_u, &d_v}; in CeedElemRestrictionApply_Hip_Core() [all …]
|
| H A D | ceed-hip-ref-basis.c | 27 const CeedScalar *d_u; in CeedBasisApplyCore_Hip() local 35 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyCore_Hip() 51 … *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyCore_Hip() 57 …s[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; in CeedBasisApplyCore_Hip() 81 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApplyCore_Hip() 82 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyCore_Hip() 107 const CeedScalar *d_x, *d_u; in CeedBasisApplyAtPointsCore_Hip() local 195 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyAtPointsCore_Hip() 208 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Hip() 215 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Hip() [all …]
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-gen-templates.h | 34 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset1d() argument 42 r_u[comp] = d_u[ind + strides_comp * comp]; in readDofsOffset1d() 51 … const CeedInt strides_elem, const CeedInt num_elem, global const CeedScalar *restrict d_u, in readDofsStrided1d() argument 60 r_u[comp] = d_u[ind + comp * strides_comp]; in readDofsStrided1d() 107 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset2d() argument 115 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + strides_comp * comp]; in readDofsOffset2d() 123 … const CeedInt strides_elem, const CeedInt num_elem, const global CeedScalar *restrict d_u, in readDofsStrided2d() argument 132 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + comp * strides_comp]; in readDofsStrided2d() 178 …const global CeedInt *restrict indices, const global CeedScalar *restrict d_u, private CeedScalar … in readDofsOffset3d() argument 187 …for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[z + comp * P_1D] = d_u[ind + strides_comp * co… in readDofsOffset3d() [all …]
|
| H A D | sycl-shared-basis-read-write-templates.h | 29 … const CeedInt strides_comp, const CeedInt strides_elem, global const CeedScalar *restrict d_u, in ReadElementStrided1d() argument 38 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 69 … const CeedInt strides_comp, const CeedInt strides_elem, global const CeedScalar *restrict d_u, in ReadElementStrided2d() argument 79 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 111 … const CeedInt strides_comp, const CeedInt strides_elem, global const CeedScalar *restrict d_u, in ReadElementStrided3d() argument 122 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; in ReadElementStrided3d()
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-read-write-templates.h | 29 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided1d() argument 35 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 78 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided2d() argument 84 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 127 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided3d() argument 134 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; in ReadElementStrided3d() 183 const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadPoint() argument 188 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadPoint()
|
| H A D | cuda-gen-templates.h | 29 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadPoint() argument 33 r_u[comp] = d_u[ind + comp * COMP_STRIDE]; in ReadPoint() 42 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ r_u, CeedScalar *d_u) { in WritePoint() argument 47 d_u[ind + comp * COMP_STRIDE] += r_u[comp]; in WritePoint() 74 … const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) { in ReadLVecStandard1d() argument 79 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard1d() 87 …id ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScalar *__restrict__ d_u, in ReadLVecStrided1d() argument 93 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + comp * STRIDES_COMP]; in ReadLVecStrided1d() 196 … const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) { in ReadLVecStandard2d() argument 201 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard2d() [all …]
|
| H A D | cuda-ref-qfunction.h | 16 inline __device__ void readQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *d_u,… in readQuads() argument 18 r_u[comp] = d_u[quad + num_qpts * comp]; in readQuads()
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-read-write-templates.h | 29 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided1d() argument 35 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 78 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided2d() argument 84 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 127 … const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadElementStrided3d() argument 134 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; in ReadElementStrided3d() 182 …nt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadPoint() argument 187 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadPoint()
|
| H A D | hip-gen-templates.h | 29 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { in ReadPoint() argument 33 r_u[comp] = d_u[ind + comp * COMP_STRIDE]; in ReadPoint() 42 … const CeedInt *__restrict__ indices, const CeedScalar *__restrict__ r_u, CeedScalar *d_u) { in WritePoint() argument 47 d_u[ind + comp * COMP_STRIDE] += r_u[comp]; in WritePoint() 74 … const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) { in ReadLVecStandard1d() argument 79 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard1d() 87 …d(SharedData_Hip &data, const CeedInt elem, const CeedScalar *__restrict__ d_u, CeedScalar *__rest… in ReadLVecStrided1d() argument 92 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + comp * STRIDES_COMP]; in ReadLVecStrided1d() 195 … const CeedScalar *__restrict__ d_u, CeedScalar *__restrict__ r_u) { in ReadLVecStandard2d() argument 200 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard2d() [all …]
|
| H A D | hip-ref-qfunction.h | 16 inline __device__ void readQuads(const CeedInt quad, const CeedInt num_qpts, const CeedScalar *d_u,… in readQuads() argument 18 r_u[comp] = d_u[quad + num_qpts * comp]; in readQuads()
|
| /libCEED/backends/hip-shared/ |
| H A D | ceed-hip-shared-basis.c | 96 const CeedScalar *d_u; in CeedBasisApplyTensorCore_Hip_shared() local 107 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyTensorCore_Hip_shared() 125 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Hip_shared() 177 void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Hip_shared() 255 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApplyTensorCore_Hip_shared() 256 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyTensorCore_Hip_shared() 281 const CeedScalar *d_x, *d_u; in CeedBasisApplyAtPointsCore_Hip_shared() local 371 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyAtPointsCore_Hip_shared() 388 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Hip_shared() 436 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Hip_shared() [all …]
|
| /libCEED/backends/cuda-shared/ |
| H A D | ceed-cuda-shared-basis.c | 29 const CeedScalar *d_u; in CeedBasisApplyTensorCore_Cuda_shared() local 40 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyTensorCore_Cuda_shared() 58 void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared() 112 void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; in CeedBasisApplyTensorCore_Cuda_shared() 189 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApplyTensorCore_Cuda_shared() 190 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyTensorCore_Cuda_shared() 216 const CeedScalar *d_x, *d_u; in CeedBasisApplyAtPointsCore_Cuda_shared() local 305 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyAtPointsCore_Cuda_shared() 322 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Cuda_shared() 372 …[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; in CeedBasisApplyAtPointsCore_Cuda_shared() [all …]
|
| /libCEED/backends/sycl-shared/ |
| H A D | ceed-sycl-shared-basis.sycl.cpp | 40 const CeedScalar *d_u; in CeedBasisApplyTensor_Sycl_shared() local 49 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyTensor_Sycl_shared() 71 cgh.set_args(num_elem, impl->d_interp_1d, d_u, d_v); in CeedBasisApplyTensor_Sycl_shared() 94 cgh.set_args(num_elem, impl->d_interp_1d, d_grad_1d, d_u, d_v); in CeedBasisApplyTensor_Sycl_shared() 129 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApplyTensor_Sycl_shared() 130 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyTensor_Sycl_shared()
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref-basis.sycl.cpp | 274 const CeedScalar *d_u; in CeedBasisApply_Sycl() local 284 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApply_Sycl() 302 …d(CeedBasisApplyInterp_Sycl<true>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 304 …(CeedBasisApplyInterp_Sycl<false>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 309 …end(CeedBasisApplyGrad_Sycl<true>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 311 …nd(CeedBasisApplyGrad_Sycl<false>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); in CeedBasisApply_Sycl() 329 …NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); in CeedBasisApply_Sycl() 330 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApply_Sycl() 460 const CeedScalar *d_u; in CeedBasisApplyNonTensor_Sycl() local 470 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyNonTensor_Sycl() [all …]
|
| H A D | ceed-sycl-restriction.sycl.cpp | 152 const CeedScalar *d_u; in CeedElemRestrictionApply_Sycl() local 161 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedElemRestrictionApply_Sycl() 175 CeedCallBackend(CeedElemRestrictionOffsetNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 178 CeedCallBackend(CeedElemRestrictionStridedNoTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 184 CeedCallBackend(CeedElemRestrictionOffsetTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 187 CeedCallBackend(CeedElemRestrictionStridedTranspose_Sycl(data->sycl_queue, impl, d_u, d_v)); in CeedElemRestrictionApply_Sycl() 196 CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedElemRestrictionApply_Sycl()
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-basis.c | 34 const CeedScalar *d_u; in CeedBasisApplyCore_Magma() local 54 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyCore_Magma() 117 …void *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, … in CeedBasisApplyCore_Magma() 194 …void *args[] = {&impl->d_interp_1d, &impl->d_grad_1d, &d_u, &u_elem_stride, &u_comp_str… in CeedBasisApplyCore_Magma() 249 CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); in CeedBasisApplyCore_Magma() 282 const CeedScalar *d_u; in CeedBasisApplyNonTensorCore_Magma() local 297 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); in CeedBasisApplyNonTensorCore_Magma() 430 void *args[] = {&N, &d_b, &d_u, &d_v}; in CeedBasisApplyNonTensorCore_Magma() 437 …magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q… in CeedBasisApplyNonTensorCore_Magma() 439 …magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v … in CeedBasisApplyNonTensorCore_Magma() [all …]
|