xref: /libCEED/backends/sycl-ref/kernels/sycl-ref-vector.cpp (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other
249ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
349ed4312SSebastian Grimberg // files for details.
449ed4312SSebastian Grimberg //
549ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
649ed4312SSebastian Grimberg //
749ed4312SSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
849ed4312SSebastian Grimberg 
949ed4312SSebastian Grimberg #include <ceed/ceed.h>
1049ed4312SSebastian Grimberg #include <sycl/sycl.hpp>
1149ed4312SSebastian Grimberg 
1249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
1349ed4312SSebastian Grimberg // Kernel for set value on device
1449ed4312SSebastian Grimberg //------------------------------------------------------------------------------
setValueK(CeedScalar * __restrict__ vec,CeedInt size,CeedScalar val)1549ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
16dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
17dd64fc84SJeremy L Thompson 
18dd64fc84SJeremy L Thompson   if (index >= size) return;
19dd64fc84SJeremy L Thompson   vec[index] = val;
2049ed4312SSebastian Grimberg }
2149ed4312SSebastian Grimberg 
2249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
2349ed4312SSebastian Grimberg // Set value on device memory
2449ed4312SSebastian Grimberg //------------------------------------------------------------------------------
CeedDeviceSetValue_Sycl(CeedScalar * d_array,CeedInt length,CeedScalar val)2549ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
26dd64fc84SJeremy L Thompson   const int block_size = 512;
27dd64fc84SJeremy L Thompson   const int vec_size   = length;
28dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
2949ed4312SSebastian Grimberg 
30dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
31dd64fc84SJeremy L Thompson   setValueK<<<grid_size, block_size>>>(d_array, length, val);
3249ed4312SSebastian Grimberg   return 0;
3349ed4312SSebastian Grimberg }
3449ed4312SSebastian Grimberg 
3549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
3649ed4312SSebastian Grimberg // Kernel for taking reciprocal
3749ed4312SSebastian Grimberg //------------------------------------------------------------------------------
rcpValueK(CeedScalar * __restrict__ vec,CeedInt size)3849ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
39dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
40dd64fc84SJeremy L Thompson 
41dd64fc84SJeremy L Thompson   if (index >= size) return;
42dd64fc84SJeremy L Thompson   if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index];
4349ed4312SSebastian Grimberg }
4449ed4312SSebastian Grimberg 
4549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
4649ed4312SSebastian Grimberg // Take vector reciprocal in device memory
4749ed4312SSebastian Grimberg //------------------------------------------------------------------------------
CeedDeviceReciprocal_Sycl(CeedScalar * d_array,CeedInt length)4849ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
49dd64fc84SJeremy L Thompson   const int block_size = 512;
50dd64fc84SJeremy L Thompson   const int vec_size   = length;
51dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
5249ed4312SSebastian Grimberg 
53dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
54dd64fc84SJeremy L Thompson   rcpValueK<<<grid_size, block_size>>>(d_array, length);
5549ed4312SSebastian Grimberg   return 0;
5649ed4312SSebastian Grimberg }
5749ed4312SSebastian Grimberg 
5849ed4312SSebastian Grimberg //------------------------------------------------------------------------------
5949ed4312SSebastian Grimberg // Kernel for scale
6049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
scaleValueK(CeedScalar * __restrict__ x,CeedScalar alpha,CeedInt size)6149ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
62dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
63dd64fc84SJeremy L Thompson 
64dd64fc84SJeremy L Thompson   if (index >= size) return;
65dd64fc84SJeremy L Thompson   x[index] *= alpha;
6649ed4312SSebastian Grimberg }
6749ed4312SSebastian Grimberg 
6849ed4312SSebastian Grimberg //------------------------------------------------------------------------------
6949ed4312SSebastian Grimberg // Compute x = alpha x on device
7049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
CeedDeviceScale_Sycl(CeedScalar * x_array,CeedScalar alpha,CeedInt length)7149ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
72dd64fc84SJeremy L Thompson   const int block_size = 512;
73dd64fc84SJeremy L Thompson   const int vec_size   = length;
74dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
7549ed4312SSebastian Grimberg 
76dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
77dd64fc84SJeremy L Thompson   scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length);
7849ed4312SSebastian Grimberg   return 0;
7949ed4312SSebastian Grimberg }
8049ed4312SSebastian Grimberg 
8149ed4312SSebastian Grimberg //------------------------------------------------------------------------------
8249ed4312SSebastian Grimberg // Kernel for axpy
8349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
axpyValueK(CeedScalar * __restrict__ y,CeedScalar alpha,CeedScalar * __restrict__ x,CeedInt size)8449ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
85dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
86dd64fc84SJeremy L Thompson   if (index >= size) return;
87dd64fc84SJeremy L Thompson   y[index] += alpha * x[index];
8849ed4312SSebastian Grimberg }
8949ed4312SSebastian Grimberg 
9049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
9149ed4312SSebastian Grimberg // Compute y = alpha x + y on device
9249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
CeedDeviceAXPY_Sycl(CeedScalar * y_array,CeedScalar alpha,CeedScalar * x_array,CeedInt length)9349ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
94dd64fc84SJeremy L Thompson   const int block_size = 512;
95dd64fc84SJeremy L Thompson   const int vec_size   = length;
96dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
9749ed4312SSebastian Grimberg 
98dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
99dd64fc84SJeremy L Thompson   axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length);
10049ed4312SSebastian Grimberg   return 0;
10149ed4312SSebastian Grimberg }
10249ed4312SSebastian Grimberg 
10349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
10449ed4312SSebastian Grimberg // Kernel for pointwise mult
10549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
pointwiseMultValueK(CeedScalar * __restrict__ w,CeedScalar * x,CeedScalar * __restrict__ y,CeedInt size)10649ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
107dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
108dd64fc84SJeremy L Thompson 
109dd64fc84SJeremy L Thompson   if (index >= size) return;
110dd64fc84SJeremy L Thompson   w[index] = x[index] * y[index];
11149ed4312SSebastian Grimberg }
11249ed4312SSebastian Grimberg 
11349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
11449ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device
11549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
CeedDevicePointwiseMult_Sycl(CeedScalar * w_array,CeedScalar * x_array,CeedScalar * y_array,CeedInt length)11649ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
117dd64fc84SJeremy L Thompson   const int block_size = 512;
118dd64fc84SJeremy L Thompson   const int vec_size   = length;
119dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
12049ed4312SSebastian Grimberg 
121dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
122dd64fc84SJeremy L Thompson   pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length);
12349ed4312SSebastian Grimberg   return 0;
12449ed4312SSebastian Grimberg }
12549ed4312SSebastian Grimberg 
12649ed4312SSebastian Grimberg //------------------------------------------------------------------------------
127