xref: /libCEED/backends/sycl-ref/kernels/sycl-ref-vector.cpp (revision 49ed4312d52444017c6cc4554c818c7a7ccf1959)
1*49ed4312SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
2*49ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
3*49ed4312SSebastian Grimberg // files for details.
4*49ed4312SSebastian Grimberg //
5*49ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
6*49ed4312SSebastian Grimberg //
7*49ed4312SSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
8*49ed4312SSebastian Grimberg 
9*49ed4312SSebastian Grimberg #include <ceed/ceed.h>
10*49ed4312SSebastian Grimberg #include <sycl/sycl.hpp>
11*49ed4312SSebastian Grimberg 
12*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
13*49ed4312SSebastian Grimberg // Kernel for set value on device
14*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
15*49ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
16*49ed4312SSebastian Grimberg   int idx = threadIdx.x + blockDim.x * blockIdx.x;
17*49ed4312SSebastian Grimberg   if (idx >= size) return;
18*49ed4312SSebastian Grimberg   vec[idx] = val;
19*49ed4312SSebastian Grimberg }
20*49ed4312SSebastian Grimberg 
21*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
22*49ed4312SSebastian Grimberg // Set value on device memory
23*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
24*49ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
25*49ed4312SSebastian Grimberg   const int bsize    = 512;
26*49ed4312SSebastian Grimberg   const int vecsize  = length;
27*49ed4312SSebastian Grimberg   int       gridsize = vecsize / bsize;
28*49ed4312SSebastian Grimberg 
29*49ed4312SSebastian Grimberg   if (bsize * gridsize < vecsize) gridsize += 1;
30*49ed4312SSebastian Grimberg   setValueK<<<gridsize, bsize>>>(d_array, length, val);
31*49ed4312SSebastian Grimberg   return 0;
32*49ed4312SSebastian Grimberg }
33*49ed4312SSebastian Grimberg 
34*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
35*49ed4312SSebastian Grimberg // Kernel for taking reciprocal
36*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
37*49ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
38*49ed4312SSebastian Grimberg   int idx = threadIdx.x + blockDim.x * blockIdx.x;
39*49ed4312SSebastian Grimberg   if (idx >= size) return;
40*49ed4312SSebastian Grimberg   if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
41*49ed4312SSebastian Grimberg }
42*49ed4312SSebastian Grimberg 
43*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
44*49ed4312SSebastian Grimberg // Take vector reciprocal in device memory
45*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
46*49ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
47*49ed4312SSebastian Grimberg   const int bsize    = 512;
48*49ed4312SSebastian Grimberg   const int vecsize  = length;
49*49ed4312SSebastian Grimberg   int       gridsize = vecsize / bsize;
50*49ed4312SSebastian Grimberg 
51*49ed4312SSebastian Grimberg   if (bsize * gridsize < vecsize) gridsize += 1;
52*49ed4312SSebastian Grimberg   rcpValueK<<<gridsize, bsize>>>(d_array, length);
53*49ed4312SSebastian Grimberg   return 0;
54*49ed4312SSebastian Grimberg }
55*49ed4312SSebastian Grimberg 
56*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
57*49ed4312SSebastian Grimberg // Kernel for scale
58*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
59*49ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
60*49ed4312SSebastian Grimberg   int idx = threadIdx.x + blockDim.x * blockIdx.x;
61*49ed4312SSebastian Grimberg   if (idx >= size) return;
62*49ed4312SSebastian Grimberg   x[idx] *= alpha;
63*49ed4312SSebastian Grimberg }
64*49ed4312SSebastian Grimberg 
65*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
66*49ed4312SSebastian Grimberg // Compute x = alpha x on device
67*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
68*49ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
69*49ed4312SSebastian Grimberg   const int bsize    = 512;
70*49ed4312SSebastian Grimberg   const int vecsize  = length;
71*49ed4312SSebastian Grimberg   int       gridsize = vecsize / bsize;
72*49ed4312SSebastian Grimberg 
73*49ed4312SSebastian Grimberg   if (bsize * gridsize < vecsize) gridsize += 1;
74*49ed4312SSebastian Grimberg   scaleValueK<<<gridsize, bsize>>>(x_array, alpha, length);
75*49ed4312SSebastian Grimberg   return 0;
76*49ed4312SSebastian Grimberg }
77*49ed4312SSebastian Grimberg 
78*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
79*49ed4312SSebastian Grimberg // Kernel for axpy
80*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
81*49ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
82*49ed4312SSebastian Grimberg   int idx = threadIdx.x + blockDim.x * blockIdx.x;
83*49ed4312SSebastian Grimberg   if (idx >= size) return;
84*49ed4312SSebastian Grimberg   y[idx] += alpha * x[idx];
85*49ed4312SSebastian Grimberg }
86*49ed4312SSebastian Grimberg 
87*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
88*49ed4312SSebastian Grimberg // Compute y = alpha x + y on device
89*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
90*49ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
91*49ed4312SSebastian Grimberg   const int bsize    = 512;
92*49ed4312SSebastian Grimberg   const int vecsize  = length;
93*49ed4312SSebastian Grimberg   int       gridsize = vecsize / bsize;
94*49ed4312SSebastian Grimberg 
95*49ed4312SSebastian Grimberg   if (bsize * gridsize < vecsize) gridsize += 1;
96*49ed4312SSebastian Grimberg   axpyValueK<<<gridsize, bsize>>>(y_array, alpha, x_array, length);
97*49ed4312SSebastian Grimberg   return 0;
98*49ed4312SSebastian Grimberg }
99*49ed4312SSebastian Grimberg 
100*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
101*49ed4312SSebastian Grimberg // Kernel for pointwise mult
102*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
103*49ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
104*49ed4312SSebastian Grimberg   int idx = threadIdx.x + blockDim.x * blockIdx.x;
105*49ed4312SSebastian Grimberg   if (idx >= size) return;
106*49ed4312SSebastian Grimberg   w[idx] = x[idx] * y[idx];
107*49ed4312SSebastian Grimberg }
108*49ed4312SSebastian Grimberg 
109*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
110*49ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device
111*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
112*49ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
113*49ed4312SSebastian Grimberg   const int bsize    = 512;
114*49ed4312SSebastian Grimberg   const int vecsize  = length;
115*49ed4312SSebastian Grimberg   int       gridsize = vecsize / bsize;
116*49ed4312SSebastian Grimberg 
117*49ed4312SSebastian Grimberg   if (bsize * gridsize < vecsize) gridsize += 1;
118*49ed4312SSebastian Grimberg   pointwiseMultValueK<<<gridsize, bsize>>>(w_array, x_array, y_array, length);
119*49ed4312SSebastian Grimberg   return 0;
120*49ed4312SSebastian Grimberg }
121*49ed4312SSebastian Grimberg 
122*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------
123