xref: /libCEED/backends/sycl-ref/kernels/sycl-ref-vector.cpp (revision 1c66c397a67401e1a222857807e6e5b7c45b88c0)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
2 // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
3 // files for details.
4 //
5 // SPDX-License-Identifier: BSD-2-Clause
6 //
7 // This file is part of CEED:  http://github.com/ceed
8 
9 #include <ceed/ceed.h>
10 #include <sycl/sycl.hpp>
11 
12 //------------------------------------------------------------------------------
13 // Kernel for set value on device
14 //------------------------------------------------------------------------------
15 __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
16   int idx = threadIdx.x + blockDim.x * blockIdx.x;
17   if (idx >= size) return;
18   vec[idx] = val;
19 }
20 
21 //------------------------------------------------------------------------------
22 // Set value on device memory
23 //------------------------------------------------------------------------------
24 extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
25   const int bsize    = 512;
26   const int vecsize  = length;
27   int       gridsize = vecsize / bsize;
28 
29   if (bsize * gridsize < vecsize) gridsize += 1;
30   setValueK<<<gridsize, bsize>>>(d_array, length, val);
31   return 0;
32 }
33 
34 //------------------------------------------------------------------------------
35 // Kernel for taking reciprocal
36 //------------------------------------------------------------------------------
37 __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
38   int idx = threadIdx.x + blockDim.x * blockIdx.x;
39   if (idx >= size) return;
40   if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
41 }
42 
43 //------------------------------------------------------------------------------
44 // Take vector reciprocal in device memory
45 //------------------------------------------------------------------------------
46 extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
47   const int bsize    = 512;
48   const int vecsize  = length;
49   int       gridsize = vecsize / bsize;
50 
51   if (bsize * gridsize < vecsize) gridsize += 1;
52   rcpValueK<<<gridsize, bsize>>>(d_array, length);
53   return 0;
54 }
55 
56 //------------------------------------------------------------------------------
57 // Kernel for scale
58 //------------------------------------------------------------------------------
59 __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
60   int idx = threadIdx.x + blockDim.x * blockIdx.x;
61   if (idx >= size) return;
62   x[idx] *= alpha;
63 }
64 
65 //------------------------------------------------------------------------------
66 // Compute x = alpha x on device
67 //------------------------------------------------------------------------------
68 extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
69   const int bsize    = 512;
70   const int vecsize  = length;
71   int       gridsize = vecsize / bsize;
72 
73   if (bsize * gridsize < vecsize) gridsize += 1;
74   scaleValueK<<<gridsize, bsize>>>(x_array, alpha, length);
75   return 0;
76 }
77 
78 //------------------------------------------------------------------------------
79 // Kernel for axpy
80 //------------------------------------------------------------------------------
81 __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
82   int idx = threadIdx.x + blockDim.x * blockIdx.x;
83   if (idx >= size) return;
84   y[idx] += alpha * x[idx];
85 }
86 
87 //------------------------------------------------------------------------------
88 // Compute y = alpha x + y on device
89 //------------------------------------------------------------------------------
90 extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
91   const int bsize    = 512;
92   const int vecsize  = length;
93   int       gridsize = vecsize / bsize;
94 
95   if (bsize * gridsize < vecsize) gridsize += 1;
96   axpyValueK<<<gridsize, bsize>>>(y_array, alpha, x_array, length);
97   return 0;
98 }
99 
100 //------------------------------------------------------------------------------
101 // Kernel for pointwise mult
102 //------------------------------------------------------------------------------
103 __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
104   int idx = threadIdx.x + blockDim.x * blockIdx.x;
105   if (idx >= size) return;
106   w[idx] = x[idx] * y[idx];
107 }
108 
109 //------------------------------------------------------------------------------
110 // Compute the pointwise multiplication w = x .* y on device
111 //------------------------------------------------------------------------------
112 extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
113   const int bsize    = 512;
114   const int vecsize  = length;
115   int       gridsize = vecsize / bsize;
116 
117   if (bsize * gridsize < vecsize) gridsize += 1;
118   pointwiseMultValueK<<<gridsize, bsize>>>(w_array, x_array, y_array, length);
119   return 0;
120 }
121 
122 //------------------------------------------------------------------------------
123