xref: /libCEED/backends/sycl-ref/kernels/sycl-ref-vector.cpp (revision ac5aa7bc7371bf37e58d1cad715ff1f25da1e838)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
2 // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
3 // files for details.
4 //
5 // SPDX-License-Identifier: BSD-2-Clause
6 //
7 // This file is part of CEED:  http://github.com/ceed
8 
9 #include <ceed/ceed.h>
10 #include <sycl/sycl.hpp>
11 
12 //------------------------------------------------------------------------------
13 // Kernel for set value on device
14 //------------------------------------------------------------------------------
// Fill kernel: one thread per entry, 1-D grid; entries past `size` are skipped.
__global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;

  if (i < size) vec[i] = val;
}
21 
22 //------------------------------------------------------------------------------
23 // Set value on device memory
24 //------------------------------------------------------------------------------
// Set every entry of the device array d_array (of `length` entries) to `val`.
// Launches a 1-D grid of 512-thread blocks; always returns 0.
extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
  const int block_size = 512;

  // An empty vector needs no work; launching a grid of 0 blocks is an
  // invalid configuration, so bail out early.
  if (length < 1) return 0;
  // Ceiling division replaces the multiply-and-compare round-up
  const int grid_size = (length + block_size - 1) / block_size;

  setValueK<<<grid_size, block_size>>>(d_array, length, val);
  return 0;
}
34 
35 //------------------------------------------------------------------------------
36 // Kernel for taking reciprocal
37 //------------------------------------------------------------------------------
// Reciprocal kernel: one thread per entry, 1-D grid.
// Entries whose magnitude is at or below the 1E-16 tolerance are left
// unchanged to avoid division by (near) zero.
__global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  // Literals cast to CeedScalar so single-precision builds do not silently
  // promote the comparison and division to double
  if (fabs(vec[index]) > (CeedScalar)1E-16) vec[index] = (CeedScalar)1.0 / vec[index];
}
44 
45 //------------------------------------------------------------------------------
46 // Take vector reciprocal in device memory
47 //------------------------------------------------------------------------------
// Replace each entry of d_array with its reciprocal (entries with magnitude
// below the kernel's tolerance are left untouched).
// Launches a 1-D grid of 512-thread blocks; always returns 0.
extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
  const int block_size = 512;

  // An empty vector needs no work; launching a grid of 0 blocks is an
  // invalid configuration, so bail out early.
  if (length < 1) return 0;
  // Ceiling division replaces the multiply-and-compare round-up
  const int grid_size = (length + block_size - 1) / block_size;

  rcpValueK<<<grid_size, block_size>>>(d_array, length);
  return 0;
}
57 
58 //------------------------------------------------------------------------------
59 // Kernel for scale
60 //------------------------------------------------------------------------------
// Scaling kernel: one thread per entry, 1-D grid; computes x[i] *= alpha.
__global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;

  if (i < size) x[i] *= alpha;
}
67 
68 //------------------------------------------------------------------------------
69 // Compute x = alpha x on device
70 //------------------------------------------------------------------------------
// Compute x = alpha * x on the device array x_array of `length` entries.
// Launches a 1-D grid of 512-thread blocks; always returns 0.
extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
  const int block_size = 512;

  // An empty vector needs no work; launching a grid of 0 blocks is an
  // invalid configuration, so bail out early.
  if (length < 1) return 0;
  // Ceiling division replaces the multiply-and-compare round-up
  const int grid_size = (length + block_size - 1) / block_size;

  scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length);
  return 0;
}
80 
81 //------------------------------------------------------------------------------
82 // Kernel for axpy
83 //------------------------------------------------------------------------------
// AXPY kernel: one thread per entry, 1-D grid; computes y[i] += alpha * x[i].
// x is read-only here, so it is declared const to document the contract and
// allow read-only-cache loads; the existing non-const caller still converts.
__global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, const CeedScalar *__restrict__ x, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  y[index] += alpha * x[index];
}
89 
90 //------------------------------------------------------------------------------
91 // Compute y = alpha x + y on device
92 //------------------------------------------------------------------------------
// Compute y = alpha * x + y on device arrays of `length` entries.
// Launches a 1-D grid of 512-thread blocks; always returns 0.
extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
  const int block_size = 512;

  // An empty vector needs no work; launching a grid of 0 blocks is an
  // invalid configuration, so bail out early.
  if (length < 1) return 0;
  // Ceiling division replaces the multiply-and-compare round-up
  const int grid_size = (length + block_size - 1) / block_size;

  axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length);
  return 0;
}
102 
103 //------------------------------------------------------------------------------
104 // Kernel for pointwise mult
105 //------------------------------------------------------------------------------
// Pointwise-multiply kernel: one thread per entry, 1-D grid; w[i] = x[i] * y[i].
// NOTE(review): x intentionally(?) lacks const/__restrict__ unlike w and y —
// presumably callers may alias w with x for an in-place multiply, in which
// case adding __restrict__ to x would be undefined behavior. Confirm the
// aliasing contract at the call sites before tightening the qualifiers.
__global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  w[index] = x[index] * y[index];
}
112 
113 //------------------------------------------------------------------------------
114 // Compute the pointwise multiplication w = x .* y on device
115 //------------------------------------------------------------------------------
// Compute the pointwise multiplication w = x .* y on device arrays of
// `length` entries. Launches a 1-D grid of 512-thread blocks; always returns 0.
extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
  const int block_size = 512;

  // An empty vector needs no work; launching a grid of 0 blocks is an
  // invalid configuration, so bail out early.
  if (length < 1) return 0;
  // Ceiling division replaces the multiply-and-compare round-up
  const int grid_size = (length + block_size - 1) / block_size;

  pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length);
  return 0;
}
125 
126 //------------------------------------------------------------------------------
127