xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision 93b6d8191bb649fce95198206c6bff32567615d1)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <hip/hip_runtime.h>
10 
11 //------------------------------------------------------------------------------
12 // Kernel for set value on device
13 //------------------------------------------------------------------------------
14 __global__ static void setValueK(CeedScalar * __restrict__ vec, CeedInt size,
15                                  CeedScalar val) {
16   int idx = threadIdx.x + blockDim.x * blockIdx.x;
17   if (idx >= size)
18     return;
19   vec[idx] = val;
20 }
21 
22 //------------------------------------------------------------------------------
23 // Set value on device memory
24 //------------------------------------------------------------------------------
25 extern "C" int CeedDeviceSetValue_Hip(CeedScalar* d_array, CeedInt length,
26                                       CeedScalar val) {
27   const int bsize = 512;
28   const int vecsize = length;
29   int gridsize = vecsize / bsize;
30 
31   if (bsize * gridsize < vecsize)
32     gridsize += 1;
33   hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
34   return 0;
35 }
36 
37 //------------------------------------------------------------------------------
38 // Kernel for taking reciprocal
39 //------------------------------------------------------------------------------
40 __global__ static void rcpValueK(CeedScalar * __restrict__ vec, CeedInt size) {
41   int idx = threadIdx.x + blockDim.x * blockIdx.x;
42   if (idx >= size)
43     return;
44   if (fabs(vec[idx]) > 1E-16)
45     vec[idx] = 1./vec[idx];
46 }
47 
48 //------------------------------------------------------------------------------
49 // Take vector reciprocal in device memory
50 //------------------------------------------------------------------------------
51 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar* d_array, CeedInt length) {
52   const int bsize = 512;
53   const int vecsize = length;
54   int gridsize = vecsize / bsize;
55 
56   if (bsize * gridsize < vecsize)
57     gridsize += 1;
58   hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
59   return 0;
60 }
61 
62 //------------------------------------------------------------------------------
63 // Kernel for scale
64 //------------------------------------------------------------------------------
65 __global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha,
66     CeedInt size) {
67   int idx = threadIdx.x + blockDim.x * blockIdx.x;
68   if (idx >= size)
69     return;
70   x[idx] *= alpha;
71 }
72 
73 //------------------------------------------------------------------------------
74 // Compute x = alpha x on device
75 //------------------------------------------------------------------------------
76 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha,
77     CeedInt length) {
78   const int bsize = 512;
79   const int vecsize = length;
80   int gridsize = vecsize / bsize;
81 
82   if (bsize * gridsize < vecsize)
83     gridsize += 1;
84   hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha,
85                      length);
86   return 0;
87 }
88 
89 //------------------------------------------------------------------------------
90 // Kernel for axpy
91 //------------------------------------------------------------------------------
92 __global__ static void axpyValueK(CeedScalar * __restrict__ y, CeedScalar alpha,
93     CeedScalar * __restrict__ x, CeedInt size) {
94   int idx = threadIdx.x + blockDim.x * blockIdx.x;
95   if (idx >= size)
96     return;
97   y[idx] += alpha * x[idx];
98 }
99 
100 //------------------------------------------------------------------------------
101 // Compute y = alpha x + y on device
102 //------------------------------------------------------------------------------
103 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha,
104     CeedScalar *x_array, CeedInt length) {
105   const int bsize = 512;
106   const int vecsize = length;
107   int gridsize = vecsize / bsize;
108 
109   if (bsize * gridsize < vecsize)
110     gridsize += 1;
111   hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha,
112                      x_array, length);
113   return 0;
114 }
115 
116 //------------------------------------------------------------------------------
117 // Kernel for pointwise mult
118 //------------------------------------------------------------------------------
119 __global__ static void pointwiseMultValueK(CeedScalar * __restrict__ w,
120     CeedScalar * x, CeedScalar * __restrict__ y, CeedInt size) {
121   int idx = threadIdx.x + blockDim.x * blockIdx.x;
122   if (idx >= size)
123     return;
124   w[idx] = x[idx] * y[idx];
125 }
126 
127 //------------------------------------------------------------------------------
128 // Compute the pointwise multiplication w = x .* y on device
129 //------------------------------------------------------------------------------
130 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array,
131     CeedScalar *y_array, CeedInt length) {
132   const int bsize = 512;
133   const int vecsize = length;
134   int gridsize = vecsize / bsize;
135 
136   if (bsize * gridsize < vecsize)
137     gridsize += 1;
138   hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array,
139                      x_array, y_array, length);
140   return 0;
141 }
142