xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision db002c03923317a1c3814dcd861330002c00a8ea)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <hip/hip_runtime.h>
10 
11 //------------------------------------------------------------------------------
12 // Kernel for set value on device
13 //------------------------------------------------------------------------------
14 __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
15   int idx = threadIdx.x + blockDim.x * blockIdx.x;
16   if (idx >= size) return;
17   vec[idx] = val;
18 }
19 
20 //------------------------------------------------------------------------------
21 // Set value on device memory
22 //------------------------------------------------------------------------------
23 extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedInt length, CeedScalar val) {
24   const int bsize    = 512;
25   const int vecsize  = length;
26   int       gridsize = vecsize / bsize;
27 
28   if (bsize * gridsize < vecsize) gridsize += 1;
29   hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
30   return 0;
31 }
32 
33 //------------------------------------------------------------------------------
34 // Kernel for taking reciprocal
35 //------------------------------------------------------------------------------
36 __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
37   int idx = threadIdx.x + blockDim.x * blockIdx.x;
38   if (idx >= size) return;
39   if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
40 }
41 
42 //------------------------------------------------------------------------------
43 // Take vector reciprocal in device memory
44 //------------------------------------------------------------------------------
45 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedInt length) {
46   const int bsize    = 512;
47   const int vecsize  = length;
48   int       gridsize = vecsize / bsize;
49 
50   if (bsize * gridsize < vecsize) gridsize += 1;
51   hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
52   return 0;
53 }
54 
55 //------------------------------------------------------------------------------
56 // Kernel for scale
57 //------------------------------------------------------------------------------
58 __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
59   int idx = threadIdx.x + blockDim.x * blockIdx.x;
60   if (idx >= size) return;
61   x[idx] *= alpha;
62 }
63 
64 //------------------------------------------------------------------------------
65 // Compute x = alpha x on device
66 //------------------------------------------------------------------------------
67 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
68   const int bsize    = 512;
69   const int vecsize  = length;
70   int       gridsize = vecsize / bsize;
71 
72   if (bsize * gridsize < vecsize) gridsize += 1;
73   hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length);
74   return 0;
75 }
76 
77 //------------------------------------------------------------------------------
78 // Kernel for axpy
79 //------------------------------------------------------------------------------
80 __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
81   int idx = threadIdx.x + blockDim.x * blockIdx.x;
82   if (idx >= size) return;
83   y[idx] += alpha * x[idx];
84 }
85 
86 //------------------------------------------------------------------------------
87 // Compute y = alpha x + y on device
88 //------------------------------------------------------------------------------
89 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
90   const int bsize    = 512;
91   const int vecsize  = length;
92   int       gridsize = vecsize / bsize;
93 
94   if (bsize * gridsize < vecsize) gridsize += 1;
95   hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length);
96   return 0;
97 }
98 
99 //------------------------------------------------------------------------------
100 // Kernel for axpby
101 //------------------------------------------------------------------------------
102 __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedInt size) {
103   int idx = threadIdx.x + blockDim.x * blockIdx.x;
104   if (idx >= size) return;
105   y[idx] = beta * y[idx];
106   y[idx] += alpha * x[idx];
107 }
108 
109 //------------------------------------------------------------------------------
110 // Compute y = alpha x + beta y on device
111 //------------------------------------------------------------------------------
112 extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedInt length) {
113   const int bsize    = 512;
114   const int vecsize  = length;
115   int       gridsize = vecsize / bsize;
116 
117   if (bsize * gridsize < vecsize) gridsize += 1;
118   hipLaunchKernelGGL(axpbyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, beta, x_array, length);
119   return 0;
120 }
121 
122 //------------------------------------------------------------------------------
123 // Kernel for pointwise mult
124 //------------------------------------------------------------------------------
125 __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
126   int idx = threadIdx.x + blockDim.x * blockIdx.x;
127   if (idx >= size) return;
128   w[idx] = x[idx] * y[idx];
129 }
130 
131 //------------------------------------------------------------------------------
132 // Compute the pointwise multiplication w = x .* y on device
133 //------------------------------------------------------------------------------
134 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
135   const int bsize    = 512;
136   const int vecsize  = length;
137   int       gridsize = vecsize / bsize;
138 
139   if (bsize * gridsize < vecsize) gridsize += 1;
140   hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length);
141   return 0;
142 }
143 
144 //------------------------------------------------------------------------------
145