xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision 1d5592410e85618bc90219351a01600c29cd94ba)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <hip/hip_runtime.h>
10 
11 //------------------------------------------------------------------------------
12 // Kernel for set value on device
13 //------------------------------------------------------------------------------
14 __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
15   int idx = threadIdx.x + blockDim.x * blockIdx.x;
16   if (idx >= size) return;
17   vec[idx] = val;
18 }
19 
20 //------------------------------------------------------------------------------
21 // Set value on device memory
22 //------------------------------------------------------------------------------
23 extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedInt length, CeedScalar val) {
24   const int bsize    = 512;
25   const int vecsize  = length;
26   int       gridsize = vecsize / bsize;
27 
28   if (bsize * gridsize < vecsize) gridsize += 1;
29   hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
30   return 0;
31 }
32 
33 //------------------------------------------------------------------------------
34 // Kernel for taking reciprocal
35 //------------------------------------------------------------------------------
36 __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
37   int idx = threadIdx.x + blockDim.x * blockIdx.x;
38   if (idx >= size) return;
39   if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
40 }
41 
42 //------------------------------------------------------------------------------
43 // Take vector reciprocal in device memory
44 //------------------------------------------------------------------------------
45 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedInt length) {
46   const int bsize    = 512;
47   const int vecsize  = length;
48   int       gridsize = vecsize / bsize;
49 
50   if (bsize * gridsize < vecsize) gridsize += 1;
51   hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
52   return 0;
53 }
54 
55 //------------------------------------------------------------------------------
56 // Kernel for scale
57 //------------------------------------------------------------------------------
58 __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
59   int idx = threadIdx.x + blockDim.x * blockIdx.x;
60   if (idx >= size) return;
61   x[idx] *= alpha;
62 }
63 
64 //------------------------------------------------------------------------------
65 // Compute x = alpha x on device
66 //------------------------------------------------------------------------------
67 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
68   const int bsize    = 512;
69   const int vecsize  = length;
70   int       gridsize = vecsize / bsize;
71 
72   if (bsize * gridsize < vecsize) gridsize += 1;
73   hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length);
74   return 0;
75 }
76 
77 //------------------------------------------------------------------------------
78 // Kernel for axpy
79 //------------------------------------------------------------------------------
80 __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
81   int idx = threadIdx.x + blockDim.x * blockIdx.x;
82   if (idx >= size) return;
83   y[idx] += alpha * x[idx];
84 }
85 
86 //------------------------------------------------------------------------------
87 // Compute y = alpha x + y on device
88 //------------------------------------------------------------------------------
89 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
90   const int bsize    = 512;
91   const int vecsize  = length;
92   int       gridsize = vecsize / bsize;
93 
94   if (bsize * gridsize < vecsize) gridsize += 1;
95   hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length);
96   return 0;
97 }
98 
99 //------------------------------------------------------------------------------
100 // Kernel for pointwise mult
101 //------------------------------------------------------------------------------
102 __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
103   int idx = threadIdx.x + blockDim.x * blockIdx.x;
104   if (idx >= size) return;
105   w[idx] = x[idx] * y[idx];
106 }
107 
108 //------------------------------------------------------------------------------
109 // Compute the pointwise multiplication w = x .* y on device
110 //------------------------------------------------------------------------------
111 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
112   const int bsize    = 512;
113   const int vecsize  = length;
114   int       gridsize = vecsize / bsize;
115 
116   if (bsize * gridsize < vecsize) gridsize += 1;
117   hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length);
118   return 0;
119 }
120