xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision a61c78d6a6d5ea69db49949746e6dc59b544c365)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <hip/hip_runtime.h>
19 
20 //------------------------------------------------------------------------------
21 // Kernel for set value on device
22 //------------------------------------------------------------------------------
23 __global__ static void setValueK(CeedScalar * __restrict__ vec, CeedInt size,
24                                  CeedScalar val) {
25   int idx = threadIdx.x + blockDim.x * blockIdx.x;
26   if (idx >= size)
27     return;
28   vec[idx] = val;
29 }
30 
31 //------------------------------------------------------------------------------
32 // Set value on device memory
33 //------------------------------------------------------------------------------
34 extern "C" int CeedDeviceSetValue_Hip(CeedScalar* d_array, CeedInt length,
35                                       CeedScalar val) {
36   const int bsize = 512;
37   const int vecsize = length;
38   int gridsize = vecsize / bsize;
39 
40   if (bsize * gridsize < vecsize)
41     gridsize += 1;
42   hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
43   return 0;
44 }
45 
46 //------------------------------------------------------------------------------
47 // Kernel for taking reciprocal
48 //------------------------------------------------------------------------------
49 __global__ static void rcpValueK(CeedScalar * __restrict__ vec, CeedInt size) {
50   int idx = threadIdx.x + blockDim.x * blockIdx.x;
51   if (idx >= size)
52     return;
53   if (fabs(vec[idx]) > 1E-16)
54     vec[idx] = 1./vec[idx];
55 }
56 
57 //------------------------------------------------------------------------------
58 // Take vector reciprocal in device memory
59 //------------------------------------------------------------------------------
60 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar* d_array, CeedInt length) {
61   const int bsize = 512;
62   const int vecsize = length;
63   int gridsize = vecsize / bsize;
64 
65   if (bsize * gridsize < vecsize)
66     gridsize += 1;
67   hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
68   return 0;
69 }
70 
71 //------------------------------------------------------------------------------
72 // Kernel for scale
73 //------------------------------------------------------------------------------
74 __global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha,
75     CeedInt size) {
76   int idx = threadIdx.x + blockDim.x * blockIdx.x;
77   if (idx >= size)
78     return;
79   x[idx] *= alpha;
80 }
81 
82 //------------------------------------------------------------------------------
83 // Compute x = alpha x on device
84 //------------------------------------------------------------------------------
85 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha,
86     CeedInt length) {
87   const int bsize = 512;
88   const int vecsize = length;
89   int gridsize = vecsize / bsize;
90 
91   if (bsize * gridsize < vecsize)
92     gridsize += 1;
93   hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha,
94                      length);
95   return 0;
96 }
97 
98 //------------------------------------------------------------------------------
99 // Kernel for axpy
100 //------------------------------------------------------------------------------
101 __global__ static void axpyValueK(CeedScalar * __restrict__ y, CeedScalar alpha,
102     CeedScalar * __restrict__ x, CeedInt size) {
103   int idx = threadIdx.x + blockDim.x * blockIdx.x;
104   if (idx >= size)
105     return;
106   y[idx] += alpha * x[idx];
107 }
108 
109 //------------------------------------------------------------------------------
110 // Compute y = alpha x + y on device
111 //------------------------------------------------------------------------------
112 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha,
113     CeedScalar *x_array, CeedInt length) {
114   const int bsize = 512;
115   const int vecsize = length;
116   int gridsize = vecsize / bsize;
117 
118   if (bsize * gridsize < vecsize)
119     gridsize += 1;
120   hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha,
121                      x_array, length);
122   return 0;
123 }
124 
125 //------------------------------------------------------------------------------
126 // Kernel for pointwise mult
127 //------------------------------------------------------------------------------
128 __global__ static void pointwiseMultValueK(CeedScalar * __restrict__ w,
129     CeedScalar * x, CeedScalar * __restrict__ y, CeedInt size) {
130   int idx = threadIdx.x + blockDim.x * blockIdx.x;
131   if (idx >= size)
132     return;
133   w[idx] = x[idx] * y[idx];
134 }
135 
136 //------------------------------------------------------------------------------
137 // Compute the pointwise multiplication w = x .* y on device
138 //------------------------------------------------------------------------------
139 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array,
140     CeedScalar *y_array, CeedInt length) {
141   const int bsize = 512;
142   const int vecsize = length;
143   int gridsize = vecsize / bsize;
144 
145   if (bsize * gridsize < vecsize)
146     gridsize += 1;
147   hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array,
148                      x_array, y_array, length);
149   return 0;
150 }
151