xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision 5aed82e4fa97acf4ba24a7f10a35f5303a6798e0)
1*5aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
849aac155SJeremy L Thompson #include <ceed.h>
90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
100d0321e0SJeremy L Thompson 
110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
120d0321e0SJeremy L Thompson // Kernel for set value on device
130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
149330daecSnbeams __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) {
15b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
16b7453713SJeremy L Thompson 
17b7453713SJeremy L Thompson   if (index >= size) return;
18b7453713SJeremy L Thompson   vec[index] = val;
190d0321e0SJeremy L Thompson }
200d0321e0SJeremy L Thompson 
210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
220d0321e0SJeremy L Thompson // Set value on device memory
230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
249330daecSnbeams extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) {
25b7453713SJeremy L Thompson   const int      block_size = 512;
26b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
27b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
280d0321e0SJeremy L Thompson 
29b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
30b7453713SJeremy L Thompson   hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val);
310d0321e0SJeremy L Thompson   return 0;
320d0321e0SJeremy L Thompson }
330d0321e0SJeremy L Thompson 
340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
350d0321e0SJeremy L Thompson // Kernel for taking reciprocal
360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
379330daecSnbeams __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) {
38b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
39b7453713SJeremy L Thompson 
40b7453713SJeremy L Thompson   if (index >= size) return;
41b7453713SJeremy L Thompson   if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index];
420d0321e0SJeremy L Thompson }
430d0321e0SJeremy L Thompson 
440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
450d0321e0SJeremy L Thompson // Take vector reciprocal in device memory
460d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
479330daecSnbeams extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) {
48b7453713SJeremy L Thompson   const int      block_size = 512;
49b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
50b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
510d0321e0SJeremy L Thompson 
52b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
53b7453713SJeremy L Thompson   hipLaunchKernelGGL(rcpValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length);
540d0321e0SJeremy L Thompson   return 0;
550d0321e0SJeremy L Thompson }
560d0321e0SJeremy L Thompson 
570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
580d0321e0SJeremy L Thompson // Kernel for scale
590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
609330daecSnbeams __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) {
61b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
62b7453713SJeremy L Thompson 
63b7453713SJeremy L Thompson   if (index >= size) return;
64b7453713SJeremy L Thompson   x[index] *= alpha;
650d0321e0SJeremy L Thompson }
660d0321e0SJeremy L Thompson 
670d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
680d0321e0SJeremy L Thompson // Compute x = alpha x on device
690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
709330daecSnbeams extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
71b7453713SJeremy L Thompson   const int      block_size = 512;
72b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
73b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
740d0321e0SJeremy L Thompson 
75b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
76b7453713SJeremy L Thompson   hipLaunchKernelGGL(scaleValueK, dim3(grid_size), dim3(block_size), 0, 0, x_array, alpha, length);
770d0321e0SJeremy L Thompson   return 0;
780d0321e0SJeremy L Thompson }
790d0321e0SJeremy L Thompson 
800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
810d0321e0SJeremy L Thompson // Kernel for axpy
820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
839330daecSnbeams __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) {
84b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
85b7453713SJeremy L Thompson 
86b7453713SJeremy L Thompson   if (index >= size) return;
87b7453713SJeremy L Thompson   y[index] += alpha * x[index];
880d0321e0SJeremy L Thompson }
890d0321e0SJeremy L Thompson 
900d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
910d0321e0SJeremy L Thompson // Compute y = alpha x + y on device
920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
939330daecSnbeams extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
94b7453713SJeremy L Thompson   const int      block_size = 512;
95b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
96b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
970d0321e0SJeremy L Thompson 
98b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
99b7453713SJeremy L Thompson   hipLaunchKernelGGL(axpyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, x_array, length);
1000d0321e0SJeremy L Thompson   return 0;
1010d0321e0SJeremy L Thompson }
1020d0321e0SJeremy L Thompson 
1030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1045fb68f37SKaren (Ren) Stengel // Kernel for axpby
1055fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1069330daecSnbeams __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) {
107b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
108b7453713SJeremy L Thompson 
109b7453713SJeremy L Thompson   if (index >= size) return;
110b7453713SJeremy L Thompson   y[index] = beta * y[index];
111b7453713SJeremy L Thompson   y[index] += alpha * x[index];
1125fb68f37SKaren (Ren) Stengel }
1135fb68f37SKaren (Ren) Stengel 
1145fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1155fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on device
1165fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1179330daecSnbeams extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
118b7453713SJeremy L Thompson   const int      block_size = 512;
119b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
120b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
1215fb68f37SKaren (Ren) Stengel 
122b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
123b7453713SJeremy L Thompson   hipLaunchKernelGGL(axpbyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, beta, x_array, length);
1245fb68f37SKaren (Ren) Stengel   return 0;
1255fb68f37SKaren (Ren) Stengel }
1265fb68f37SKaren (Ren) Stengel 
1275fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1280d0321e0SJeremy L Thompson // Kernel for pointwise mult
1290d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1309330daecSnbeams __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) {
131b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
132b7453713SJeremy L Thompson 
133b7453713SJeremy L Thompson   if (index >= size) return;
134b7453713SJeremy L Thompson   w[index] = x[index] * y[index];
1350d0321e0SJeremy L Thompson }
1360d0321e0SJeremy L Thompson 
1370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1380d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device
1390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1409330daecSnbeams extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
141b7453713SJeremy L Thompson   const int      block_size = 512;
142b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
143b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
1440d0321e0SJeremy L Thompson 
145b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
146b7453713SJeremy L Thompson   hipLaunchKernelGGL(pointwiseMultValueK, dim3(grid_size), dim3(block_size), 0, 0, w_array, x_array, y_array, length);
1470d0321e0SJeremy L Thompson   return 0;
1480d0321e0SJeremy L Thompson }
1492a86cc9dSSebastian Grimberg 
1502a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------
151