xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision 5fb68f377259d3910de46d787b7c5d1587fd01e1)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
80d0321e0SJeremy L Thompson #include <ceed/ceed.h>
90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
100d0321e0SJeremy L Thompson 
110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
120d0321e0SJeremy L Thompson // Kernel for set value on device
130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
142b730f8bSJeremy L Thompson __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
150d0321e0SJeremy L Thompson   int idx = threadIdx.x + blockDim.x * blockIdx.x;
162b730f8bSJeremy L Thompson   if (idx >= size) return;
170d0321e0SJeremy L Thompson   vec[idx] = val;
180d0321e0SJeremy L Thompson }
190d0321e0SJeremy L Thompson 
200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
210d0321e0SJeremy L Thompson // Set value on device memory
220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
232b730f8bSJeremy L Thompson extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedInt length, CeedScalar val) {
240d0321e0SJeremy L Thompson   const int bsize    = 512;
250d0321e0SJeremy L Thompson   const int vecsize  = length;
260d0321e0SJeremy L Thompson   int       gridsize = vecsize / bsize;
270d0321e0SJeremy L Thompson 
282b730f8bSJeremy L Thompson   if (bsize * gridsize < vecsize) gridsize += 1;
290d0321e0SJeremy L Thompson   hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
300d0321e0SJeremy L Thompson   return 0;
310d0321e0SJeremy L Thompson }
320d0321e0SJeremy L Thompson 
330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
340d0321e0SJeremy L Thompson // Kernel for taking reciprocal
350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
360d0321e0SJeremy L Thompson __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
370d0321e0SJeremy L Thompson   int idx = threadIdx.x + blockDim.x * blockIdx.x;
382b730f8bSJeremy L Thompson   if (idx >= size) return;
392b730f8bSJeremy L Thompson   if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
400d0321e0SJeremy L Thompson }
410d0321e0SJeremy L Thompson 
420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
430d0321e0SJeremy L Thompson // Take vector reciprocal in device memory
440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
450d0321e0SJeremy L Thompson extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedInt length) {
460d0321e0SJeremy L Thompson   const int bsize    = 512;
470d0321e0SJeremy L Thompson   const int vecsize  = length;
480d0321e0SJeremy L Thompson   int       gridsize = vecsize / bsize;
490d0321e0SJeremy L Thompson 
502b730f8bSJeremy L Thompson   if (bsize * gridsize < vecsize) gridsize += 1;
510d0321e0SJeremy L Thompson   hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
520d0321e0SJeremy L Thompson   return 0;
530d0321e0SJeremy L Thompson }
540d0321e0SJeremy L Thompson 
550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
560d0321e0SJeremy L Thompson // Kernel for scale
570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
582b730f8bSJeremy L Thompson __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
590d0321e0SJeremy L Thompson   int idx = threadIdx.x + blockDim.x * blockIdx.x;
602b730f8bSJeremy L Thompson   if (idx >= size) return;
610d0321e0SJeremy L Thompson   x[idx] *= alpha;
620d0321e0SJeremy L Thompson }
630d0321e0SJeremy L Thompson 
640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
650d0321e0SJeremy L Thompson // Compute x = alpha x on device
660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
672b730f8bSJeremy L Thompson extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
680d0321e0SJeremy L Thompson   const int bsize    = 512;
690d0321e0SJeremy L Thompson   const int vecsize  = length;
700d0321e0SJeremy L Thompson   int       gridsize = vecsize / bsize;
710d0321e0SJeremy L Thompson 
722b730f8bSJeremy L Thompson   if (bsize * gridsize < vecsize) gridsize += 1;
732b730f8bSJeremy L Thompson   hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length);
740d0321e0SJeremy L Thompson   return 0;
750d0321e0SJeremy L Thompson }
760d0321e0SJeremy L Thompson 
770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
780d0321e0SJeremy L Thompson // Kernel for axpy
790d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
802b730f8bSJeremy L Thompson __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
810d0321e0SJeremy L Thompson   int idx = threadIdx.x + blockDim.x * blockIdx.x;
822b730f8bSJeremy L Thompson   if (idx >= size) return;
830d0321e0SJeremy L Thompson   y[idx] += alpha * x[idx];
840d0321e0SJeremy L Thompson }
850d0321e0SJeremy L Thompson 
860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
870d0321e0SJeremy L Thompson // Compute y = alpha x + y on device
880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
892b730f8bSJeremy L Thompson extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
900d0321e0SJeremy L Thompson   const int bsize    = 512;
910d0321e0SJeremy L Thompson   const int vecsize  = length;
920d0321e0SJeremy L Thompson   int       gridsize = vecsize / bsize;
930d0321e0SJeremy L Thompson 
942b730f8bSJeremy L Thompson   if (bsize * gridsize < vecsize) gridsize += 1;
952b730f8bSJeremy L Thompson   hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length);
960d0321e0SJeremy L Thompson   return 0;
970d0321e0SJeremy L Thompson }
980d0321e0SJeremy L Thompson 
990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
100*5fb68f37SKaren (Ren) Stengel // Kernel for axpby
101*5fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
102*5fb68f37SKaren (Ren) Stengel __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedInt size) {
103*5fb68f37SKaren (Ren) Stengel   int idx = threadIdx.x + blockDim.x * blockIdx.x;
104*5fb68f37SKaren (Ren) Stengel   if (idx >= size) return;
105*5fb68f37SKaren (Ren) Stengel   y[idx] = beta * y[idx];
106*5fb68f37SKaren (Ren) Stengel   y[idx] += alpha * x[idx];
107*5fb68f37SKaren (Ren) Stengel }
108*5fb68f37SKaren (Ren) Stengel 
109*5fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
110*5fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on device
111*5fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
112*5fb68f37SKaren (Ren) Stengel extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedInt length) {
113*5fb68f37SKaren (Ren) Stengel   const int bsize    = 512;
114*5fb68f37SKaren (Ren) Stengel   const int vecsize  = length;
115*5fb68f37SKaren (Ren) Stengel   int       gridsize = vecsize / bsize;
116*5fb68f37SKaren (Ren) Stengel 
117*5fb68f37SKaren (Ren) Stengel   if (bsize * gridsize < vecsize) gridsize += 1;
118*5fb68f37SKaren (Ren) Stengel   hipLaunchKernelGGL(axpbyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, beta, x_array, length);
119*5fb68f37SKaren (Ren) Stengel   return 0;
120*5fb68f37SKaren (Ren) Stengel }
121*5fb68f37SKaren (Ren) Stengel 
122*5fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1230d0321e0SJeremy L Thompson // Kernel for pointwise mult
1240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1252b730f8bSJeremy L Thompson __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
1260d0321e0SJeremy L Thompson   int idx = threadIdx.x + blockDim.x * blockIdx.x;
1272b730f8bSJeremy L Thompson   if (idx >= size) return;
1280d0321e0SJeremy L Thompson   w[idx] = x[idx] * y[idx];
1290d0321e0SJeremy L Thompson }
1300d0321e0SJeremy L Thompson 
1310d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1320d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device
1330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1342b730f8bSJeremy L Thompson extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
1350d0321e0SJeremy L Thompson   const int bsize    = 512;
1360d0321e0SJeremy L Thompson   const int vecsize  = length;
1370d0321e0SJeremy L Thompson   int       gridsize = vecsize / bsize;
1380d0321e0SJeremy L Thompson 
1392b730f8bSJeremy L Thompson   if (bsize * gridsize < vecsize) gridsize += 1;
1402b730f8bSJeremy L Thompson   hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length);
1410d0321e0SJeremy L Thompson   return 0;
1420d0321e0SJeremy L Thompson }
143