13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 100d0321e0SJeremy L Thompson 110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 120d0321e0SJeremy L Thompson // Kernel for set value on device 130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 142b730f8bSJeremy L Thompson __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) { 150d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 162b730f8bSJeremy L Thompson if (idx >= size) return; 170d0321e0SJeremy L Thompson vec[idx] = val; 180d0321e0SJeremy L Thompson } 190d0321e0SJeremy L Thompson 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 210d0321e0SJeremy L Thompson // Set value on device memory 220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 232b730f8bSJeremy L Thompson extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedInt length, CeedScalar val) { 240d0321e0SJeremy L Thompson const int bsize = 512; 250d0321e0SJeremy L Thompson const int vecsize = length; 260d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 270d0321e0SJeremy L Thompson 282b730f8bSJeremy L Thompson if (bsize * gridsize < vecsize) gridsize += 1; 290d0321e0SJeremy L Thompson hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val); 300d0321e0SJeremy L Thompson return 0; 310d0321e0SJeremy L Thompson } 320d0321e0SJeremy L Thompson 330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 340d0321e0SJeremy L Thompson // Kernel for taking reciprocal 350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 360d0321e0SJeremy L Thompson __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) { 370d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 382b730f8bSJeremy L Thompson if (idx >= size) return; 392b730f8bSJeremy L Thompson if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx]; 400d0321e0SJeremy L Thompson } 410d0321e0SJeremy L Thompson 420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 430d0321e0SJeremy L Thompson // Take vector reciprocal in device memory 440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 450d0321e0SJeremy L Thompson extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedInt length) { 460d0321e0SJeremy L Thompson const int bsize = 512; 470d0321e0SJeremy L Thompson const int vecsize = length; 480d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 490d0321e0SJeremy L Thompson 502b730f8bSJeremy L Thompson if (bsize * gridsize < vecsize) gridsize += 1; 510d0321e0SJeremy L Thompson hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length); 520d0321e0SJeremy L Thompson return 0; 530d0321e0SJeremy L Thompson } 540d0321e0SJeremy L Thompson 550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 560d0321e0SJeremy L Thompson // Kernel for scale 570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 582b730f8bSJeremy L Thompson __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) { 590d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 602b730f8bSJeremy L Thompson if (idx >= size) return; 610d0321e0SJeremy L Thompson x[idx] *= alpha; 620d0321e0SJeremy L Thompson } 630d0321e0SJeremy L Thompson 640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 650d0321e0SJeremy L Thompson // Compute x = alpha x on device 660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 672b730f8bSJeremy L Thompson extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedInt length) { 680d0321e0SJeremy L Thompson const int bsize = 512; 690d0321e0SJeremy L Thompson const int vecsize = length; 700d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 710d0321e0SJeremy L Thompson 722b730f8bSJeremy L Thompson if (bsize * gridsize < vecsize) gridsize += 1; 732b730f8bSJeremy L Thompson hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length); 740d0321e0SJeremy L Thompson return 0; 750d0321e0SJeremy L Thompson } 760d0321e0SJeremy L Thompson 770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 780d0321e0SJeremy L Thompson // Kernel for axpy 790d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 802b730f8bSJeremy L Thompson __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) { 810d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 822b730f8bSJeremy L Thompson if (idx >= size) return; 830d0321e0SJeremy L Thompson y[idx] += alpha * x[idx]; 840d0321e0SJeremy L Thompson } 850d0321e0SJeremy L Thompson 860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 870d0321e0SJeremy L Thompson // Compute y = alpha x + y on device 880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 892b730f8bSJeremy L Thompson extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) { 900d0321e0SJeremy L Thompson const int bsize = 512; 910d0321e0SJeremy L Thompson const int vecsize = length; 920d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 930d0321e0SJeremy L Thompson 942b730f8bSJeremy L Thompson if (bsize * gridsize < vecsize) gridsize += 1; 952b730f8bSJeremy L Thompson hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length); 960d0321e0SJeremy L Thompson return 0; 970d0321e0SJeremy L Thompson } 980d0321e0SJeremy L Thompson 990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1005fb68f37SKaren (Ren) Stengel // Kernel for axpby 1015fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1025fb68f37SKaren (Ren) Stengel __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedInt size) { 1035fb68f37SKaren (Ren) Stengel int idx = threadIdx.x + blockDim.x * blockIdx.x; 1045fb68f37SKaren (Ren) Stengel if (idx >= size) return; 1055fb68f37SKaren (Ren) Stengel y[idx] = beta * y[idx]; 1065fb68f37SKaren (Ren) Stengel y[idx] += alpha * x[idx]; 1075fb68f37SKaren (Ren) Stengel } 1085fb68f37SKaren (Ren) Stengel 1095fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1105fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on device 1115fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1125fb68f37SKaren (Ren) Stengel extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedInt length) { 1135fb68f37SKaren (Ren) Stengel const int bsize = 512; 1145fb68f37SKaren (Ren) Stengel const int vecsize = length; 1155fb68f37SKaren (Ren) Stengel int gridsize = vecsize / bsize; 1165fb68f37SKaren (Ren) Stengel 1175fb68f37SKaren (Ren) Stengel if (bsize * gridsize < vecsize) gridsize += 1; 1185fb68f37SKaren (Ren) Stengel hipLaunchKernelGGL(axpbyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, beta, x_array, length); 1195fb68f37SKaren (Ren) Stengel return 0; 1205fb68f37SKaren (Ren) Stengel } 1215fb68f37SKaren (Ren) Stengel 1225fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1230d0321e0SJeremy L Thompson // Kernel for pointwise mult 1240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1252b730f8bSJeremy L Thompson __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) { 1260d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 1272b730f8bSJeremy L Thompson if (idx >= size) return; 1280d0321e0SJeremy L Thompson w[idx] = x[idx] * y[idx]; 1290d0321e0SJeremy L Thompson } 1300d0321e0SJeremy L Thompson 1310d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1320d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device 1330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1342b730f8bSJeremy L Thompson extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) { 1350d0321e0SJeremy L Thompson const int bsize = 512; 1360d0321e0SJeremy L Thompson const int vecsize = length; 1370d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 1380d0321e0SJeremy L Thompson 1392b730f8bSJeremy L Thompson if (bsize * gridsize < vecsize) gridsize += 1; 1402b730f8bSJeremy L Thompson hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length); 1410d0321e0SJeremy L Thompson return 0; 1420d0321e0SJeremy L Thompson } 143*2a86cc9dSSebastian Grimberg 144*2a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------ 145