1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 80d0321e0SJeremy L Thompson #include <ceed/ceed.h> 90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 100d0321e0SJeremy L Thompson 110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 120d0321e0SJeremy L Thompson // Kernel for set value on device 130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 140d0321e0SJeremy L Thompson __global__ static void setValueK(CeedScalar * __restrict__ vec, CeedInt size, 150d0321e0SJeremy L Thompson CeedScalar val) { 160d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 170d0321e0SJeremy L Thompson if (idx >= size) 180d0321e0SJeremy L Thompson return; 190d0321e0SJeremy L Thompson vec[idx] = val; 200d0321e0SJeremy L Thompson } 210d0321e0SJeremy L Thompson 220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 230d0321e0SJeremy L Thompson // Set value on device memory 240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 250d0321e0SJeremy L Thompson extern "C" int CeedDeviceSetValue_Hip(CeedScalar* d_array, CeedInt length, 260d0321e0SJeremy L Thompson CeedScalar val) { 270d0321e0SJeremy L Thompson const int bsize = 512; 280d0321e0SJeremy L Thompson const int vecsize = length; 290d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 300d0321e0SJeremy L Thompson 310d0321e0SJeremy L Thompson if (bsize * gridsize < vecsize) 320d0321e0SJeremy L Thompson gridsize += 1; 330d0321e0SJeremy L Thompson hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val); 340d0321e0SJeremy L Thompson return 0; 350d0321e0SJeremy L Thompson } 360d0321e0SJeremy L Thompson 370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 380d0321e0SJeremy L Thompson // Kernel for taking reciprocal 390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 400d0321e0SJeremy L Thompson __global__ static void rcpValueK(CeedScalar * __restrict__ vec, CeedInt size) { 410d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 420d0321e0SJeremy L Thompson if (idx >= size) 430d0321e0SJeremy L Thompson return; 440d0321e0SJeremy L Thompson if (fabs(vec[idx]) > 1E-16) 450d0321e0SJeremy L Thompson vec[idx] = 1./vec[idx]; 460d0321e0SJeremy L Thompson } 470d0321e0SJeremy L Thompson 480d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 490d0321e0SJeremy L Thompson // Take vector reciprocal in device memory 500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 510d0321e0SJeremy L Thompson extern "C" int CeedDeviceReciprocal_Hip(CeedScalar* d_array, CeedInt length) { 520d0321e0SJeremy L Thompson const int bsize = 512; 530d0321e0SJeremy L Thompson const int vecsize = length; 540d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 550d0321e0SJeremy L Thompson 560d0321e0SJeremy L Thompson if (bsize * gridsize < vecsize) 570d0321e0SJeremy L Thompson gridsize += 1; 580d0321e0SJeremy L Thompson hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length); 590d0321e0SJeremy L Thompson return 0; 600d0321e0SJeremy L Thompson } 610d0321e0SJeremy L Thompson 620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 630d0321e0SJeremy L Thompson // Kernel for scale 640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 650d0321e0SJeremy L Thompson __global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha, 660d0321e0SJeremy L Thompson CeedInt size) { 670d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 680d0321e0SJeremy L Thompson if (idx >= size) 690d0321e0SJeremy L Thompson return; 700d0321e0SJeremy L Thompson x[idx] *= alpha; 710d0321e0SJeremy L Thompson } 720d0321e0SJeremy L Thompson 730d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 740d0321e0SJeremy L Thompson // Compute x = alpha x on device 750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 760d0321e0SJeremy L Thompson extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, 770d0321e0SJeremy L Thompson CeedInt length) { 780d0321e0SJeremy L Thompson const int bsize = 512; 790d0321e0SJeremy L Thompson const int vecsize = length; 800d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 810d0321e0SJeremy L Thompson 820d0321e0SJeremy L Thompson if (bsize * gridsize < vecsize) 830d0321e0SJeremy L Thompson gridsize += 1; 840d0321e0SJeremy L Thompson hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, 850d0321e0SJeremy L Thompson length); 860d0321e0SJeremy L Thompson return 0; 870d0321e0SJeremy L Thompson } 880d0321e0SJeremy L Thompson 890d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 900d0321e0SJeremy L Thompson // Kernel for axpy 910d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 920d0321e0SJeremy L Thompson __global__ static void axpyValueK(CeedScalar * __restrict__ y, CeedScalar alpha, 930d0321e0SJeremy L Thompson CeedScalar * __restrict__ x, CeedInt size) { 940d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 950d0321e0SJeremy L Thompson if (idx >= size) 960d0321e0SJeremy L Thompson return; 970d0321e0SJeremy L Thompson y[idx] += alpha * x[idx]; 980d0321e0SJeremy L Thompson } 990d0321e0SJeremy L Thompson 1000d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1010d0321e0SJeremy L Thompson // Compute y = alpha x + y on device 1020d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1030d0321e0SJeremy L Thompson extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, 1040d0321e0SJeremy L Thompson CeedScalar *x_array, CeedInt length) { 1050d0321e0SJeremy L Thompson const int bsize = 512; 1060d0321e0SJeremy L Thompson const int vecsize = length; 1070d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 1080d0321e0SJeremy L Thompson 1090d0321e0SJeremy L Thompson if (bsize * gridsize < vecsize) 1100d0321e0SJeremy L Thompson gridsize += 1; 1110d0321e0SJeremy L Thompson hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, 1120d0321e0SJeremy L Thompson x_array, length); 1130d0321e0SJeremy L Thompson return 0; 1140d0321e0SJeremy L Thompson } 1150d0321e0SJeremy L Thompson 1160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1170d0321e0SJeremy L Thompson // Kernel for pointwise mult 1180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1190d0321e0SJeremy L Thompson __global__ static void pointwiseMultValueK(CeedScalar * __restrict__ w, 1200d0321e0SJeremy L Thompson CeedScalar * x, CeedScalar * __restrict__ y, CeedInt size) { 1210d0321e0SJeremy L Thompson int idx = threadIdx.x + blockDim.x * blockIdx.x; 1220d0321e0SJeremy L Thompson if (idx >= size) 1230d0321e0SJeremy L Thompson return; 1240d0321e0SJeremy L Thompson w[idx] = x[idx] * y[idx]; 1250d0321e0SJeremy L Thompson } 1260d0321e0SJeremy L Thompson 1270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1280d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device 1290d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1300d0321e0SJeremy L Thompson extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, 1310d0321e0SJeremy L Thompson CeedScalar *y_array, CeedInt length) { 1320d0321e0SJeremy L Thompson const int bsize = 512; 1330d0321e0SJeremy L Thompson const int vecsize = length; 1340d0321e0SJeremy L Thompson int gridsize = vecsize / bsize; 1350d0321e0SJeremy L Thompson 1360d0321e0SJeremy L Thompson if (bsize * gridsize < vecsize) 1370d0321e0SJeremy L Thompson gridsize += 1; 1380d0321e0SJeremy L Thompson hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, 1390d0321e0SJeremy L Thompson x_array, y_array, length); 1400d0321e0SJeremy L Thompson return 0; 1410d0321e0SJeremy L Thompson } 142