hip-ref/kernels/hip-ref-vector.hip.cpp

5aed82e4SJeremy L Thompson// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
3d8e8822SJeremy L Thompson// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
0d0321e0SJeremy L Thompson//
3d8e8822SJeremy L Thompson// SPDX-License-Identifier: BSD-2-Clause
0d0321e0SJeremy L Thompson//
3d8e8822SJeremy L Thompson// This file is part of CEED:  http://github.com/ceed
0d0321e0SJeremy L Thompson
49aac155SJeremy L Thompson#include <ceed.h>
0d0321e0SJeremy L Thompson#include <hip/hip_runtime.h>
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson// Kernel for copy strided on device
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar *__restrict__ vec_copy) {
*3196072fSJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson  if (index >= size) return;
*3196072fSJeremy L Thompson  if ((index - start) % step == 0) vec_copy[index] = vec[index];
*3196072fSJeremy L Thompson}
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson// Copy strided on device memory
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompsonextern "C" int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array) {
*3196072fSJeremy L Thompson  const int      block_size = 512;
*3196072fSJeremy L Thompson  const CeedSize vec_size   = length;
*3196072fSJeremy L Thompson  int            grid_size  = vec_size / block_size;
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
*3196072fSJeremy L Thompson  hipLaunchKernelGGL(copyStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, length, d_copy_array);
*3196072fSJeremy L Thompson  return 0;
*3196072fSJeremy L Thompson}
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Kernel for set value on device
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  vec[index] = val;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Set value on device memory
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
0d0321e0SJeremy L Thompson
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val);
0d0321e0SJeremy L Thompson  return 0;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson// Kernel for set value strided on device
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar val) {
*3196072fSJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson  if (index >= size) return;
*3196072fSJeremy L Thompson  if ((index - start) % step == 0) vec[index] = val;
*3196072fSJeremy L Thompson}
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompson// Set value strided on device memory
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
*3196072fSJeremy L Thompsonextern "C" int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
*3196072fSJeremy L Thompson  const int      block_size = 512;
*3196072fSJeremy L Thompson  const CeedSize vec_size   = length;
*3196072fSJeremy L Thompson  int            grid_size  = vec_size / block_size;
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
*3196072fSJeremy L Thompson  hipLaunchKernelGGL(setValueStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, length, val);
*3196072fSJeremy L Thompson  return 0;
*3196072fSJeremy L Thompson}
*3196072fSJeremy L Thompson
*3196072fSJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Kernel for taking reciprocal
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index];
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Take vector reciprocal in device memory
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
0d0321e0SJeremy L Thompson
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(rcpValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length);
0d0321e0SJeremy L Thompson  return 0;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Kernel for scale
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  x[index] *= alpha;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Compute x = alpha x on device
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
0d0321e0SJeremy L Thompson
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(scaleValueK, dim3(grid_size), dim3(block_size), 0, 0, x_array, alpha, length);
0d0321e0SJeremy L Thompson  return 0;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Kernel for axpy
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  y[index] += alpha * x[index];
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Compute y = alpha x + y on device
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
0d0321e0SJeremy L Thompson
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(axpyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, x_array, length);
0d0321e0SJeremy L Thompson  return 0;
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
5fb68f37SKaren (Ren) Stengel// Kernel for axpby
5fb68f37SKaren (Ren) Stengel//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  y[index] = beta * y[index];
b7453713SJeremy L Thompson  y[index] += alpha * x[index];
5fb68f37SKaren (Ren) Stengel}
5fb68f37SKaren (Ren) Stengel
5fb68f37SKaren (Ren) Stengel//------------------------------------------------------------------------------
5fb68f37SKaren (Ren) Stengel// Compute y = alpha x + beta y on device
5fb68f37SKaren (Ren) Stengel//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
5fb68f37SKaren (Ren) Stengel
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(axpbyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, beta, x_array, length);
5fb68f37SKaren (Ren) Stengel  return 0;
5fb68f37SKaren (Ren) Stengel}
5fb68f37SKaren (Ren) Stengel
5fb68f37SKaren (Ren) Stengel//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Kernel for pointwise mult
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeams__global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) {
b7453713SJeremy L Thompson  CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
b7453713SJeremy L Thompson
b7453713SJeremy L Thompson  if (index >= size) return;
b7453713SJeremy L Thompson  w[index] = x[index] * y[index];
0d0321e0SJeremy L Thompson}
0d0321e0SJeremy L Thompson
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
0d0321e0SJeremy L Thompson// Compute the pointwise multiplication w = x .* y on device
0d0321e0SJeremy L Thompson//------------------------------------------------------------------------------
9330daecSnbeamsextern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
b7453713SJeremy L Thompson  const int      block_size = 512;
b7453713SJeremy L Thompson  const CeedSize vec_size   = length;
b7453713SJeremy L Thompson  int            grid_size  = vec_size / block_size;
0d0321e0SJeremy L Thompson
b7453713SJeremy L Thompson  if (block_size * grid_size < vec_size) grid_size += 1;
b7453713SJeremy L Thompson  hipLaunchKernelGGL(pointwiseMultValueK, dim3(grid_size), dim3(block_size), 0, 0, w_array, x_array, y_array, length);
0d0321e0SJeremy L Thompson  return 0;
0d0321e0SJeremy L Thompson}
2a86cc9dSSebastian Grimberg
2a86cc9dSSebastian Grimberg//------------------------------------------------------------------------------