1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <hip/hip_runtime.h> 10 11 //------------------------------------------------------------------------------ 12 // Kernel for set value on device 13 //------------------------------------------------------------------------------ 14 __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) { 15 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 16 17 if (index >= size) return; 18 vec[index] = val; 19 } 20 21 //------------------------------------------------------------------------------ 22 // Set value on device memory 23 //------------------------------------------------------------------------------ 24 extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) { 25 const int block_size = 512; 26 const CeedSize vec_size = length; 27 int grid_size = vec_size / block_size; 28 29 if (block_size * grid_size < vec_size) grid_size += 1; 30 hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); 31 return 0; 32 } 33 34 //------------------------------------------------------------------------------ 35 // Kernel for taking reciprocal 36 //------------------------------------------------------------------------------ 37 __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) { 38 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 39 40 if (index >= size) return; 41 if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index]; 42 } 43 44 //------------------------------------------------------------------------------ 45 // Take vector reciprocal in device memory 46 //------------------------------------------------------------------------------ 47 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) { 48 const int block_size = 512; 49 const CeedSize vec_size = length; 50 int grid_size = vec_size / block_size; 51 52 if (block_size * grid_size < vec_size) grid_size += 1; 53 hipLaunchKernelGGL(rcpValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length); 54 return 0; 55 } 56 57 //------------------------------------------------------------------------------ 58 // Kernel for scale 59 //------------------------------------------------------------------------------ 60 __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) { 61 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 62 63 if (index >= size) return; 64 x[index] *= alpha; 65 } 66 67 //------------------------------------------------------------------------------ 68 // Compute x = alpha x on device 69 //------------------------------------------------------------------------------ 70 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 71 const int block_size = 512; 72 const CeedSize vec_size = length; 73 int grid_size = vec_size / block_size; 74 75 if (block_size * grid_size < vec_size) grid_size += 1; 76 hipLaunchKernelGGL(scaleValueK, dim3(grid_size), dim3(block_size), 0, 0, x_array, alpha, length); 77 return 0; 78 } 79 80 //------------------------------------------------------------------------------ 81 // Kernel for axpy 82 //------------------------------------------------------------------------------ 83 __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) { 84 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 85 86 if (index >= size) return; 87 y[index] += alpha * x[index]; 88 } 89 90 //------------------------------------------------------------------------------ 91 // Compute y = alpha x + y on device 92 //------------------------------------------------------------------------------ 93 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 94 const int block_size = 512; 95 const CeedSize vec_size = length; 96 int grid_size = vec_size / block_size; 97 98 if (block_size * grid_size < vec_size) grid_size += 1; 99 hipLaunchKernelGGL(axpyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, x_array, length); 100 return 0; 101 } 102 103 //------------------------------------------------------------------------------ 104 // Kernel for axpby 105 //------------------------------------------------------------------------------ 106 __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) { 107 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 108 109 if (index >= size) return; 110 y[index] = beta * y[index]; 111 y[index] += alpha * x[index]; 112 } 113 114 //------------------------------------------------------------------------------ 115 // Compute y = alpha x + beta y on device 116 //------------------------------------------------------------------------------ 117 extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) { 118 const int block_size = 512; 119 const CeedSize vec_size = length; 120 int grid_size = vec_size / block_size; 121 122 if (block_size * grid_size < vec_size) grid_size += 1; 123 hipLaunchKernelGGL(axpbyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, beta, x_array, length); 124 return 0; 125 } 126 127 //------------------------------------------------------------------------------ 128 // Kernel for pointwise mult 129 //------------------------------------------------------------------------------ 130 __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) { 131 CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 132 133 if (index >= size) return; 134 w[index] = x[index] * y[index]; 135 } 136 137 //------------------------------------------------------------------------------ 138 // Compute the pointwise multiplication w = x .* y on device 139 //------------------------------------------------------------------------------ 140 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 141 const int block_size = 512; 142 const CeedSize vec_size = length; 143 int grid_size = vec_size / block_size; 144 145 if (block_size * grid_size < vec_size) grid_size += 1; 146 hipLaunchKernelGGL(pointwiseMultValueK, dim3(grid_size), dim3(block_size), 0, 0, w_array, x_array, y_array, length); 147 return 0; 148 } 149 150 //------------------------------------------------------------------------------ 151