1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <hip/hip_runtime.h> 10 11 //------------------------------------------------------------------------------ 12 // Kernel for set value on device 13 //------------------------------------------------------------------------------ 14 __global__ static void setValueK(CeedScalar * __restrict__ vec, CeedInt size, 15 CeedScalar val) { 16 int idx = threadIdx.x + blockDim.x * blockIdx.x; 17 if (idx >= size) 18 return; 19 vec[idx] = val; 20 } 21 22 //------------------------------------------------------------------------------ 23 // Set value on device memory 24 //------------------------------------------------------------------------------ 25 extern "C" int CeedDeviceSetValue_Hip(CeedScalar* d_array, CeedInt length, 26 CeedScalar val) { 27 const int bsize = 512; 28 const int vecsize = length; 29 int gridsize = vecsize / bsize; 30 31 if (bsize * gridsize < vecsize) 32 gridsize += 1; 33 hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val); 34 return 0; 35 } 36 37 //------------------------------------------------------------------------------ 38 // Kernel for taking reciprocal 39 //------------------------------------------------------------------------------ 40 __global__ static void rcpValueK(CeedScalar * __restrict__ vec, CeedInt size) { 41 int idx = threadIdx.x + blockDim.x * blockIdx.x; 42 if (idx >= size) 43 return; 44 if (fabs(vec[idx]) > 1E-16) 45 vec[idx] = 1./vec[idx]; 46 } 47 48 //------------------------------------------------------------------------------ 49 // Take vector reciprocal in device memory 50 //------------------------------------------------------------------------------ 51 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar* d_array, CeedInt length) { 52 const int bsize = 512; 53 const int vecsize = length; 54 int gridsize = vecsize / bsize; 55 56 if (bsize * gridsize < vecsize) 57 gridsize += 1; 58 hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length); 59 return 0; 60 } 61 62 //------------------------------------------------------------------------------ 63 // Kernel for scale 64 //------------------------------------------------------------------------------ 65 __global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha, 66 CeedInt size) { 67 int idx = threadIdx.x + blockDim.x * blockIdx.x; 68 if (idx >= size) 69 return; 70 x[idx] *= alpha; 71 } 72 73 //------------------------------------------------------------------------------ 74 // Compute x = alpha x on device 75 //------------------------------------------------------------------------------ 76 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, 77 CeedInt length) { 78 const int bsize = 512; 79 const int vecsize = length; 80 int gridsize = vecsize / bsize; 81 82 if (bsize * gridsize < vecsize) 83 gridsize += 1; 84 hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, 85 length); 86 return 0; 87 } 88 89 //------------------------------------------------------------------------------ 90 // Kernel for axpy 91 //------------------------------------------------------------------------------ 92 __global__ static void axpyValueK(CeedScalar * __restrict__ y, CeedScalar alpha, 93 CeedScalar * __restrict__ x, CeedInt size) { 94 int idx = threadIdx.x + blockDim.x * blockIdx.x; 95 if (idx >= size) 96 return; 97 y[idx] += alpha * x[idx]; 98 } 99 100 //------------------------------------------------------------------------------ 101 // Compute y = alpha x + y on device 102 //------------------------------------------------------------------------------ 103 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, 104 CeedScalar *x_array, CeedInt length) { 105 const int bsize = 512; 106 const int vecsize = length; 107 int gridsize = vecsize / bsize; 108 109 if (bsize * gridsize < vecsize) 110 gridsize += 1; 111 hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, 112 x_array, length); 113 return 0; 114 } 115 116 //------------------------------------------------------------------------------ 117 // Kernel for pointwise mult 118 //------------------------------------------------------------------------------ 119 __global__ static void pointwiseMultValueK(CeedScalar * __restrict__ w, 120 CeedScalar * x, CeedScalar * __restrict__ y, CeedInt size) { 121 int idx = threadIdx.x + blockDim.x * blockIdx.x; 122 if (idx >= size) 123 return; 124 w[idx] = x[idx] * y[idx]; 125 } 126 127 //------------------------------------------------------------------------------ 128 // Compute the pointwise multiplication w = x .* y on device 129 //------------------------------------------------------------------------------ 130 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, 131 CeedScalar *y_array, CeedInt length) { 132 const int bsize = 512; 133 const int vecsize = length; 134 int gridsize = vecsize / bsize; 135 136 if (bsize * gridsize < vecsize) 137 gridsize += 1; 138 hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, 139 x_array, y_array, length); 140 return 0; 141 } 142