1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <hip/hip_runtime.h> 10 11 //------------------------------------------------------------------------------ 12 // Kernel for set value on device 13 //------------------------------------------------------------------------------ 14 __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) { 15 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 16 if (idx >= size) return; 17 vec[idx] = val; 18 } 19 20 //------------------------------------------------------------------------------ 21 // Set value on device memory 22 //------------------------------------------------------------------------------ 23 extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) { 24 const int bsize = 512; 25 const CeedSize vecsize = length; 26 int gridsize = vecsize / bsize; 27 28 if (bsize * gridsize < vecsize) gridsize += 1; 29 hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val); 30 return 0; 31 } 32 33 //------------------------------------------------------------------------------ 34 // Kernel for taking reciprocal 35 //------------------------------------------------------------------------------ 36 __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) { 37 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 38 if (idx >= size) return; 39 if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx]; 40 } 41 42 //------------------------------------------------------------------------------ 43 // Take vector reciprocal in device memory 44 //------------------------------------------------------------------------------ 45 extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) { 46 const int bsize = 512; 47 const CeedSize vecsize = length; 48 int gridsize = vecsize / bsize; 49 50 if (bsize * gridsize < vecsize) gridsize += 1; 51 hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length); 52 return 0; 53 } 54 55 //------------------------------------------------------------------------------ 56 // Kernel for scale 57 //------------------------------------------------------------------------------ 58 __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) { 59 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 60 if (idx >= size) return; 61 x[idx] *= alpha; 62 } 63 64 //------------------------------------------------------------------------------ 65 // Compute x = alpha x on device 66 //------------------------------------------------------------------------------ 67 extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 68 const int bsize = 512; 69 const CeedSize vecsize = length; 70 int gridsize = vecsize / bsize; 71 72 if (bsize * gridsize < vecsize) gridsize += 1; 73 hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length); 74 return 0; 75 } 76 77 //------------------------------------------------------------------------------ 78 // Kernel for axpy 79 //------------------------------------------------------------------------------ 80 __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) { 81 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 82 if (idx >= size) return; 83 y[idx] += alpha * x[idx]; 84 } 85 86 //------------------------------------------------------------------------------ 87 // Compute y = alpha x + y on device 88 //------------------------------------------------------------------------------ 89 extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 90 const int bsize = 512; 91 const CeedSize vecsize = length; 92 int gridsize = vecsize / bsize; 93 94 if (bsize * gridsize < vecsize) gridsize += 1; 95 hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length); 96 return 0; 97 } 98 99 //------------------------------------------------------------------------------ 100 // Kernel for axpby 101 //------------------------------------------------------------------------------ 102 __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) { 103 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 104 if (idx >= size) return; 105 y[idx] = beta * y[idx]; 106 y[idx] += alpha * x[idx]; 107 } 108 109 //------------------------------------------------------------------------------ 110 // Compute y = alpha x + beta y on device 111 //------------------------------------------------------------------------------ 112 extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) { 113 const int bsize = 512; 114 const CeedSize vecsize = length; 115 int gridsize = vecsize / bsize; 116 117 if (bsize * gridsize < vecsize) gridsize += 1; 118 hipLaunchKernelGGL(axpbyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, beta, x_array, length); 119 return 0; 120 } 121 122 //------------------------------------------------------------------------------ 123 // Kernel for pointwise mult 124 //------------------------------------------------------------------------------ 125 __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) { 126 CeedSize idx = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 127 if (idx >= size) return; 128 w[idx] = x[idx] * y[idx]; 129 } 130 131 //------------------------------------------------------------------------------ 132 // Compute the pointwise multiplication w = x .* y on device 133 //------------------------------------------------------------------------------ 134 extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 135 const int bsize = 512; 136 const CeedSize vecsize = length; 137 int gridsize = vecsize / bsize; 138 139 if (bsize * gridsize < vecsize) gridsize += 1; 140 hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length); 141 return 0; 142 } 143 144 //------------------------------------------------------------------------------ 145