// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
// CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
// files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <ceed/ceed.h>
#include <sycl/sycl.hpp>

// NOTE(review): despite the *_Sycl naming and the <sycl/sycl.hpp> include, the
// kernels below use CUDA-only constructs (__global__, threadIdx, <<<...>>>),
// so this translation unit requires nvcc, not a SYCL compiler. Presumably this
// file was copied from the CUDA backend as a placeholder — confirm whether a
// SYCL port (queue.parallel_for) is the intended end state.

//------------------------------------------------------------------------------
// Launch configuration shared by all kernels in this file
//------------------------------------------------------------------------------
// Threads per block for every launch below.
static const int kBlockSize = 512;

// Number of blocks needed to cover `length` elements (ceiling division).
// Precondition: length > 0 (callers guard the zero/negative case).
static int CeedGridSize_Sycl(CeedInt length) { return ((int)length + kBlockSize - 1) / kBlockSize; }

//------------------------------------------------------------------------------
// Kernel for set value on device
//------------------------------------------------------------------------------
// 1-D launch, one thread per element; threads past `size` exit immediately.
__global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
  int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= size) return;
  vec[idx] = val;
}

//------------------------------------------------------------------------------
// Set value on device memory
//------------------------------------------------------------------------------
// Fills d_array[0..length) with `val`. Always returns 0 (CEED success).
extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
  // A grid of 0 blocks is an invalid CUDA launch configuration; nothing to do.
  if (length <= 0) return 0;
  setValueK<<<CeedGridSize_Sycl(length), kBlockSize>>>(d_array, length, val);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for taking reciprocal
//------------------------------------------------------------------------------
// 1-D launch, one thread per element. Entries with |v| <= 1e-16 are left
// unchanged to avoid division by (near-)zero.
// NOTE(review): 1E-16 is a double-precision-scale threshold — confirm it is
// still appropriate if CeedScalar is configured as single precision.
__global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
  int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= size) return;
  if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx];
}

//------------------------------------------------------------------------------
// Take vector reciprocal in device memory
//------------------------------------------------------------------------------
// Replaces each sufficiently-nonzero entry of d_array with its reciprocal.
extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
  if (length <= 0) return 0;  // avoid invalid <<<0, b>>> launch
  rcpValueK<<<CeedGridSize_Sycl(length), kBlockSize>>>(d_array, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for scale
//------------------------------------------------------------------------------
// 1-D launch, one thread per element: x[idx] *= alpha.
__global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
  int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= size) return;
  x[idx] *= alpha;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
  if (length <= 0) return 0;  // avoid invalid <<<0, b>>> launch
  scaleValueK<<<CeedGridSize_Sycl(length), kBlockSize>>>(x_array, alpha, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for axpy
//------------------------------------------------------------------------------
// 1-D launch, one thread per element: y[idx] += alpha * x[idx].
// x is read-only here, so it is const __restrict__ (enables the read-only
// data cache); callers must not alias x and y.
__global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, const CeedScalar *__restrict__ x, CeedInt size) {
  int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= size) return;
  y[idx] += alpha * x[idx];
}

//------------------------------------------------------------------------------
// Compute y = alpha x + y on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
  if (length <= 0) return 0;  // avoid invalid <<<0, b>>> launch
  axpyValueK<<<CeedGridSize_Sycl(length), kBlockSize>>>(y_array, alpha, x_array, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for pointwise mult
//------------------------------------------------------------------------------
// 1-D launch, one thread per element: w[idx] = x[idx] * y[idx].
// NOTE(review): x deliberately lacks __restrict__ — presumably because callers
// may pass w == x; but w and y DO carry __restrict__, which would be violated
// if w aliases either. Confirm the intended aliasing contract.
__global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
  int idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx >= size) return;
  w[idx] = x[idx] * y[idx];
}

//------------------------------------------------------------------------------
// Compute the pointwise multiplication w = x .* y on device
//------------------------------------------------------------------------------
extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
  if (length <= 0) return 0;  // avoid invalid <<<0, b>>> launch
  pointwiseMultValueK<<<CeedGridSize_Sycl(length), kBlockSize>>>(w_array, x_array, y_array, length);
  return 0;
}

//------------------------------------------------------------------------------