// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other
// CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
// files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <ceed/ceed.h>
// NOTE(review): the body below uses CUDA launch syntax (<<<...>>>, threadIdx,
// blockDim), which a pure SYCL translation unit cannot compile; the previous
// #include <sycl/sycl.hpp> was replaced accordingly. The _Sycl suffixes on the
// extern "C" entry points are kept unchanged for ABI compatibility — confirm
// the intended backend with the build system.
#include <cuda.h>

//------------------------------------------------------------------------------
// Kernel for set value on device
// 1-D launch; each thread writes one element, tail threads exit via the guard.
//------------------------------------------------------------------------------
__global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  vec[index] = val;
}

//------------------------------------------------------------------------------
// Set value on device memory
//
// @param[out] d_array  Device array to fill
// @param[in]  length   Number of entries
// @param[in]  val      Value to assign to every entry
//
// @return 0 (launch errors are not checked here)
//------------------------------------------------------------------------------
extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
  const int block_size = 512;
  // Guard: a grid of 0 blocks is an invalid launch configuration
  if (length < 1) return 0;

  const int grid_size = (length + block_size - 1) / block_size;  // ceil(length / block_size)

  setValueK<<<grid_size, block_size>>>(d_array, length, val);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for taking reciprocal
// Entries with |value| <= 1E-16 are left unchanged to avoid division by
// (near-)zero.
//------------------------------------------------------------------------------
__global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  // Cast keeps the division in CeedScalar precision (a bare 1. is double)
  if (fabs(vec[index]) > 1E-16) vec[index] = (CeedScalar)1.0 / vec[index];
}

//------------------------------------------------------------------------------
// Take vector reciprocal in device memory
//
// @param[in,out] d_array  Device array; each sufficiently-large entry is
//                         replaced by its reciprocal
// @param[in]     length   Number of entries
//
// @return 0 (launch errors are not checked here)
//------------------------------------------------------------------------------
extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
  const int block_size = 512;
  if (length < 1) return 0;

  const int grid_size = (length + block_size - 1) / block_size;  // ceil(length / block_size)

  rcpValueK<<<grid_size, block_size>>>(d_array, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for scale
//------------------------------------------------------------------------------
__global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  x[index] *= alpha;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device
//
// @param[in,out] x_array  Device array to scale in place
// @param[in]     alpha    Scale factor
// @param[in]     length   Number of entries
//
// @return 0 (launch errors are not checked here)
//------------------------------------------------------------------------------
extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
  const int block_size = 512;
  if (length < 1) return 0;

  const int grid_size = (length + block_size - 1) / block_size;  // ceil(length / block_size)

  scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for axpy
// x is read-only here; const __restrict__ matches the qualifiers used by the
// sibling kernels and enables read-only-cache loads.
//------------------------------------------------------------------------------
__global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, const CeedScalar *__restrict__ x, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  y[index] += alpha * x[index];
}

//------------------------------------------------------------------------------
// Compute y = alpha x + y on device
//
// @param[in,out] y_array  Device array updated in place
// @param[in]     alpha    Scale factor applied to x
// @param[in]     x_array  Device array read by the kernel
// @param[in]     length   Number of entries
//
// @return 0 (launch errors are not checked here)
//------------------------------------------------------------------------------
extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
  const int block_size = 512;
  // Guard: a grid of 0 blocks is an invalid launch configuration
  if (length < 1) return 0;

  const int grid_size = (length + block_size - 1) / block_size;  // ceil(length / block_size)

  axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length);
  return 0;
}

//------------------------------------------------------------------------------
// Kernel for pointwise mult
// NOTE(review): x intentionally lacks __restrict__ — presumably callers may
// pass w == x (in-place multiply); confirm the aliasing contract before
// tightening the qualifiers.
//------------------------------------------------------------------------------
__global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
  int index = threadIdx.x + blockDim.x * blockIdx.x;

  if (index >= size) return;
  w[index] = x[index] * y[index];
}

//------------------------------------------------------------------------------
// Compute the pointwise multiplication w = x .* y on device
//
// @param[out] w_array  Device array receiving the product
// @param[in]  x_array  First factor (device array)
// @param[in]  y_array  Second factor (device array)
// @param[in]  length   Number of entries
//
// @return 0 (launch errors are not checked here)
//------------------------------------------------------------------------------
extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
  const int block_size = 512;
  if (length < 1) return 0;

  const int grid_size = (length + block_size - 1) / block_size;  // ceil(length / block_size)

  pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length);
  return 0;
}

//------------------------------------------------------------------------------