1*49ed4312SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other 2*49ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE 3*49ed4312SSebastian Grimberg // files for details. 4*49ed4312SSebastian Grimberg // 5*49ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 6*49ed4312SSebastian Grimberg // 7*49ed4312SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 8*49ed4312SSebastian Grimberg 9*49ed4312SSebastian Grimberg #include <ceed/ceed.h> 10*49ed4312SSebastian Grimberg #include <sycl/sycl.hpp> 11*49ed4312SSebastian Grimberg 12*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 13*49ed4312SSebastian Grimberg // Kernel for set value on device 14*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 15*49ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) { 16*49ed4312SSebastian Grimberg int idx = threadIdx.x + blockDim.x * blockIdx.x; 17*49ed4312SSebastian Grimberg if (idx >= size) return; 18*49ed4312SSebastian Grimberg vec[idx] = val; 19*49ed4312SSebastian Grimberg } 20*49ed4312SSebastian Grimberg 21*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 22*49ed4312SSebastian Grimberg // Set value on device memory 23*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 24*49ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) { 25*49ed4312SSebastian Grimberg const int bsize = 512; 26*49ed4312SSebastian Grimberg const int vecsize = length; 27*49ed4312SSebastian Grimberg int gridsize = vecsize / bsize; 28*49ed4312SSebastian Grimberg 29*49ed4312SSebastian Grimberg if (bsize * gridsize < vecsize) gridsize += 1; 30*49ed4312SSebastian Grimberg setValueK<<<gridsize, bsize>>>(d_array, length, val); 31*49ed4312SSebastian Grimberg return 0; 32*49ed4312SSebastian Grimberg } 33*49ed4312SSebastian Grimberg 34*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 35*49ed4312SSebastian Grimberg // Kernel for taking reciprocal 36*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 37*49ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) { 38*49ed4312SSebastian Grimberg int idx = threadIdx.x + blockDim.x * blockIdx.x; 39*49ed4312SSebastian Grimberg if (idx >= size) return; 40*49ed4312SSebastian Grimberg if (fabs(vec[idx]) > 1E-16) vec[idx] = 1. / vec[idx]; 41*49ed4312SSebastian Grimberg } 42*49ed4312SSebastian Grimberg 43*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 44*49ed4312SSebastian Grimberg // Take vector reciprocal in device memory 45*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 46*49ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) { 47*49ed4312SSebastian Grimberg const int bsize = 512; 48*49ed4312SSebastian Grimberg const int vecsize = length; 49*49ed4312SSebastian Grimberg int gridsize = vecsize / bsize; 50*49ed4312SSebastian Grimberg 51*49ed4312SSebastian Grimberg if (bsize * gridsize < vecsize) gridsize += 1; 52*49ed4312SSebastian Grimberg rcpValueK<<<gridsize, bsize>>>(d_array, length); 53*49ed4312SSebastian Grimberg return 0; 54*49ed4312SSebastian Grimberg } 55*49ed4312SSebastian Grimberg 56*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 57*49ed4312SSebastian Grimberg // Kernel for scale 58*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 59*49ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) { 60*49ed4312SSebastian Grimberg int idx = threadIdx.x + blockDim.x * blockIdx.x; 61*49ed4312SSebastian Grimberg if (idx >= size) return; 62*49ed4312SSebastian Grimberg x[idx] *= alpha; 63*49ed4312SSebastian Grimberg } 64*49ed4312SSebastian Grimberg 65*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 66*49ed4312SSebastian Grimberg // Compute x = alpha x on device 67*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 68*49ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) { 69*49ed4312SSebastian Grimberg const int bsize = 512; 70*49ed4312SSebastian Grimberg const int vecsize = length; 71*49ed4312SSebastian Grimberg int gridsize = vecsize / bsize; 72*49ed4312SSebastian Grimberg 73*49ed4312SSebastian Grimberg if (bsize * gridsize < vecsize) gridsize += 1; 74*49ed4312SSebastian Grimberg scaleValueK<<<gridsize, bsize>>>(x_array, alpha, length); 75*49ed4312SSebastian Grimberg return 0; 76*49ed4312SSebastian Grimberg } 77*49ed4312SSebastian Grimberg 78*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 79*49ed4312SSebastian Grimberg // Kernel for axpy 80*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 81*49ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) { 82*49ed4312SSebastian Grimberg int idx = threadIdx.x + blockDim.x * blockIdx.x; 83*49ed4312SSebastian Grimberg if (idx >= size) return; 84*49ed4312SSebastian Grimberg y[idx] += alpha * x[idx]; 85*49ed4312SSebastian Grimberg } 86*49ed4312SSebastian Grimberg 87*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 88*49ed4312SSebastian Grimberg // Compute y = alpha x + y on device 89*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 90*49ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) { 91*49ed4312SSebastian Grimberg const int bsize = 512; 92*49ed4312SSebastian Grimberg const int vecsize = length; 93*49ed4312SSebastian Grimberg int gridsize = vecsize / bsize; 94*49ed4312SSebastian Grimberg 95*49ed4312SSebastian Grimberg if (bsize * gridsize < vecsize) gridsize += 1; 96*49ed4312SSebastian Grimberg axpyValueK<<<gridsize, bsize>>>(y_array, alpha, x_array, length); 97*49ed4312SSebastian Grimberg return 0; 98*49ed4312SSebastian Grimberg } 99*49ed4312SSebastian Grimberg 100*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 101*49ed4312SSebastian Grimberg // Kernel for pointwise mult 102*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 103*49ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) { 104*49ed4312SSebastian Grimberg int idx = threadIdx.x + blockDim.x * blockIdx.x; 105*49ed4312SSebastian Grimberg if (idx >= size) return; 106*49ed4312SSebastian Grimberg w[idx] = x[idx] * y[idx]; 107*49ed4312SSebastian Grimberg } 108*49ed4312SSebastian Grimberg 109*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 110*49ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device 111*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 112*49ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) { 113*49ed4312SSebastian Grimberg const int bsize = 512; 114*49ed4312SSebastian Grimberg const int vecsize = length; 115*49ed4312SSebastian Grimberg int gridsize = vecsize / bsize; 116*49ed4312SSebastian Grimberg 117*49ed4312SSebastian Grimberg if (bsize * gridsize < vecsize) gridsize += 1; 118*49ed4312SSebastian Grimberg pointwiseMultValueK<<<gridsize, bsize>>>(w_array, x_array, y_array, length); 119*49ed4312SSebastian Grimberg return 0; 120*49ed4312SSebastian Grimberg } 121*49ed4312SSebastian Grimberg 122*49ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 123