15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 37d8d0e25Snbeams // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 57d8d0e25Snbeams // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 77d8d0e25Snbeams 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 1149aac155SJeremy L Thompson #include <stdbool.h> 123d576824SJeremy L Thompson #include <stddef.h> 13111870feSJeremy L Thompson #include <string.h> 14c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 152b730f8bSJeremy L Thompson 167fcac036SJeremy L Thompson #include "../hip/ceed-hip-common.h" 177d8d0e25Snbeams #include "../hip/ceed-hip-compile.h" 182b730f8bSJeremy L Thompson #include "ceed-hip-shared.h" 197d8d0e25Snbeams 207d8d0e25Snbeams //------------------------------------------------------------------------------ 219e31c45bSnbeams // Compute a block size based on required minimum threads 229e31c45bSnbeams //------------------------------------------------------------------------------ 239e31c45bSnbeams static CeedInt ComputeBlockSizeFromRequirement(const CeedInt required) { 249e31c45bSnbeams CeedInt maxSize = 1024; // Max total threads per block 259e31c45bSnbeams CeedInt currentSize = 64; // Start with one group 269e31c45bSnbeams 279e31c45bSnbeams while (currentSize < maxSize) { 282b730f8bSJeremy L Thompson if (currentSize > required) break; 292b730f8bSJeremy L Thompson else currentSize = currentSize * 2; 309e31c45bSnbeams } 319e31c45bSnbeams return currentSize; 329e31c45bSnbeams } 339e31c45bSnbeams 349e31c45bSnbeams //------------------------------------------------------------------------------ 359e31c45bSnbeams // Compute required thread block sizes for basis kernels given P, Q, dim, and 369e201c85SYohann // num_comp (num_comp not currently used, but may be again in other basis 379e201c85SYohann // parallelization options) 389e31c45bSnbeams //------------------------------------------------------------------------------ 392b730f8bSJeremy L Thompson static int ComputeBasisThreadBlockSizes(const CeedInt dim, const CeedInt P_1d, const CeedInt Q_1d, const CeedInt num_comp, CeedInt *block_sizes) { 409e31c45bSnbeams // Note that this will use the same block sizes for all dimensions when compiling, 419e31c45bSnbeams // but as each basis object is defined for a particular dimension, we will never 429e31c45bSnbeams // call any kernels except the ones for the dimension for which we have computed the 439e31c45bSnbeams // block sizes. 44437930d1SJeremy L Thompson const CeedInt thread_1d = CeedIntMax(P_1d, Q_1d); 45b7453713SJeremy L Thompson 469e31c45bSnbeams switch (dim) { 479e31c45bSnbeams case 1: { 489e31c45bSnbeams // Interp kernels: 49437930d1SJeremy L Thompson block_sizes[0] = 256; 509e31c45bSnbeams 519e31c45bSnbeams // Grad kernels: 52437930d1SJeremy L Thompson block_sizes[1] = 256; 539e31c45bSnbeams 549e31c45bSnbeams // Weight kernels: 55437930d1SJeremy L Thompson block_sizes[2] = 256; 569e31c45bSnbeams } break; 579e31c45bSnbeams case 2: { 589e31c45bSnbeams // Interp kernels: 599e201c85SYohann CeedInt required = thread_1d * thread_1d; 60b7453713SJeremy L Thompson 619e201c85SYohann block_sizes[0] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 629e31c45bSnbeams 639e31c45bSnbeams // Grad kernels: currently use same required minimum threads 649e201c85SYohann block_sizes[1] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 659e31c45bSnbeams 669e31c45bSnbeams // Weight kernels: 67437930d1SJeremy L Thompson required = CeedIntMax(64, Q_1d * Q_1d); 689e201c85SYohann block_sizes[2] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 699e31c45bSnbeams 709e31c45bSnbeams } break; 719e31c45bSnbeams case 3: { 729e31c45bSnbeams // Interp kernels: 739e201c85SYohann CeedInt required = thread_1d * thread_1d; 74b7453713SJeremy L Thompson 759e201c85SYohann block_sizes[0] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 769e31c45bSnbeams 779e31c45bSnbeams // Grad kernels: currently use same required minimum threads 789e201c85SYohann block_sizes[1] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 799e31c45bSnbeams 809e31c45bSnbeams // Weight kernels: 81437930d1SJeremy L Thompson required = Q_1d * Q_1d * Q_1d; 829e201c85SYohann block_sizes[2] = CeedIntMax(256, ComputeBlockSizeFromRequirement(required)); 839e31c45bSnbeams } 849e31c45bSnbeams } 85e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 869e31c45bSnbeams } 879e31c45bSnbeams 889e31c45bSnbeams //------------------------------------------------------------------------------ 897d8d0e25Snbeams // Apply basis 907d8d0e25Snbeams //------------------------------------------------------------------------------ 91db2becc9SJeremy L Thompson static int CeedBasisApplyTensorCore_Hip_shared(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, 92db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 937d8d0e25Snbeams Ceed ceed; 946dbfb411Snbeams Ceed_Hip *ceed_Hip; 95437930d1SJeremy L Thompson CeedInt dim, num_comp; 96b7453713SJeremy L Thompson const CeedScalar *d_u; 97b7453713SJeremy L Thompson CeedScalar *d_v; 98b7453713SJeremy L Thompson CeedBasis_Hip_shared *data; 99b7453713SJeremy L Thompson 100b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 101b7453713SJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &ceed_Hip)); 102b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 1032b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 1042b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 1057d8d0e25Snbeams 1069ea2cfd9SJeremy L Thompson // Get read/write access to u, v 1076574a04fSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 1086574a04fSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 109db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 110db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 1117d8d0e25Snbeams 1127d8d0e25Snbeams // Apply basis operation 113437930d1SJeremy L Thompson switch (eval_mode) { 1147d8d0e25Snbeams case CEED_EVAL_INTERP: { 115437930d1SJeremy L Thompson CeedInt P_1d, Q_1d; 116437930d1SJeremy L Thompson CeedInt block_size = data->block_sizes[0]; 117b7453713SJeremy L Thompson 1182b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 1192b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 120437930d1SJeremy L Thompson CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); 1212b730f8bSJeremy L Thompson void *interp_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_u, &d_v}; 122b7453713SJeremy L Thompson 1237d8d0e25Snbeams if (dim == 1) { 124437930d1SJeremy L Thompson CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; 125437930d1SJeremy L Thompson elems_per_block = elems_per_block > 0 ? elems_per_block : 1; 1262b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 127437930d1SJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); 128b2165e7aSSebastian Grimberg 1299e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 130db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, grid, thread_1d, 1, 131db2becc9SJeremy L Thompson elems_per_block, shared_mem, interp_args)); 1329e201c85SYohann } else { 133eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, grid, thread_1d, 1, elems_per_block, shared_mem, interp_args)); 1349e201c85SYohann } 1357d8d0e25Snbeams } else if (dim == 2) { 1369e31c45bSnbeams // Check if required threads is small enough to do multiple elems 1372b730f8bSJeremy L Thompson const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); 1382b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 1392b730f8bSJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); 140b2165e7aSSebastian Grimberg 1419e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 142db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, grid, thread_1d, thread_1d, 143db2becc9SJeremy L Thompson elems_per_block, shared_mem, interp_args)); 1449e201c85SYohann } else { 145eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); 1469e201c85SYohann } 1477d8d0e25Snbeams } else if (dim == 3) { 1482b730f8bSJeremy L Thompson const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); 1492b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 1502b730f8bSJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); 151b2165e7aSSebastian Grimberg 1529e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 153db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->InterpTransposeAdd : data->InterpTranspose, grid, thread_1d, thread_1d, 154db2becc9SJeremy L Thompson elems_per_block, shared_mem, interp_args)); 1559e201c85SYohann } else { 156eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Interp, grid, thread_1d, thread_1d, elems_per_block, shared_mem, interp_args)); 1579e201c85SYohann } 1587d8d0e25Snbeams } 1597d8d0e25Snbeams } break; 1607d8d0e25Snbeams case CEED_EVAL_GRAD: { 161437930d1SJeremy L Thompson CeedInt P_1d, Q_1d; 162437930d1SJeremy L Thompson CeedInt block_size = data->block_sizes[1]; 163b7453713SJeremy L Thompson 1642b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 1652b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 166437930d1SJeremy L Thompson CeedInt thread_1d = CeedIntMax(Q_1d, P_1d); 1679e201c85SYohann CeedScalar *d_grad_1d = data->d_grad_1d; 168b7453713SJeremy L Thompson 1699e201c85SYohann if (data->d_collo_grad_1d) { 1709e201c85SYohann d_grad_1d = data->d_collo_grad_1d; 1719e201c85SYohann } 1722b730f8bSJeremy L Thompson void *grad_args[] = {(void *)&num_elem, &data->d_interp_1d, &d_grad_1d, &d_u, &d_v}; 1737d8d0e25Snbeams if (dim == 1) { 174437930d1SJeremy L Thompson CeedInt elems_per_block = 64 * thread_1d > 256 ? 256 / thread_1d : 64; 175437930d1SJeremy L Thompson elems_per_block = elems_per_block > 0 ? elems_per_block : 1; 1762b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 177437930d1SJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * sizeof(CeedScalar); 178b2165e7aSSebastian Grimberg 1799e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 180db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, grid, thread_1d, 1, 181db2becc9SJeremy L Thompson elems_per_block, shared_mem, grad_args)); 1829e201c85SYohann } else { 183eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, grid, thread_1d, 1, elems_per_block, shared_mem, grad_args)); 1849e201c85SYohann } 1857d8d0e25Snbeams } else if (dim == 2) { 1869e31c45bSnbeams // Check if required threads is small enough to do multiple elems 1872b730f8bSJeremy L Thompson const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); 1882b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 1892b730f8bSJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); 190b2165e7aSSebastian Grimberg 1919e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 192db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, grid, thread_1d, thread_1d, 193db2becc9SJeremy L Thompson elems_per_block, shared_mem, grad_args)); 1949e201c85SYohann } else { 195eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, grid, thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); 1969e201c85SYohann } 1977d8d0e25Snbeams } else if (dim == 3) { 1982b730f8bSJeremy L Thompson const CeedInt elems_per_block = CeedIntMax(block_size / (thread_1d * thread_1d), 1); 1992b730f8bSJeremy L Thompson CeedInt grid = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 2002b730f8bSJeremy L Thompson CeedInt shared_mem = elems_per_block * thread_1d * thread_1d * sizeof(CeedScalar); 201b2165e7aSSebastian Grimberg 2029e201c85SYohann if (t_mode == CEED_TRANSPOSE) { 203db2becc9SJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, apply_add ? data->GradTransposeAdd : data->GradTranspose, grid, thread_1d, thread_1d, 204db2becc9SJeremy L Thompson elems_per_block, shared_mem, grad_args)); 2059e201c85SYohann } else { 206eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->Grad, grid, thread_1d, thread_1d, elems_per_block, shared_mem, grad_args)); 2079e201c85SYohann } 2087d8d0e25Snbeams } 2097d8d0e25Snbeams } break; 2107d8d0e25Snbeams case CEED_EVAL_WEIGHT: { 211437930d1SJeremy L Thompson CeedInt Q_1d; 212437930d1SJeremy L Thompson CeedInt block_size = data->block_sizes[2]; 213b7453713SJeremy L Thompson 214097cc795SJames Wright CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 2152b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 216437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 217b7453713SJeremy L Thompson 2187d8d0e25Snbeams if (dim == 1) { 219437930d1SJeremy L Thompson const CeedInt opt_elems = block_size / Q_1d; 220437930d1SJeremy L Thompson const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; 2212b730f8bSJeremy L Thompson const CeedInt grid_size = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 222b2165e7aSSebastian Grimberg 223eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, elems_per_block, 1, weight_args)); 2247d8d0e25Snbeams } else if (dim == 2) { 225437930d1SJeremy L Thompson const CeedInt opt_elems = block_size / (Q_1d * Q_1d); 226437930d1SJeremy L Thompson const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; 2272b730f8bSJeremy L Thompson const CeedInt grid_size = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 228b2165e7aSSebastian Grimberg 229eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, weight_args)); 2307d8d0e25Snbeams } else if (dim == 3) { 2319e201c85SYohann const CeedInt opt_elems = block_size / (Q_1d * Q_1d); 2329e201c85SYohann const CeedInt elems_per_block = opt_elems > 0 ? opt_elems : 1; 2332b730f8bSJeremy L Thompson const CeedInt grid_size = num_elem / elems_per_block + ((num_elem / elems_per_block * elems_per_block < num_elem) ? 1 : 0); 234b2165e7aSSebastian Grimberg 235eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid_size, Q_1d, Q_1d, elems_per_block, weight_args)); 2367d8d0e25Snbeams } 2377d8d0e25Snbeams } break; 2389ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 2399ea2cfd9SJeremy L Thompson break; 2407d8d0e25Snbeams // LCOV_EXCL_START 2417d8d0e25Snbeams case CEED_EVAL_DIV: 2427d8d0e25Snbeams case CEED_EVAL_CURL: 243bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 2447d8d0e25Snbeams // LCOV_EXCL_STOP 2457d8d0e25Snbeams } 2467d8d0e25Snbeams 2479ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 2482b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2499ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 2509ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 251e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2527d8d0e25Snbeams } 2537d8d0e25Snbeams 254db2becc9SJeremy L Thompson int CeedBasisApplyTensor_Hip_shared(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 255db2becc9SJeremy L Thompson CeedVector v) { 256db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyTensorCore_Hip_shared(basis, false, num_elem, t_mode, eval_mode, u, v)); 257db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 258db2becc9SJeremy L Thompson } 259db2becc9SJeremy L Thompson 260db2becc9SJeremy L Thompson int CeedBasisApplyAddTensor_Hip_shared(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 261db2becc9SJeremy L Thompson CeedVector v) { 262db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyTensorCore_Hip_shared(basis, true, num_elem, t_mode, eval_mode, u, v)); 263db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 264db2becc9SJeremy L Thompson } 265db2becc9SJeremy L Thompson 2667d8d0e25Snbeams //------------------------------------------------------------------------------ 2671dda9c1aSJeremy L Thompson // Basis apply - tensor AtPoints 2681dda9c1aSJeremy L Thompson //------------------------------------------------------------------------------ 269db2becc9SJeremy L Thompson static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points, 270db2becc9SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 2711dda9c1aSJeremy L Thompson Ceed ceed; 2721dda9c1aSJeremy L Thompson CeedInt Q_1d, dim, max_num_points = num_points[0]; 2731dda9c1aSJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 2741dda9c1aSJeremy L Thompson const int max_block_size = 32; 2751dda9c1aSJeremy L Thompson const CeedScalar *d_x, *d_u; 2761dda9c1aSJeremy L Thompson CeedScalar *d_v; 2771dda9c1aSJeremy L Thompson CeedBasis_Hip_shared *data; 2781dda9c1aSJeremy L Thompson 2791dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 2801dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 2811dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 2821dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 2831dda9c1aSJeremy L Thompson 2841dda9c1aSJeremy L Thompson // Weight handled separately 2851dda9c1aSJeremy L Thompson if (eval_mode == CEED_EVAL_WEIGHT) { 2865a5594ffSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(v, 1.0)); 2871dda9c1aSJeremy L Thompson return CEED_ERROR_SUCCESS; 2881dda9c1aSJeremy L Thompson } 2891dda9c1aSJeremy L Thompson 290111870feSJeremy L Thompson // Check padded to uniform number of points per elem 291111870feSJeremy L Thompson for (CeedInt i = 1; i < num_elem; i++) max_num_points = CeedIntMax(max_num_points, num_points[i]); 292111870feSJeremy L Thompson { 293111870feSJeremy L Thompson CeedInt num_comp, q_comp; 294111870feSJeremy L Thompson CeedSize len, len_required; 295111870feSJeremy L Thompson 296111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 297111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 298111870feSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(is_transpose ? u : v, &len)); 299111870feSJeremy L Thompson len_required = (CeedSize)num_comp * (CeedSize)q_comp * (CeedSize)num_elem * (CeedSize)max_num_points; 300111870feSJeremy L Thompson CeedCheck(len >= len_required, ceed, CEED_ERROR_BACKEND, 301111870feSJeremy L Thompson "Vector at points must be padded to the same number of points in each element for BasisApplyAtPoints on GPU backends." 302111870feSJeremy L Thompson " Found %" CeedSize_FMT ", Required %" CeedSize_FMT, 303111870feSJeremy L Thompson len, len_required); 304111870feSJeremy L Thompson } 305111870feSJeremy L Thompson 306111870feSJeremy L Thompson // Move num_points array to device 307111870feSJeremy L Thompson if (is_transpose) { 308111870feSJeremy L Thompson const CeedInt num_bytes = num_elem * sizeof(CeedInt); 309111870feSJeremy L Thompson 310111870feSJeremy L Thompson if (num_elem != data->num_elem_at_points) { 311111870feSJeremy L Thompson data->num_elem_at_points = num_elem; 312111870feSJeremy L Thompson 313111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 314111870feSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_points_per_elem, num_bytes)); 315111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem)); 316111870feSJeremy L Thompson CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem)); 317111870feSJeremy L Thompson } 3189e511c80SJeremy L Thompson if (memcmp(data->h_points_per_elem, num_points, num_bytes)) { 319111870feSJeremy L Thompson memcpy(data->h_points_per_elem, num_points, num_bytes); 320111870feSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_points_per_elem, num_points, num_bytes, hipMemcpyHostToDevice)); 321111870feSJeremy L Thompson } 322111870feSJeremy L Thompson } 323111870feSJeremy L Thompson 3241dda9c1aSJeremy L Thompson // Build kernels if needed 3251dda9c1aSJeremy L Thompson if (data->num_points != max_num_points) { 3261dda9c1aSJeremy L Thompson CeedInt P_1d; 3271dda9c1aSJeremy L Thompson 3281dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 3291dda9c1aSJeremy L Thompson data->num_points = max_num_points; 3301dda9c1aSJeremy L Thompson 3311dda9c1aSJeremy L Thompson // -- Create interp matrix to Chebyshev coefficients 3321dda9c1aSJeremy L Thompson if (!data->d_chebyshev_interp_1d) { 3331dda9c1aSJeremy L Thompson CeedSize interp_bytes; 3341dda9c1aSJeremy L Thompson CeedScalar *chebyshev_interp_1d; 3351dda9c1aSJeremy L Thompson 3361dda9c1aSJeremy L Thompson interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 3371dda9c1aSJeremy L Thompson CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 3385a5594ffSJeremy L Thompson CeedCallBackend(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 3391dda9c1aSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 3401dda9c1aSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 3411dda9c1aSJeremy L Thompson CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 3421dda9c1aSJeremy L Thompson } 3431dda9c1aSJeremy L Thompson 3441dda9c1aSJeremy L Thompson // -- Compile kernels 345*9c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h>\n"; 3461dda9c1aSJeremy L Thompson CeedInt num_comp; 3471dda9c1aSJeremy L Thompson 3481dda9c1aSJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 3491dda9c1aSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 3501dda9c1aSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 351f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 3521dda9c1aSJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 353f7c9815fSJeremy L Thompson max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); 3541dda9c1aSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 3551dda9c1aSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 3561dda9c1aSJeremy L Thompson } 3571dda9c1aSJeremy L Thompson 3581dda9c1aSJeremy L Thompson // Get read/write access to u, v 3591dda9c1aSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 3601dda9c1aSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 3611dda9c1aSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 362db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 363db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 3641dda9c1aSJeremy L Thompson 3651dda9c1aSJeremy L Thompson // Clear v for transpose operation 366db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 36719a04db8SJeremy L Thompson CeedInt num_comp, q_comp, num_nodes; 3681dda9c1aSJeremy L Thompson CeedSize length; 3691dda9c1aSJeremy L Thompson 37019a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 37119a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 37219a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 37319a04db8SJeremy L Thompson length = 37419a04db8SJeremy L Thompson (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp)); 3751dda9c1aSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 3761dda9c1aSJeremy L Thompson } 3771dda9c1aSJeremy L Thompson 3781dda9c1aSJeremy L Thompson // Basis action 3791dda9c1aSJeremy L Thompson switch (eval_mode) { 3801dda9c1aSJeremy L Thompson case CEED_EVAL_INTERP: { 381111870feSJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 3821dda9c1aSJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 3831dda9c1aSJeremy L Thompson 3841dda9c1aSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 3851dda9c1aSJeremy L Thompson } break; 3861dda9c1aSJeremy L Thompson case CEED_EVAL_GRAD: { 387111870feSJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 3882d10e82cSJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 3891dda9c1aSJeremy L Thompson 3901dda9c1aSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 3911dda9c1aSJeremy L Thompson } break; 3921dda9c1aSJeremy L Thompson case CEED_EVAL_WEIGHT: 3931dda9c1aSJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 3941dda9c1aSJeremy L Thompson break; 3951dda9c1aSJeremy L Thompson // LCOV_EXCL_START 3961dda9c1aSJeremy L Thompson case CEED_EVAL_DIV: 3971dda9c1aSJeremy L Thompson case CEED_EVAL_CURL: 3981dda9c1aSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 3991dda9c1aSJeremy L Thompson // LCOV_EXCL_STOP 4001dda9c1aSJeremy L Thompson } 4011dda9c1aSJeremy L Thompson 4021dda9c1aSJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 4031dda9c1aSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 4041dda9c1aSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 4051dda9c1aSJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 4061dda9c1aSJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 4071dda9c1aSJeremy L Thompson return CEED_ERROR_SUCCESS; 4081dda9c1aSJeremy L Thompson } 4091dda9c1aSJeremy L Thompson 410db2becc9SJeremy L Thompson static int CeedBasisApplyAtPoints_Hip_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 411db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 412db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip_shared(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 413db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 414db2becc9SJeremy L Thompson } 415db2becc9SJeremy L Thompson 416db2becc9SJeremy L Thompson static int CeedBasisApplyAddAtPoints_Hip_shared(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 417db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 418db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip_shared(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 419db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 420db2becc9SJeremy L Thompson } 421db2becc9SJeremy L Thompson 4221dda9c1aSJeremy L Thompson //------------------------------------------------------------------------------ 4237d8d0e25Snbeams // Destroy basis 4247d8d0e25Snbeams //------------------------------------------------------------------------------ 4257d8d0e25Snbeams static int CeedBasisDestroy_Hip_shared(CeedBasis basis) { 4267d8d0e25Snbeams Ceed ceed; 4277d8d0e25Snbeams CeedBasis_Hip_shared *data; 428b7453713SJeremy L Thompson 429b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4302b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 4312b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 4321dda9c1aSJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 433097cc795SJames Wright if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 434111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem)); 435111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 4362b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp_1d)); 4372b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad_1d)); 4382b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_collo_grad_1d)); 4391dda9c1aSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 4402b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 441e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4427d8d0e25Snbeams } 4437d8d0e25Snbeams 4447d8d0e25Snbeams //------------------------------------------------------------------------------ 4457d8d0e25Snbeams // Create tensor basis 4467d8d0e25Snbeams //------------------------------------------------------------------------------ 4472b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 4486574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 4497d8d0e25Snbeams Ceed ceed; 450b7453713SJeremy L Thompson CeedInt num_comp; 451b7453713SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 452397164e9SSebastian Grimberg const CeedInt interp_bytes = q_bytes * P_1d; 4537d8d0e25Snbeams CeedBasis_Hip_shared *data; 454b7453713SJeremy L Thompson 455b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4562b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 4577d8d0e25Snbeams 4587d8d0e25Snbeams // Copy basis data to GPU 459097cc795SJames Wright if (q_weight_1d) { 460b7453713SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 461b7453713SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 462097cc795SJames Wright } 463397164e9SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 464397164e9SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 465397164e9SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 466397164e9SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 4677d8d0e25Snbeams 4687d8d0e25Snbeams // Compute collocated gradient and copy to GPU 469437930d1SJeremy L Thompson data->d_collo_grad_1d = NULL; 4709e201c85SYohann bool has_collocated_grad = dim == 3 && Q_1d >= P_1d; 471b7453713SJeremy L Thompson 4729e201c85SYohann if (has_collocated_grad) { 473437930d1SJeremy L Thompson CeedScalar *collo_grad_1d; 474b7453713SJeremy L Thompson 4752b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(Q_1d * Q_1d, &collo_grad_1d)); 4762b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetCollocatedGrad(basis, collo_grad_1d)); 477b7453713SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_collo_grad_1d, q_bytes * Q_1d)); 478b7453713SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_collo_grad_1d, collo_grad_1d, q_bytes * Q_1d, hipMemcpyHostToDevice)); 4792b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&collo_grad_1d)); 4807d8d0e25Snbeams } 4817d8d0e25Snbeams 4829e31c45bSnbeams // Set number of threads per block for basis kernels 4832b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 4842b730f8bSJeremy L Thompson CeedCallBackend(ComputeBasisThreadBlockSizes(dim, P_1d, Q_1d, num_comp, data->block_sizes)); 4859e31c45bSnbeams 4869e31c45bSnbeams // Compile basis kernels 487*9c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-shared-basis-tensor.h>\n"; 488*9c25dd66SJeremy L Thompson 489eb7e6cafSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 11, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D", 490eb7e6cafSJeremy L Thompson CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), 491eb7e6cafSJeremy L Thompson "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_INTERP_BLOCK_SIZE", data->block_sizes[0], "BASIS_GRAD_BLOCK_SIZE", 4922b730f8bSJeremy L Thompson data->block_sizes[1], "BASIS_WEIGHT_BLOCK_SIZE", data->block_sizes[2], "BASIS_HAS_COLLOCATED_GRAD", 4932b730f8bSJeremy L Thompson has_collocated_grad)); 494eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 495eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 496db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTransposeAdd", &data->InterpTransposeAdd)); 497eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 498eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTranspose", &data->GradTranspose)); 499db2becc9SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTransposeAdd", &data->GradTransposeAdd)); 500eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 5017d8d0e25Snbeams 5022b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 5037d8d0e25Snbeams 5047d8d0e25Snbeams // Register backend functions 5052b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyTensor_Hip_shared)); 506db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddTensor_Hip_shared)); 5071dda9c1aSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip_shared)); 508db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip_shared)); 5092b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip_shared)); 510e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5117d8d0e25Snbeams } 5122a86cc9dSSebastian Grimberg 5137d8d0e25Snbeams //------------------------------------------------------------------------------ 514