10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 40d0321e0SJeremy L Thompson // 50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 90d0321e0SJeremy L Thompson // 100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson #include <ceed/ceed.h> 180d0321e0SJeremy L Thompson #include <ceed/backend.h> 19437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 200d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 210d0321e0SJeremy L Thompson #include "ceed-hip-ref.h" 220d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 230d0321e0SJeremy L Thompson 240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 250d0321e0SJeremy L Thompson // Basis apply - tensor 260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 27437930d1SJeremy L Thompson int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, 28437930d1SJeremy L Thompson CeedTransposeMode t_mode, 29437930d1SJeremy L Thompson CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 300d0321e0SJeremy L Thompson int ierr; 310d0321e0SJeremy L Thompson Ceed ceed; 320d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 330d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 340d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 350d0321e0SJeremy L Thompson CeedBasis_Hip *data; 360d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 37437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 38437930d1SJeremy L Thompson const int max_block_size = 64; 390d0321e0SJeremy L Thompson 400d0321e0SJeremy L Thompson // Read vectors 410d0321e0SJeremy L Thompson const CeedScalar *d_u; 420d0321e0SJeremy L Thompson CeedScalar *d_v; 43437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 440d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 450d0321e0SJeremy L Thompson } 460d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 470d0321e0SJeremy L Thompson 480d0321e0SJeremy L Thompson // Clear v for transpose operation 49437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 500d0321e0SJeremy L Thompson CeedInt length; 510d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 520d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 530d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 540d0321e0SJeremy L Thompson } 550d0321e0SJeremy L Thompson 560d0321e0SJeremy L Thompson // Basis action 57437930d1SJeremy L Thompson switch (eval_mode) { 580d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 59437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 60437930d1SJeremy L Thompson &data->d_interp_1d, &d_u, &d_v 610d0321e0SJeremy L Thompson }; 62437930d1SJeremy L Thompson CeedInt Q_1d, dim; 63437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChkBackend(ierr); 640d0321e0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 65437930d1SJeremy L Thompson CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 660d0321e0SJeremy L Thompson 67437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Interp, num_elem, block_size, interp_args); 680d0321e0SJeremy L Thompson CeedChkBackend(ierr); 690d0321e0SJeremy L Thompson } break; 700d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 71437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_interp_1d, 72437930d1SJeremy L Thompson &data->d_grad_1d, &d_u, &d_v 730d0321e0SJeremy L Thompson }; 74437930d1SJeremy L Thompson CeedInt block_size = max_block_size; 750d0321e0SJeremy L Thompson 76437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Grad, num_elem, block_size, grad_args); 770d0321e0SJeremy L Thompson CeedChkBackend(ierr); 780d0321e0SJeremy L Thompson } break; 790d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 80437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight_1d, &d_v}; 81437930d1SJeremy L Thompson const int block_size = 64; 82437930d1SJeremy L Thompson int grid_size = num_elem / block_size; 83437930d1SJeremy L Thompson if (block_size * grid_size < num_elem) 84437930d1SJeremy L Thompson grid_size += 1; 850d0321e0SJeremy L Thompson 86437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Weight, grid_size, block_size, 87437930d1SJeremy L Thompson weight_args); CeedChkBackend(ierr); 880d0321e0SJeremy L Thompson } break; 890d0321e0SJeremy L Thompson // LCOV_EXCL_START 900d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 910d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 920d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 930d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 940d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 950d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 960d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 970d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 980d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 990d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 1000d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 1010d0321e0SJeremy L Thompson } 1020d0321e0SJeremy L Thompson 1030d0321e0SJeremy L Thompson // Restore vectors 104437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1050d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 1060d0321e0SJeremy L Thompson } 1070d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 1080d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1090d0321e0SJeremy L Thompson } 1100d0321e0SJeremy L Thompson 1110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1120d0321e0SJeremy L Thompson // Basis apply - non-tensor 1130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 114437930d1SJeremy L Thompson int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, 115437930d1SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, 1160d0321e0SJeremy L Thompson CeedVector u, CeedVector v) { 1170d0321e0SJeremy L Thompson int ierr; 1180d0321e0SJeremy L Thompson Ceed ceed; 1190d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 1200d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 1210d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 1220d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 1230d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 124437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 125437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChkBackend(ierr); 126437930d1SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChkBackend(ierr); 127437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 1280d0321e0SJeremy L Thompson int elemsPerBlock = 1; 129437930d1SJeremy L Thompson int grid = num_elem/elemsPerBlock+(( 130437930d1SJeremy L Thompson num_elem/elemsPerBlock*elemsPerBlock<num_elem)?1:0); 1310d0321e0SJeremy L Thompson 1320d0321e0SJeremy L Thompson // Read vectors 1330d0321e0SJeremy L Thompson const CeedScalar *d_u; 1340d0321e0SJeremy L Thompson CeedScalar *d_v; 135437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1360d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 1370d0321e0SJeremy L Thompson } 1380d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 1390d0321e0SJeremy L Thompson 1400d0321e0SJeremy L Thompson // Clear v for transpose operation 141437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1420d0321e0SJeremy L Thompson CeedInt length; 1430d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 1440d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 1450d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1460d0321e0SJeremy L Thompson } 1470d0321e0SJeremy L Thompson 1480d0321e0SJeremy L Thompson // Apply basis operation 149437930d1SJeremy L Thompson switch (eval_mode) { 1500d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 151437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 1520d0321e0SJeremy L Thompson &data->d_interp, &d_u, &d_v 1530d0321e0SJeremy L Thompson }; 154437930d1SJeremy L Thompson if (transpose) { 155437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_nodes, 1, 156437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1570d0321e0SJeremy L Thompson } else { 158437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_qpts, 1, 159437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1600d0321e0SJeremy L Thompson } 1610d0321e0SJeremy L Thompson } break; 1620d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 163437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_grad, 1640d0321e0SJeremy L Thompson &d_u, &d_v 1650d0321e0SJeremy L Thompson }; 166437930d1SJeremy L Thompson if (transpose) { 167437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_nodes, 1, 168437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1690d0321e0SJeremy L Thompson } else { 170437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_qpts, 1, 171437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1720d0321e0SJeremy L Thompson } 1730d0321e0SJeremy L Thompson } break; 1740d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 175437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight, &d_v}; 176437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Weight, grid, num_qpts, 1, 177437930d1SJeremy L Thompson elemsPerBlock, weight_args); CeedChkBackend(ierr); 1780d0321e0SJeremy L Thompson } break; 1790d0321e0SJeremy L Thompson // LCOV_EXCL_START 1800d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 1810d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1820d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1830d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 1840d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1850d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1860d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 1870d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 1880d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1890d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 1900d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 1910d0321e0SJeremy L Thompson } 1920d0321e0SJeremy L Thompson 1930d0321e0SJeremy L Thompson // Restore vectors 194437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1950d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 1960d0321e0SJeremy L Thompson } 1970d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 1980d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1990d0321e0SJeremy L Thompson } 2000d0321e0SJeremy L Thompson 2010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2020d0321e0SJeremy L Thompson // Destroy tensor basis 2030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2040d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 2050d0321e0SJeremy L Thompson int ierr; 2060d0321e0SJeremy L Thompson Ceed ceed; 2070d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2080d0321e0SJeremy L Thompson 2090d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2100d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2110d0321e0SJeremy L Thompson 2120d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2130d0321e0SJeremy L Thompson 214437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight_1d); CeedChk_Hip(ceed, ierr); 215437930d1SJeremy L Thompson ierr = hipFree(data->d_interp_1d); CeedChk_Hip(ceed, ierr); 216437930d1SJeremy L Thompson ierr = hipFree(data->d_grad_1d); CeedChk_Hip(ceed, ierr); 2170d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 218437930d1SJeremy L Thompson 2190d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2200d0321e0SJeremy L Thompson } 2210d0321e0SJeremy L Thompson 2220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2230d0321e0SJeremy L Thompson // Destroy non-tensor basis 2240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2250d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 2260d0321e0SJeremy L Thompson int ierr; 2270d0321e0SJeremy L Thompson Ceed ceed; 2280d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2290d0321e0SJeremy L Thompson 2300d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 2310d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2320d0321e0SJeremy L Thompson 2330d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2340d0321e0SJeremy L Thompson 235437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight); CeedChk_Hip(ceed, ierr); 2360d0321e0SJeremy L Thompson ierr = hipFree(data->d_interp); CeedChk_Hip(ceed, ierr); 2370d0321e0SJeremy L Thompson ierr = hipFree(data->d_grad); CeedChk_Hip(ceed, ierr); 2380d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 239437930d1SJeremy L Thompson 2400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2410d0321e0SJeremy L Thompson } 2420d0321e0SJeremy L Thompson 2430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2440d0321e0SJeremy L Thompson // Create tensor 2450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 246437930d1SJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, 247437930d1SJeremy L Thompson const CeedScalar *interp_1d, 248437930d1SJeremy L Thompson const CeedScalar *grad_1d, 2490d0321e0SJeremy L Thompson const CeedScalar *qref1d, 250437930d1SJeremy L Thompson const CeedScalar *q_weight_1d, 2510d0321e0SJeremy L Thompson CeedBasis basis) { 2520d0321e0SJeremy L Thompson int ierr; 2530d0321e0SJeremy L Thompson Ceed ceed; 2540d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2550d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2560d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 2570d0321e0SJeremy L Thompson 2580d0321e0SJeremy L Thompson // Copy data to GPU 259437930d1SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 260437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight_1d, q_bytes); 261437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 262437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, 2630d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2640d0321e0SJeremy L Thompson 265437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 266437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp_1d, interp_bytes); 267437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 268437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, 2690d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2700d0321e0SJeremy L Thompson 271437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad_1d, interp_bytes); 272437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 273437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, 2740d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2750d0321e0SJeremy L Thompson 2760d0321e0SJeremy L Thompson // Complie basis kernels 2770d0321e0SJeremy L Thompson CeedInt ncomp; 2780d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 279437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 280437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-tensor.h", 281437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 282*46dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 283437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 284437930d1SJeremy L Thompson CeedChkBackend(ierr); 285*46dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 286437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 7, 287437930d1SJeremy L Thompson "BASIS_Q1D", Q_1d, 288437930d1SJeremy L Thompson "BASIS_P1D", P_1d, 289437930d1SJeremy L Thompson "BASIS_BUF_LEN", ncomp * CeedIntPow(Q_1d > P_1d ? 290437930d1SJeremy L Thompson Q_1d : P_1d, dim), 2910d0321e0SJeremy L Thompson "BASIS_DIM", dim, 2920d0321e0SJeremy L Thompson "BASIS_NCOMP", ncomp, 293437930d1SJeremy L Thompson "BASIS_ELEMSIZE", CeedIntPow(P_1d, dim), 294437930d1SJeremy L Thompson "BASIS_NQPT", CeedIntPow(Q_1d, dim) 2950d0321e0SJeremy L Thompson ); CeedChkBackend(ierr); 296437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 2970d0321e0SJeremy L Thompson CeedChkBackend(ierr); 298437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 2990d0321e0SJeremy L Thompson CeedChkBackend(ierr); 300437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 3010d0321e0SJeremy L Thompson CeedChkBackend(ierr); 302437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 303437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 304437930d1SJeremy L Thompson 3050d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 3060d0321e0SJeremy L Thompson 3070d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3080d0321e0SJeremy L Thompson CeedBasisApply_Hip); CeedChkBackend(ierr); 3090d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3100d0321e0SJeremy L Thompson CeedBasisDestroy_Hip); CeedChkBackend(ierr); 3110d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3120d0321e0SJeremy L Thompson } 3130d0321e0SJeremy L Thompson 3140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3150d0321e0SJeremy L Thompson // Create non-tensor 3160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 317437930d1SJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, 318437930d1SJeremy L Thompson CeedInt num_qpts, const CeedScalar *interp, 3190d0321e0SJeremy L Thompson const CeedScalar *grad, const CeedScalar *qref, 320437930d1SJeremy L Thompson const CeedScalar *q_weight, CeedBasis basis) { 3210d0321e0SJeremy L Thompson int ierr; 3220d0321e0SJeremy L Thompson Ceed ceed; 3230d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 3240d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 3250d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 3260d0321e0SJeremy L Thompson 3270d0321e0SJeremy L Thompson // Copy basis data to GPU 328437930d1SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 329437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight, q_bytes); CeedChk_Hip(ceed, ierr); 330437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight, q_weight, q_bytes, 3310d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3320d0321e0SJeremy L Thompson 333437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * num_nodes; 334437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp, interp_bytes); 335437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 336437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp, interp, interp_bytes, 3370d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3380d0321e0SJeremy L Thompson 339437930d1SJeremy L Thompson const CeedInt grad_bytes = q_bytes * num_nodes * dim; 340437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad, grad_bytes); CeedChk_Hip(ceed, ierr); 341437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad, grad, grad_bytes, 3420d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3430d0321e0SJeremy L Thompson 3440d0321e0SJeremy L Thompson // Compile basis kernels 3450d0321e0SJeremy L Thompson CeedInt ncomp; 3460d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 347437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 348437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-nontensor.h", 349437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 350*46dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 351437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 352437930d1SJeremy L Thompson CeedChkBackend(ierr); 353*46dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 354437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 4, 355437930d1SJeremy L Thompson "Q", num_qpts, 356437930d1SJeremy L Thompson "P", num_nodes, 3570d0321e0SJeremy L Thompson "BASIS_DIM", dim, 3580d0321e0SJeremy L Thompson "BASIS_NCOMP", ncomp 3590d0321e0SJeremy L Thompson ); CeedChk_Hip(ceed, ierr); 360437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 3610d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 362437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 3630d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 364437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 3650d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 366437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 367437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 3680d0321e0SJeremy L Thompson 3690d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 3700d0321e0SJeremy L Thompson 3710d0321e0SJeremy L Thompson // Register backend functions 3720d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3730d0321e0SJeremy L Thompson CeedBasisApplyNonTensor_Hip); CeedChkBackend(ierr); 3740d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3750d0321e0SJeremy L Thompson CeedBasisDestroyNonTensor_Hip); CeedChkBackend(ierr); 3760d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3770d0321e0SJeremy L Thompson } 3780d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 379