10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details. 40d0321e0SJeremy L Thompson // 50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software 60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral 70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and 80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed. 90d0321e0SJeremy L Thompson // 100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office 120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for 130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including 140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early 150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative. 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson #include <ceed/ceed.h> 180d0321e0SJeremy L Thompson #include <ceed/backend.h> 19*437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 200d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 210d0321e0SJeremy L Thompson #include "ceed-hip-ref.h" 220d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 230d0321e0SJeremy L Thompson 240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 250d0321e0SJeremy L Thompson // Basis apply - tensor 260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 27*437930d1SJeremy L Thompson int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, 28*437930d1SJeremy L Thompson CeedTransposeMode t_mode, 29*437930d1SJeremy L Thompson CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 300d0321e0SJeremy L Thompson int ierr; 310d0321e0SJeremy L Thompson Ceed ceed; 320d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 330d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 340d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 350d0321e0SJeremy L Thompson CeedBasis_Hip *data; 360d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 37*437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 38*437930d1SJeremy L Thompson const int max_block_size = 64; 390d0321e0SJeremy L Thompson 400d0321e0SJeremy L Thompson // Read vectors 410d0321e0SJeremy L Thompson const CeedScalar *d_u; 420d0321e0SJeremy L Thompson CeedScalar *d_v; 43*437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 440d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 450d0321e0SJeremy L Thompson } 460d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 470d0321e0SJeremy L Thompson 480d0321e0SJeremy L Thompson // Clear v for transpose operation 49*437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 500d0321e0SJeremy L Thompson CeedInt length; 510d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 520d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 530d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 540d0321e0SJeremy L Thompson } 550d0321e0SJeremy L Thompson 560d0321e0SJeremy L Thompson // Basis action 57*437930d1SJeremy L Thompson switch (eval_mode) { 580d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 59*437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 60*437930d1SJeremy L Thompson &data->d_interp_1d, &d_u, &d_v 610d0321e0SJeremy L Thompson }; 62*437930d1SJeremy L Thompson CeedInt Q_1d, dim; 63*437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChkBackend(ierr); 640d0321e0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 65*437930d1SJeremy L Thompson CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 660d0321e0SJeremy L Thompson 67*437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Interp, num_elem, block_size, interp_args); 680d0321e0SJeremy L Thompson CeedChkBackend(ierr); 690d0321e0SJeremy L Thompson } break; 700d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 71*437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_interp_1d, 72*437930d1SJeremy L Thompson &data->d_grad_1d, &d_u, &d_v 730d0321e0SJeremy L Thompson }; 74*437930d1SJeremy L Thompson CeedInt block_size = max_block_size; 750d0321e0SJeremy L Thompson 76*437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Grad, num_elem, block_size, grad_args); 770d0321e0SJeremy L Thompson CeedChkBackend(ierr); 780d0321e0SJeremy L Thompson } break; 790d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 80*437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight_1d, &d_v}; 81*437930d1SJeremy L Thompson const int block_size = 64; 82*437930d1SJeremy L Thompson int grid_size = num_elem / block_size; 83*437930d1SJeremy L Thompson if (block_size * grid_size < num_elem) 84*437930d1SJeremy L Thompson grid_size += 1; 850d0321e0SJeremy L Thompson 86*437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Weight, grid_size, block_size, 87*437930d1SJeremy L Thompson weight_args); CeedChkBackend(ierr); 880d0321e0SJeremy L Thompson } break; 890d0321e0SJeremy L Thompson // LCOV_EXCL_START 900d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 910d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 920d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 930d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 940d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 950d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 960d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 970d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 980d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 990d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 1000d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 1010d0321e0SJeremy L Thompson } 1020d0321e0SJeremy L Thompson 1030d0321e0SJeremy L Thompson // Restore vectors 104*437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1050d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 1060d0321e0SJeremy L Thompson } 1070d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 1080d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1090d0321e0SJeremy L Thompson } 1100d0321e0SJeremy L Thompson 1110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1120d0321e0SJeremy L Thompson // Basis apply - non-tensor 1130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 114*437930d1SJeremy L Thompson int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, 115*437930d1SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, 1160d0321e0SJeremy L Thompson CeedVector u, CeedVector v) { 1170d0321e0SJeremy L Thompson int ierr; 1180d0321e0SJeremy L Thompson Ceed ceed; 1190d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 1200d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 1210d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 1220d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 1230d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 124*437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 125*437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChkBackend(ierr); 126*437930d1SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChkBackend(ierr); 127*437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 1280d0321e0SJeremy L Thompson int elemsPerBlock = 1; 129*437930d1SJeremy L Thompson int grid = num_elem/elemsPerBlock+(( 130*437930d1SJeremy L Thompson num_elem/elemsPerBlock*elemsPerBlock<num_elem)?1:0); 1310d0321e0SJeremy L Thompson 1320d0321e0SJeremy L Thompson // Read vectors 1330d0321e0SJeremy L Thompson const CeedScalar *d_u; 1340d0321e0SJeremy L Thompson CeedScalar *d_v; 135*437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1360d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 1370d0321e0SJeremy L Thompson } 1380d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 1390d0321e0SJeremy L Thompson 1400d0321e0SJeremy L Thompson // Clear v for transpose operation 141*437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1420d0321e0SJeremy L Thompson CeedInt length; 1430d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 1440d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 1450d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1460d0321e0SJeremy L Thompson } 1470d0321e0SJeremy L Thompson 1480d0321e0SJeremy L Thompson // Apply basis operation 149*437930d1SJeremy L Thompson switch (eval_mode) { 1500d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 151*437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 1520d0321e0SJeremy L Thompson &data->d_interp, &d_u, &d_v 1530d0321e0SJeremy L Thompson }; 154*437930d1SJeremy L Thompson if (transpose) { 155*437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_nodes, 1, 156*437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1570d0321e0SJeremy L Thompson } else { 158*437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_qpts, 1, 159*437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1600d0321e0SJeremy L Thompson } 1610d0321e0SJeremy L Thompson } break; 1620d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 163*437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_grad, 1640d0321e0SJeremy L Thompson &d_u, &d_v 1650d0321e0SJeremy L Thompson }; 166*437930d1SJeremy L Thompson if (transpose) { 167*437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_nodes, 1, 168*437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1690d0321e0SJeremy L Thompson } else { 170*437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_qpts, 1, 171*437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1720d0321e0SJeremy L Thompson } 1730d0321e0SJeremy L Thompson } break; 1740d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 175*437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight, &d_v}; 176*437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Weight, grid, num_qpts, 1, 177*437930d1SJeremy L Thompson elemsPerBlock, weight_args); CeedChkBackend(ierr); 1780d0321e0SJeremy L Thompson } break; 1790d0321e0SJeremy L Thompson // LCOV_EXCL_START 1800d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 1810d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1820d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1830d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 1840d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1850d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1860d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 1870d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 1880d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1890d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 1900d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 1910d0321e0SJeremy L Thompson } 1920d0321e0SJeremy L Thompson 1930d0321e0SJeremy L Thompson // Restore vectors 194*437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1950d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 1960d0321e0SJeremy L Thompson } 1970d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 1980d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1990d0321e0SJeremy L Thompson } 2000d0321e0SJeremy L Thompson 2010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2020d0321e0SJeremy L Thompson // Destroy tensor basis 2030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2040d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 2050d0321e0SJeremy L Thompson int ierr; 2060d0321e0SJeremy L Thompson Ceed ceed; 2070d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2080d0321e0SJeremy L Thompson 2090d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2100d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2110d0321e0SJeremy L Thompson 2120d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2130d0321e0SJeremy L Thompson 214*437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight_1d); CeedChk_Hip(ceed, ierr); 215*437930d1SJeremy L Thompson ierr = hipFree(data->d_interp_1d); CeedChk_Hip(ceed, ierr); 216*437930d1SJeremy L Thompson ierr = hipFree(data->d_grad_1d); CeedChk_Hip(ceed, ierr); 2170d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 218*437930d1SJeremy L Thompson 2190d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2200d0321e0SJeremy L Thompson } 2210d0321e0SJeremy L Thompson 2220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2230d0321e0SJeremy L Thompson // Destroy non-tensor basis 2240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2250d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 2260d0321e0SJeremy L Thompson int ierr; 2270d0321e0SJeremy L Thompson Ceed ceed; 2280d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2290d0321e0SJeremy L Thompson 2300d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 2310d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2320d0321e0SJeremy L Thompson 2330d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2340d0321e0SJeremy L Thompson 235*437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight); CeedChk_Hip(ceed, ierr); 2360d0321e0SJeremy L Thompson ierr = hipFree(data->d_interp); CeedChk_Hip(ceed, ierr); 2370d0321e0SJeremy L Thompson ierr = hipFree(data->d_grad); CeedChk_Hip(ceed, ierr); 2380d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 239*437930d1SJeremy L Thompson 2400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2410d0321e0SJeremy L Thompson } 2420d0321e0SJeremy L Thompson 2430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2440d0321e0SJeremy L Thompson // Create tensor 2450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 246*437930d1SJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, 247*437930d1SJeremy L Thompson const CeedScalar *interp_1d, 248*437930d1SJeremy L Thompson const CeedScalar *grad_1d, 2490d0321e0SJeremy L Thompson const CeedScalar *qref1d, 250*437930d1SJeremy L Thompson const CeedScalar *q_weight_1d, 2510d0321e0SJeremy L Thompson CeedBasis basis) { 2520d0321e0SJeremy L Thompson int ierr; 2530d0321e0SJeremy L Thompson Ceed ceed; 2540d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2550d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2560d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 2570d0321e0SJeremy L Thompson 2580d0321e0SJeremy L Thompson // Copy data to GPU 259*437930d1SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 260*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight_1d, q_bytes); 261*437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 262*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, 2630d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2640d0321e0SJeremy L Thompson 265*437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 266*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp_1d, interp_bytes); 267*437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 268*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, 2690d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2700d0321e0SJeremy L Thompson 271*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad_1d, interp_bytes); 272*437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 273*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, 2740d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2750d0321e0SJeremy L Thompson 2760d0321e0SJeremy L Thompson // Complie basis kernels 2770d0321e0SJeremy L Thompson CeedInt ncomp; 2780d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 279*437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 280*437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-tensor.h", 281*437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 282*437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 283*437930d1SJeremy L Thompson CeedChkBackend(ierr); 284*437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 7, 285*437930d1SJeremy L Thompson "BASIS_Q1D", Q_1d, 286*437930d1SJeremy L Thompson "BASIS_P1D", P_1d, 287*437930d1SJeremy L Thompson "BASIS_BUF_LEN", ncomp * CeedIntPow(Q_1d > P_1d ? 288*437930d1SJeremy L Thompson Q_1d : P_1d, dim), 2890d0321e0SJeremy L Thompson "BASIS_DIM", dim, 2900d0321e0SJeremy L Thompson "BASIS_NCOMP", ncomp, 291*437930d1SJeremy L Thompson "BASIS_ELEMSIZE", CeedIntPow(P_1d, dim), 292*437930d1SJeremy L Thompson "BASIS_NQPT", CeedIntPow(Q_1d, dim) 2930d0321e0SJeremy L Thompson ); CeedChkBackend(ierr); 294*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 2950d0321e0SJeremy L Thompson CeedChkBackend(ierr); 296*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 2970d0321e0SJeremy L Thompson CeedChkBackend(ierr); 298*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 2990d0321e0SJeremy L Thompson CeedChkBackend(ierr); 300*437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 301*437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 302*437930d1SJeremy L Thompson 3030d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 3040d0321e0SJeremy L Thompson 3050d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3060d0321e0SJeremy L Thompson CeedBasisApply_Hip); CeedChkBackend(ierr); 3070d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3080d0321e0SJeremy L Thompson CeedBasisDestroy_Hip); CeedChkBackend(ierr); 3090d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3100d0321e0SJeremy L Thompson } 3110d0321e0SJeremy L Thompson 3120d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3130d0321e0SJeremy L Thompson // Create non-tensor 3140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 315*437930d1SJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, 316*437930d1SJeremy L Thompson CeedInt num_qpts, const CeedScalar *interp, 3170d0321e0SJeremy L Thompson const CeedScalar *grad, const CeedScalar *qref, 318*437930d1SJeremy L Thompson const CeedScalar *q_weight, CeedBasis basis) { 3190d0321e0SJeremy L Thompson int ierr; 3200d0321e0SJeremy L Thompson Ceed ceed; 3210d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 3220d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 3230d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 3240d0321e0SJeremy L Thompson 3250d0321e0SJeremy L Thompson // Copy basis data to GPU 326*437930d1SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 327*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight, q_bytes); CeedChk_Hip(ceed, ierr); 328*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight, q_weight, q_bytes, 3290d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3300d0321e0SJeremy L Thompson 331*437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * num_nodes; 332*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp, interp_bytes); 333*437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 334*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp, interp, interp_bytes, 3350d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3360d0321e0SJeremy L Thompson 337*437930d1SJeremy L Thompson const CeedInt grad_bytes = q_bytes * num_nodes * dim; 338*437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad, grad_bytes); CeedChk_Hip(ceed, ierr); 339*437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad, grad, grad_bytes, 3400d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3410d0321e0SJeremy L Thompson 3420d0321e0SJeremy L Thompson // Compile basis kernels 3430d0321e0SJeremy L Thompson CeedInt ncomp; 3440d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 345*437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 346*437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-nontensor.h", 347*437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 348*437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 349*437930d1SJeremy L Thompson CeedChkBackend(ierr); 350*437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 4, 351*437930d1SJeremy L Thompson "Q", num_qpts, 352*437930d1SJeremy L Thompson "P", num_nodes, 3530d0321e0SJeremy L Thompson "BASIS_DIM", dim, 3540d0321e0SJeremy L Thompson "BASIS_NCOMP", ncomp 3550d0321e0SJeremy L Thompson ); CeedChk_Hip(ceed, ierr); 356*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 3570d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 358*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 3590d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 360*437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 3610d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 362*437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 363*437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 3640d0321e0SJeremy L Thompson 3650d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 3660d0321e0SJeremy L Thompson 3670d0321e0SJeremy L Thompson // Register backend functions 3680d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3690d0321e0SJeremy L Thompson CeedBasisApplyNonTensor_Hip); CeedChkBackend(ierr); 3700d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3710d0321e0SJeremy L Thompson CeedBasisDestroyNonTensor_Hip); CeedChkBackend(ierr); 3720d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3730d0321e0SJeremy L Thompson } 3740d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 375