1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 80d0321e0SJeremy L Thompson #include <ceed/ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 120d0321e0SJeremy L Thompson #include "ceed-hip-ref.h" 130d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 140d0321e0SJeremy L Thompson 150d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 160d0321e0SJeremy L Thompson // Basis apply - tensor 170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 18437930d1SJeremy L Thompson int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, 19437930d1SJeremy L Thompson CeedTransposeMode t_mode, 20437930d1SJeremy L Thompson CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 210d0321e0SJeremy L Thompson int ierr; 220d0321e0SJeremy L Thompson Ceed ceed; 230d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 240d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 250d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 260d0321e0SJeremy L Thompson CeedBasis_Hip *data; 270d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 28437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 29437930d1SJeremy L Thompson const int max_block_size = 64; 300d0321e0SJeremy L Thompson 310d0321e0SJeremy L Thompson // Read vectors 320d0321e0SJeremy L Thompson const CeedScalar *d_u; 330d0321e0SJeremy L Thompson CeedScalar *d_v; 34437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 350d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 360d0321e0SJeremy L Thompson } 370d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 380d0321e0SJeremy L Thompson 390d0321e0SJeremy L Thompson // Clear v for transpose operation 40437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 411f9221feSJeremy L Thompson CeedSize length; 420d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 430d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 440d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 450d0321e0SJeremy L Thompson } 460d0321e0SJeremy L Thompson 470d0321e0SJeremy L Thompson // Basis action 48437930d1SJeremy L Thompson switch (eval_mode) { 490d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 50437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 51437930d1SJeremy L Thompson &data->d_interp_1d, &d_u, &d_v 520d0321e0SJeremy L Thompson }; 53437930d1SJeremy L Thompson CeedInt Q_1d, dim; 54437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChkBackend(ierr); 550d0321e0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 56437930d1SJeremy L Thompson CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 570d0321e0SJeremy L Thompson 58437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Interp, num_elem, block_size, interp_args); 590d0321e0SJeremy L Thompson CeedChkBackend(ierr); 600d0321e0SJeremy L Thompson } break; 610d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 62437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_interp_1d, 63437930d1SJeremy L Thompson &data->d_grad_1d, &d_u, &d_v 640d0321e0SJeremy L Thompson }; 65437930d1SJeremy L Thompson CeedInt block_size = max_block_size; 660d0321e0SJeremy L Thompson 67437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Grad, num_elem, block_size, grad_args); 680d0321e0SJeremy L Thompson CeedChkBackend(ierr); 690d0321e0SJeremy L Thompson } break; 700d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 71437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight_1d, &d_v}; 72437930d1SJeremy L Thompson const int block_size = 64; 73437930d1SJeremy L Thompson int grid_size = num_elem / block_size; 74437930d1SJeremy L Thompson if (block_size * grid_size < num_elem) 75437930d1SJeremy L Thompson grid_size += 1; 760d0321e0SJeremy L Thompson 77437930d1SJeremy L Thompson ierr = CeedRunKernelHip(ceed, data->Weight, grid_size, block_size, 78437930d1SJeremy L Thompson weight_args); CeedChkBackend(ierr); 790d0321e0SJeremy L Thompson } break; 800d0321e0SJeremy L Thompson // LCOV_EXCL_START 810d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 820d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 830d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 840d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 850d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 860d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 870d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 880d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 890d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 900d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 910d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 920d0321e0SJeremy L Thompson } 930d0321e0SJeremy L Thompson 940d0321e0SJeremy L Thompson // Restore vectors 95437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 960d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 970d0321e0SJeremy L Thompson } 980d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 990d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1000d0321e0SJeremy L Thompson } 1010d0321e0SJeremy L Thompson 1020d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1030d0321e0SJeremy L Thompson // Basis apply - non-tensor 1040d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 105437930d1SJeremy L Thompson int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, 106437930d1SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, 1070d0321e0SJeremy L Thompson CeedVector u, CeedVector v) { 1080d0321e0SJeremy L Thompson int ierr; 1090d0321e0SJeremy L Thompson Ceed ceed; 1100d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 1110d0321e0SJeremy L Thompson Ceed_Hip *ceed_Hip; 1120d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 1130d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 1140d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 115437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 116437930d1SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChkBackend(ierr); 117437930d1SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChkBackend(ierr); 118437930d1SJeremy L Thompson const CeedInt transpose = t_mode == CEED_TRANSPOSE; 1190d0321e0SJeremy L Thompson int elemsPerBlock = 1; 120437930d1SJeremy L Thompson int grid = num_elem/elemsPerBlock+(( 121437930d1SJeremy L Thompson num_elem/elemsPerBlock*elemsPerBlock<num_elem)?1:0); 1220d0321e0SJeremy L Thompson 1230d0321e0SJeremy L Thompson // Read vectors 1240d0321e0SJeremy L Thompson const CeedScalar *d_u; 1250d0321e0SJeremy L Thompson CeedScalar *d_v; 126437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1270d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 1280d0321e0SJeremy L Thompson } 1290d0321e0SJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 1300d0321e0SJeremy L Thompson 1310d0321e0SJeremy L Thompson // Clear v for transpose operation 132437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1331f9221feSJeremy L Thompson CeedSize length; 1340d0321e0SJeremy L Thompson ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 1350d0321e0SJeremy L Thompson ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 1360d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 1370d0321e0SJeremy L Thompson } 1380d0321e0SJeremy L Thompson 1390d0321e0SJeremy L Thompson // Apply basis operation 140437930d1SJeremy L Thompson switch (eval_mode) { 1410d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 142437930d1SJeremy L Thompson void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 1430d0321e0SJeremy L Thompson &data->d_interp, &d_u, &d_v 1440d0321e0SJeremy L Thompson }; 145437930d1SJeremy L Thompson if (transpose) { 146437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_nodes, 1, 147437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1480d0321e0SJeremy L Thompson } else { 149437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_qpts, 1, 150437930d1SJeremy L Thompson elemsPerBlock, interp_args); CeedChkBackend(ierr); 1510d0321e0SJeremy L Thompson } 1520d0321e0SJeremy L Thompson } break; 1530d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 154437930d1SJeremy L Thompson void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_grad, 1550d0321e0SJeremy L Thompson &d_u, &d_v 1560d0321e0SJeremy L Thompson }; 157437930d1SJeremy L Thompson if (transpose) { 158437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_nodes, 1, 159437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1600d0321e0SJeremy L Thompson } else { 161437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_qpts, 1, 162437930d1SJeremy L Thompson elemsPerBlock, grad_args); CeedChkBackend(ierr); 1630d0321e0SJeremy L Thompson } 1640d0321e0SJeremy L Thompson } break; 1650d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 166437930d1SJeremy L Thompson void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight, &d_v}; 167437930d1SJeremy L Thompson ierr = CeedRunKernelDimHip(ceed, data->Weight, grid, num_qpts, 1, 168437930d1SJeremy L Thompson elemsPerBlock, weight_args); CeedChkBackend(ierr); 1690d0321e0SJeremy L Thompson } break; 1700d0321e0SJeremy L Thompson // LCOV_EXCL_START 1710d0321e0SJeremy L Thompson // Evaluate the divergence to/from the quadrature points 1720d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 1730d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1740d0321e0SJeremy L Thompson // Evaluate the curl to/from the quadrature points 1750d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 1760d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1770d0321e0SJeremy L Thompson // Take no action, BasisApply should not have been called 1780d0321e0SJeremy L Thompson case CEED_EVAL_NONE: 1790d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1800d0321e0SJeremy L Thompson "CEED_EVAL_NONE does not make sense in this context"); 1810d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 1820d0321e0SJeremy L Thompson } 1830d0321e0SJeremy L Thompson 1840d0321e0SJeremy L Thompson // Restore vectors 185437930d1SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) { 1860d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 1870d0321e0SJeremy L Thompson } 1880d0321e0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 1890d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1900d0321e0SJeremy L Thompson } 1910d0321e0SJeremy L Thompson 1920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1930d0321e0SJeremy L Thompson // Destroy tensor basis 1940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1950d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 1960d0321e0SJeremy L Thompson int ierr; 1970d0321e0SJeremy L Thompson Ceed ceed; 1980d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 1990d0321e0SJeremy L Thompson 2000d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2010d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2020d0321e0SJeremy L Thompson 2030d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2040d0321e0SJeremy L Thompson 205437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight_1d); CeedChk_Hip(ceed, ierr); 206437930d1SJeremy L Thompson ierr = hipFree(data->d_interp_1d); CeedChk_Hip(ceed, ierr); 207437930d1SJeremy L Thompson ierr = hipFree(data->d_grad_1d); CeedChk_Hip(ceed, ierr); 2080d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 209437930d1SJeremy L Thompson 2100d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2110d0321e0SJeremy L Thompson } 2120d0321e0SJeremy L Thompson 2130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2140d0321e0SJeremy L Thompson // Destroy non-tensor basis 2150d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2160d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 2170d0321e0SJeremy L Thompson int ierr; 2180d0321e0SJeremy L Thompson Ceed ceed; 2190d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2200d0321e0SJeremy L Thompson 2210d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 2220d0321e0SJeremy L Thompson ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 2230d0321e0SJeremy L Thompson 2240d0321e0SJeremy L Thompson CeedChk_Hip(ceed, hipModuleUnload(data->module)); 2250d0321e0SJeremy L Thompson 226437930d1SJeremy L Thompson ierr = hipFree(data->d_q_weight); CeedChk_Hip(ceed, ierr); 2270d0321e0SJeremy L Thompson ierr = hipFree(data->d_interp); CeedChk_Hip(ceed, ierr); 2280d0321e0SJeremy L Thompson ierr = hipFree(data->d_grad); CeedChk_Hip(ceed, ierr); 2290d0321e0SJeremy L Thompson ierr = CeedFree(&data); CeedChkBackend(ierr); 230437930d1SJeremy L Thompson 2310d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2320d0321e0SJeremy L Thompson } 2330d0321e0SJeremy L Thompson 2340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2350d0321e0SJeremy L Thompson // Create tensor 2360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 237437930d1SJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, 238437930d1SJeremy L Thompson const CeedScalar *interp_1d, 239437930d1SJeremy L Thompson const CeedScalar *grad_1d, 2400d0321e0SJeremy L Thompson const CeedScalar *qref1d, 241437930d1SJeremy L Thompson const CeedScalar *q_weight_1d, 2420d0321e0SJeremy L Thompson CeedBasis basis) { 2430d0321e0SJeremy L Thompson int ierr; 2440d0321e0SJeremy L Thompson Ceed ceed; 2450d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 2460d0321e0SJeremy L Thompson CeedBasis_Hip *data; 2470d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 2480d0321e0SJeremy L Thompson 2490d0321e0SJeremy L Thompson // Copy data to GPU 250437930d1SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 251437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight_1d, q_bytes); 252437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 253437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, 2540d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2550d0321e0SJeremy L Thompson 256437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 257437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp_1d, interp_bytes); 258437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 259437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, 2600d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2610d0321e0SJeremy L Thompson 262437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad_1d, interp_bytes); 263437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 264437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, 2650d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 2660d0321e0SJeremy L Thompson 2670d0321e0SJeremy L Thompson // Complie basis kernels 2680d0321e0SJeremy L Thompson CeedInt ncomp; 2690d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 270437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 271437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-tensor.h", 272437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 27346dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 274437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 275437930d1SJeremy L Thompson CeedChkBackend(ierr); 27646dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 277437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 7, 278d7d111ecSJeremy L Thompson "BASIS_Q_1D", Q_1d, 279d7d111ecSJeremy L Thompson "BASIS_P_1D", P_1d, 280437930d1SJeremy L Thompson "BASIS_BUF_LEN", ncomp * CeedIntPow(Q_1d > P_1d ? 281437930d1SJeremy L Thompson Q_1d : P_1d, dim), 2820d0321e0SJeremy L Thompson "BASIS_DIM", dim, 283d7d111ecSJeremy L Thompson "BASIS_NUM_COMP", ncomp, 284d7d111ecSJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), 285d7d111ecSJeremy L Thompson "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim) 2860d0321e0SJeremy L Thompson ); CeedChkBackend(ierr); 287437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 2880d0321e0SJeremy L Thompson CeedChkBackend(ierr); 289437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 2900d0321e0SJeremy L Thompson CeedChkBackend(ierr); 291437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 2920d0321e0SJeremy L Thompson CeedChkBackend(ierr); 293437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 294437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 295437930d1SJeremy L Thompson 2960d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 2970d0321e0SJeremy L Thompson 2980d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 2990d0321e0SJeremy L Thompson CeedBasisApply_Hip); CeedChkBackend(ierr); 3000d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3010d0321e0SJeremy L Thompson CeedBasisDestroy_Hip); CeedChkBackend(ierr); 3020d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3030d0321e0SJeremy L Thompson } 3040d0321e0SJeremy L Thompson 3050d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3060d0321e0SJeremy L Thompson // Create non-tensor 3070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 308437930d1SJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, 309437930d1SJeremy L Thompson CeedInt num_qpts, const CeedScalar *interp, 3100d0321e0SJeremy L Thompson const CeedScalar *grad, const CeedScalar *qref, 311437930d1SJeremy L Thompson const CeedScalar *q_weight, CeedBasis basis) { 3120d0321e0SJeremy L Thompson int ierr; 3130d0321e0SJeremy L Thompson Ceed ceed; 3140d0321e0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 3150d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 3160d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 3170d0321e0SJeremy L Thompson 3180d0321e0SJeremy L Thompson // Copy basis data to GPU 319437930d1SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 320437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_q_weight, q_bytes); CeedChk_Hip(ceed, ierr); 321437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_q_weight, q_weight, q_bytes, 3220d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3230d0321e0SJeremy L Thompson 324437930d1SJeremy L Thompson const CeedInt interp_bytes = q_bytes * num_nodes; 325437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_interp, interp_bytes); 326437930d1SJeremy L Thompson CeedChk_Hip(ceed, ierr); 327437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_interp, interp, interp_bytes, 3280d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3290d0321e0SJeremy L Thompson 330437930d1SJeremy L Thompson const CeedInt grad_bytes = q_bytes * num_nodes * dim; 331437930d1SJeremy L Thompson ierr = hipMalloc((void **)&data->d_grad, grad_bytes); CeedChk_Hip(ceed, ierr); 332437930d1SJeremy L Thompson ierr = hipMemcpy(data->d_grad, grad, grad_bytes, 3330d0321e0SJeremy L Thompson hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 3340d0321e0SJeremy L Thompson 3350d0321e0SJeremy L Thompson // Compile basis kernels 3360d0321e0SJeremy L Thompson CeedInt ncomp; 3370d0321e0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 338437930d1SJeremy L Thompson char *basis_kernel_path, *basis_kernel_source; 339437930d1SJeremy L Thompson ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-basis-nontensor.h", 340437930d1SJeremy L Thompson &basis_kernel_path); CeedChkBackend(ierr); 34146dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 342437930d1SJeremy L Thompson ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 343437930d1SJeremy L Thompson CeedChkBackend(ierr); 34446dc0734SJeremy L Thompson CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 345437930d1SJeremy L Thompson ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 4, 346d7d111ecSJeremy L Thompson "BASIS_Q", num_qpts, 347d7d111ecSJeremy L Thompson "BASIS_P", num_nodes, 3480d0321e0SJeremy L Thompson "BASIS_DIM", dim, 349d7d111ecSJeremy L Thompson "BASIS_NUM_COMP", ncomp 3500d0321e0SJeremy L Thompson ); CeedChk_Hip(ceed, ierr); 351437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 3520d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 353437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 3540d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 355437930d1SJeremy L Thompson ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 3560d0321e0SJeremy L Thompson CeedChk_Hip(ceed, ierr); 357437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 358437930d1SJeremy L Thompson ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 3590d0321e0SJeremy L Thompson 3600d0321e0SJeremy L Thompson ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 3610d0321e0SJeremy L Thompson 3620d0321e0SJeremy L Thompson // Register backend functions 3630d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 3640d0321e0SJeremy L Thompson CeedBasisApplyNonTensor_Hip); CeedChkBackend(ierr); 3650d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 3660d0321e0SJeremy L Thompson CeedBasisDestroyNonTensor_Hip); CeedChkBackend(ierr); 3670d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3680d0321e0SJeremy L Thompson } 3690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 370