13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <stdbool.h> 120d0321e0SJeremy L Thompson #include <stddef.h> 1344d7a66cSJeremy L Thompson #include <string.h> 14c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 152b730f8bSJeremy L Thompson 1649aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 170d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 182b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 190d0321e0SJeremy L Thompson 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 21*dce49693SSebastian Grimberg // Core apply restriction code 220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 23*dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Hip_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 24*dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 250d0321e0SJeremy L Thompson Ceed ceed; 26437930d1SJeremy L Thompson CeedInt num_elem, elem_size; 27*dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 280d0321e0SJeremy L Thompson const CeedScalar *d_u; 290d0321e0SJeremy L Thompson CeedScalar *d_v; 30b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 31b7453713SJeremy L Thompson hipFunction_t kernel; 32b7453713SJeremy L Thompson 33*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 34*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 35*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 36*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 37*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 38b7453713SJeremy L Thompson const CeedInt num_nodes = impl->num_nodes; 39b7453713SJeremy L Thompson 40b7453713SJeremy L Thompson // Get vectors 412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 42437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 430d0321e0SJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 442b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 450d0321e0SJeremy L Thompson } else { 460d0321e0SJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 472b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 480d0321e0SJeremy L Thompson } 490d0321e0SJeremy L Thompson 500d0321e0SJeremy L Thompson // Restrict 51437930d1SJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 520d0321e0SJeremy L Thompson // L-vector -> E-vector 53*dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; 54*dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 5558549094SSebastian Grimberg 56*dce49693SSebastian Grimberg switch (rstr_type) { 57*dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 58437930d1SJeremy L Thompson kernel = impl->StridedNoTranspose; 59437930d1SJeremy L Thompson void *args[] = {&num_elem, &d_u, &d_v}; 6058549094SSebastian Grimberg 61*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 62*dce49693SSebastian Grimberg } break; 63*dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 64*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 65*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 66*dce49693SSebastian Grimberg 67*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 68*dce49693SSebastian Grimberg } break; 69*dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 70*dce49693SSebastian Grimberg if (use_signs) { 71*dce49693SSebastian Grimberg kernel = impl->OrientedNoTranspose; 72*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 73*dce49693SSebastian Grimberg 74*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 75*dce49693SSebastian Grimberg } else { 76*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 77*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 78*dce49693SSebastian Grimberg 79*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 80*dce49693SSebastian Grimberg } 81*dce49693SSebastian Grimberg } break; 82*dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 83*dce49693SSebastian Grimberg if (use_signs && use_orients) { 84*dce49693SSebastian Grimberg kernel = impl->CurlOrientedNoTranspose; 85*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 86*dce49693SSebastian Grimberg 87*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 88*dce49693SSebastian Grimberg } else if (use_orients) { 89*dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedNoTranspose; 90*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 91*dce49693SSebastian Grimberg 92*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 93*dce49693SSebastian Grimberg } else { 94*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 95*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 96*dce49693SSebastian Grimberg 97*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 98*dce49693SSebastian Grimberg } 99*dce49693SSebastian Grimberg } break; 1000d0321e0SJeremy L Thompson } 1010d0321e0SJeremy L Thompson } else { 1020d0321e0SJeremy L Thompson // E-vector -> L-vector 103*dce49693SSebastian Grimberg const CeedInt block_size = 64; 104*dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 105b7453713SJeremy L Thompson 106*dce49693SSebastian Grimberg switch (rstr_type) { 107*dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 108*dce49693SSebastian Grimberg kernel = impl->StridedTranspose; 109*dce49693SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 110*dce49693SSebastian Grimberg 111*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 112*dce49693SSebastian Grimberg } break; 113*dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 11458549094SSebastian Grimberg if (impl->OffsetTranspose) { 115437930d1SJeremy L Thompson kernel = impl->OffsetTranspose; 11658549094SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 11758549094SSebastian Grimberg 118*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 1190d0321e0SJeremy L Thompson } else { 12058549094SSebastian Grimberg kernel = impl->OffsetTransposeDet; 12158549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 12258549094SSebastian Grimberg 123*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 12458549094SSebastian Grimberg } 125*dce49693SSebastian Grimberg } break; 126*dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 127*dce49693SSebastian Grimberg if (use_signs) { 128*dce49693SSebastian Grimberg kernel = impl->OrientedTranspose; 129*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 13058549094SSebastian Grimberg 131*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 132*dce49693SSebastian Grimberg } else { 133*dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 134*dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 135*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 136*dce49693SSebastian Grimberg 137*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 138*dce49693SSebastian Grimberg } else { 139*dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 140*dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 141*dce49693SSebastian Grimberg 142*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 143*dce49693SSebastian Grimberg } 144*dce49693SSebastian Grimberg } 145*dce49693SSebastian Grimberg } break; 146*dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 147*dce49693SSebastian Grimberg if (use_signs && use_orients) { 148*dce49693SSebastian Grimberg kernel = impl->CurlOrientedTranspose; 149*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 150*dce49693SSebastian Grimberg 151*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 152*dce49693SSebastian Grimberg } else if (use_orients) { 153*dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedTranspose; 154*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 155*dce49693SSebastian Grimberg 156*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 157*dce49693SSebastian Grimberg } else { 158*dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 159*dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 160*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 161*dce49693SSebastian Grimberg 162*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 163*dce49693SSebastian Grimberg } else { 164*dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 165*dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 166*dce49693SSebastian Grimberg 167*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, kernel, grid, block_size, args)); 168*dce49693SSebastian Grimberg } 169*dce49693SSebastian Grimberg } 170*dce49693SSebastian Grimberg } break; 1710d0321e0SJeremy L Thompson } 1720d0321e0SJeremy L Thompson } 1730d0321e0SJeremy L Thompson 1742b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 1750d0321e0SJeremy L Thompson 1760d0321e0SJeremy L Thompson // Restore arrays 1772b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 1782b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 1790d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1800d0321e0SJeremy L Thompson } 1810d0321e0SJeremy L Thompson 1820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 183*dce49693SSebastian Grimberg // Apply restriction 184*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 185*dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 186*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, true, true, u, v, request); 187*dce49693SSebastian Grimberg } 188*dce49693SSebastian Grimberg 189*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 190*dce49693SSebastian Grimberg // Apply unsigned restriction 191*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 192*dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 193*dce49693SSebastian Grimberg CeedRequest *request) { 194*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, true, u, v, request); 195*dce49693SSebastian Grimberg } 196*dce49693SSebastian Grimberg 197*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 198*dce49693SSebastian Grimberg // Apply unoriented restriction 199*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 200*dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 201*dce49693SSebastian Grimberg CeedRequest *request) { 202*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, false, u, v, request); 203*dce49693SSebastian Grimberg } 204*dce49693SSebastian Grimberg 205*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 2060d0321e0SJeremy L Thompson // Get offsets 2070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 208472941f0SJeremy L Thompson static int CeedElemRestrictionGetOffsets_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 2090d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 2100d0321e0SJeremy L Thompson 211b7453713SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 212472941f0SJeremy L Thompson switch (mem_type) { 2130d0321e0SJeremy L Thompson case CEED_MEM_HOST: 2140d0321e0SJeremy L Thompson *offsets = impl->h_ind; 2150d0321e0SJeremy L Thompson break; 2160d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 2170d0321e0SJeremy L Thompson *offsets = impl->d_ind; 2180d0321e0SJeremy L Thompson break; 2190d0321e0SJeremy L Thompson } 2200d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2210d0321e0SJeremy L Thompson } 2220d0321e0SJeremy L Thompson 2230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 224*dce49693SSebastian Grimberg // Get orientations 225*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 226*dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 227*dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 228*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 229*dce49693SSebastian Grimberg 230*dce49693SSebastian Grimberg switch (mem_type) { 231*dce49693SSebastian Grimberg case CEED_MEM_HOST: 232*dce49693SSebastian Grimberg *orients = impl->h_orients; 233*dce49693SSebastian Grimberg break; 234*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 235*dce49693SSebastian Grimberg *orients = impl->d_orients; 236*dce49693SSebastian Grimberg break; 237*dce49693SSebastian Grimberg } 238*dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 239*dce49693SSebastian Grimberg } 240*dce49693SSebastian Grimberg 241*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 242*dce49693SSebastian Grimberg // Get curl-conforming orientations 243*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 244*dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 245*dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 246*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 247*dce49693SSebastian Grimberg 248*dce49693SSebastian Grimberg switch (mem_type) { 249*dce49693SSebastian Grimberg case CEED_MEM_HOST: 250*dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 251*dce49693SSebastian Grimberg break; 252*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 253*dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 254*dce49693SSebastian Grimberg break; 255*dce49693SSebastian Grimberg } 256*dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 257*dce49693SSebastian Grimberg } 258*dce49693SSebastian Grimberg 259*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 2600d0321e0SJeremy L Thompson // Destroy restriction 2610d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 262*dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Hip(CeedElemRestriction rstr) { 2630d0321e0SJeremy L Thompson Ceed ceed; 264b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 265b7453713SJeremy L Thompson 266*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 267*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 2682b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->module)); 2692b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_ind_allocated)); 2702b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->d_ind_allocated)); 2712b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->d_t_offsets)); 2722b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->d_t_indices)); 2732b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->d_l_vec_indices)); 274*dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_orients_allocated)); 275*dce49693SSebastian Grimberg CeedCallHip(ceed, hipFree(impl->d_orients_allocated)); 276*dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_curl_orients_allocated)); 277*dce49693SSebastian Grimberg CeedCallHip(ceed, hipFree(impl->d_curl_orients_allocated)); 2782b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 2790d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2800d0321e0SJeremy L Thompson } 2810d0321e0SJeremy L Thompson 2820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2830d0321e0SJeremy L Thompson // Create transpose offsets and indices 2840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 285*dce49693SSebastian Grimberg static int CeedElemRestrictionOffset_Hip(const CeedElemRestriction rstr, const CeedInt *indices) { 2860d0321e0SJeremy L Thompson Ceed ceed; 287b7453713SJeremy L Thompson bool *is_node; 288e79b91d9SJeremy L Thompson CeedSize l_size; 289*dce49693SSebastian Grimberg CeedInt num_elem, elem_size, num_comp, num_nodes = 0; 290*dce49693SSebastian Grimberg CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 291b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 292b7453713SJeremy L Thompson 293*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 294*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 295*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 296*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 297*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 298*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 299b7453713SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 3000d0321e0SJeremy L Thompson 301437930d1SJeremy L Thompson // Count num_nodes 3022b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &is_node)); 303*dce49693SSebastian Grimberg 3042b730f8bSJeremy L Thompson for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 3052b730f8bSJeremy L Thompson for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 306437930d1SJeremy L Thompson impl->num_nodes = num_nodes; 3070d0321e0SJeremy L Thompson 3080d0321e0SJeremy L Thompson // L-vector offsets array 3092b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 3102b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 311b7453713SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 312437930d1SJeremy L Thompson if (is_node[i]) { 313437930d1SJeremy L Thompson l_vec_indices[j] = i; 3140d0321e0SJeremy L Thompson ind_to_offset[i] = j++; 3150d0321e0SJeremy L Thompson } 3162b730f8bSJeremy L Thompson } 3172b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&is_node)); 3180d0321e0SJeremy L Thompson 3190d0321e0SJeremy L Thompson // Compute transpose offsets and indices 320437930d1SJeremy L Thompson const CeedInt size_offsets = num_nodes + 1; 321b7453713SJeremy L Thompson 3222b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 3232b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 3240d0321e0SJeremy L Thompson // Count node multiplicity 3252b730f8bSJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 3262b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 3272b730f8bSJeremy L Thompson } 3280d0321e0SJeremy L Thompson // Convert to running sum 3292b730f8bSJeremy L Thompson for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 3300d0321e0SJeremy L Thompson // List all E-vec indices associated with L-vec node 331437930d1SJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 332437930d1SJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) { 333437930d1SJeremy L Thompson const CeedInt lid = elem_size * e + i; 3340d0321e0SJeremy L Thompson const CeedInt gid = indices[lid]; 335b7453713SJeremy L Thompson 336437930d1SJeremy L Thompson t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 3370d0321e0SJeremy L Thompson } 3380d0321e0SJeremy L Thompson } 3390d0321e0SJeremy L Thompson // Reset running sum 3402b730f8bSJeremy L Thompson for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 341437930d1SJeremy L Thompson t_offsets[0] = 0; 3420d0321e0SJeremy L Thompson 3430d0321e0SJeremy L Thompson // Copy data to device 3440d0321e0SJeremy L Thompson // -- L-vector indices 3452b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 3462b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), hipMemcpyHostToDevice)); 3470d0321e0SJeremy L Thompson // -- Transpose offsets 3482b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 3492b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), hipMemcpyHostToDevice)); 3500d0321e0SJeremy L Thompson // -- Transpose indices 3512b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 3522b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), hipMemcpyHostToDevice)); 3530d0321e0SJeremy L Thompson 3540d0321e0SJeremy L Thompson // Cleanup 3552b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&ind_to_offset)); 3562b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&l_vec_indices)); 3572b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_offsets)); 3582b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_indices)); 3590d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3600d0321e0SJeremy L Thompson } 3610d0321e0SJeremy L Thompson 3620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3630d0321e0SJeremy L Thompson // Create restriction 3640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 365fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Hip(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, const bool *orients, 366*dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 367b7453713SJeremy L Thompson Ceed ceed, ceed_parent; 368*dce49693SSebastian Grimberg bool is_deterministic; 369b7453713SJeremy L Thompson CeedInt num_elem, num_comp, elem_size, comp_stride = 1; 370b7453713SJeremy L Thompson CeedRestrictionType rstr_type; 371*dce49693SSebastian Grimberg char *restriction_kernel_path, *restriction_kernel_source; 3720d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 373b7453713SJeremy L Thompson 374*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 375ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 376ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 377*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 378*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 379*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 380*dce49693SSebastian Grimberg const CeedInt size = num_elem * elem_size; 381437930d1SJeremy L Thompson CeedInt strides[3] = {1, size, elem_size}; 382b7453713SJeremy L Thompson CeedInt layout[3] = {1, elem_size * num_elem, elem_size}; 3830d0321e0SJeremy L Thompson 3840d0321e0SJeremy L Thompson // Stride data 385*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 386*dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_STRIDED) { 387437930d1SJeremy L Thompson bool has_backend_strides; 388b7453713SJeremy L Thompson 389*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 390437930d1SJeremy L Thompson if (!has_backend_strides) { 391*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 3920d0321e0SJeremy L Thompson } 3930d0321e0SJeremy L Thompson } else { 394*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 3950d0321e0SJeremy L Thompson } 3960d0321e0SJeremy L Thompson 397*dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 398*dce49693SSebastian Grimberg impl->num_nodes = size; 3990d0321e0SJeremy L Thompson impl->h_ind = NULL; 4000d0321e0SJeremy L Thompson impl->h_ind_allocated = NULL; 4010d0321e0SJeremy L Thompson impl->d_ind = NULL; 4020d0321e0SJeremy L Thompson impl->d_ind_allocated = NULL; 403437930d1SJeremy L Thompson impl->d_t_indices = NULL; 404437930d1SJeremy L Thompson impl->d_t_offsets = NULL; 405*dce49693SSebastian Grimberg impl->h_orients = NULL; 406*dce49693SSebastian Grimberg impl->h_orients_allocated = NULL; 407*dce49693SSebastian Grimberg impl->d_orients = NULL; 408*dce49693SSebastian Grimberg impl->d_orients_allocated = NULL; 409*dce49693SSebastian Grimberg impl->h_curl_orients = NULL; 410*dce49693SSebastian Grimberg impl->h_curl_orients_allocated = NULL; 411*dce49693SSebastian Grimberg impl->d_curl_orients = NULL; 412*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = NULL; 413*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 414*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 4150d0321e0SJeremy L Thompson 416*dce49693SSebastian Grimberg // Set up device offset/orientation arrays 417*dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 418472941f0SJeremy L Thompson switch (mem_type) { 4196574a04fSJeremy L Thompson case CEED_MEM_HOST: { 420472941f0SJeremy L Thompson switch (copy_mode) { 4210d0321e0SJeremy L Thompson case CEED_OWN_POINTER: 4220d0321e0SJeremy L Thompson impl->h_ind_allocated = (CeedInt *)indices; 4230d0321e0SJeremy L Thompson impl->h_ind = (CeedInt *)indices; 4240d0321e0SJeremy L Thompson break; 4250d0321e0SJeremy L Thompson case CEED_USE_POINTER: 4260d0321e0SJeremy L Thompson impl->h_ind = (CeedInt *)indices; 4270d0321e0SJeremy L Thompson break; 4280d0321e0SJeremy L Thompson case CEED_COPY_VALUES: 429*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 430*dce49693SSebastian Grimberg memcpy(impl->h_ind_allocated, indices, size * sizeof(CeedInt)); 43144d7a66cSJeremy L Thompson impl->h_ind = impl->h_ind_allocated; 4320d0321e0SJeremy L Thompson break; 4330d0321e0SJeremy L Thompson } 4342b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 4350d0321e0SJeremy L Thompson impl->d_ind_allocated = impl->d_ind; // We own the device memory 4362b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), hipMemcpyHostToDevice)); 437*dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, indices)); 438*dce49693SSebastian Grimberg } break; 4396574a04fSJeremy L Thompson case CEED_MEM_DEVICE: { 440472941f0SJeremy L Thompson switch (copy_mode) { 4410d0321e0SJeremy L Thompson case CEED_COPY_VALUES: 4422b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 4430d0321e0SJeremy L Thompson impl->d_ind_allocated = impl->d_ind; // We own the device memory 4442b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), hipMemcpyDeviceToDevice)); 4450d0321e0SJeremy L Thompson break; 4460d0321e0SJeremy L Thompson case CEED_OWN_POINTER: 4470d0321e0SJeremy L Thompson impl->d_ind = (CeedInt *)indices; 4480d0321e0SJeremy L Thompson impl->d_ind_allocated = impl->d_ind; 4490d0321e0SJeremy L Thompson break; 4500d0321e0SJeremy L Thompson case CEED_USE_POINTER: 4510d0321e0SJeremy L Thompson impl->d_ind = (CeedInt *)indices; 4526574a04fSJeremy L Thompson break; 4536574a04fSJeremy L Thompson } 454*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 455*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->h_ind_allocated, impl->d_ind, size * sizeof(CeedInt), hipMemcpyDeviceToHost)); 456*dce49693SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 457*dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, indices)); 458*dce49693SSebastian Grimberg } break; 459*dce49693SSebastian Grimberg } 460*dce49693SSebastian Grimberg 461*dce49693SSebastian Grimberg // Orientation data 462*dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 463*dce49693SSebastian Grimberg switch (mem_type) { 464*dce49693SSebastian Grimberg case CEED_MEM_HOST: { 465*dce49693SSebastian Grimberg switch (copy_mode) { 466*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 467*dce49693SSebastian Grimberg impl->h_orients_allocated = (bool *)orients; 468*dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 469*dce49693SSebastian Grimberg break; 470*dce49693SSebastian Grimberg case CEED_USE_POINTER: 471*dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 472*dce49693SSebastian Grimberg break; 473*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 474*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 475*dce49693SSebastian Grimberg memcpy(impl->h_orients_allocated, orients, size * sizeof(bool)); 476*dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 477*dce49693SSebastian Grimberg break; 478*dce49693SSebastian Grimberg } 479*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&impl->d_orients, size * sizeof(bool))); 480*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 481*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->d_orients, orients, size * sizeof(bool), hipMemcpyHostToDevice)); 482*dce49693SSebastian Grimberg } break; 483*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 484*dce49693SSebastian Grimberg switch (copy_mode) { 485*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 486*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&impl->d_orients, size * sizeof(bool))); 487*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 488*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->d_orients, orients, size * sizeof(bool), hipMemcpyDeviceToDevice)); 489*dce49693SSebastian Grimberg break; 490*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 491*dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 492*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; 493*dce49693SSebastian Grimberg break; 494*dce49693SSebastian Grimberg case CEED_USE_POINTER: 495*dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 496*dce49693SSebastian Grimberg break; 497*dce49693SSebastian Grimberg } 498*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 499*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->h_orients_allocated, impl->d_orients, size * sizeof(bool), hipMemcpyDeviceToHost)); 500*dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 501*dce49693SSebastian Grimberg } break; 502*dce49693SSebastian Grimberg } 503*dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 504*dce49693SSebastian Grimberg switch (mem_type) { 505*dce49693SSebastian Grimberg case CEED_MEM_HOST: { 506*dce49693SSebastian Grimberg switch (copy_mode) { 507*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 508*dce49693SSebastian Grimberg impl->h_curl_orients_allocated = (CeedInt8 *)curl_orients; 509*dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 510*dce49693SSebastian Grimberg break; 511*dce49693SSebastian Grimberg case CEED_USE_POINTER: 512*dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 513*dce49693SSebastian Grimberg break; 514*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 515*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 516*dce49693SSebastian Grimberg memcpy(impl->h_curl_orients_allocated, curl_orients, 3 * size * sizeof(CeedInt8)); 517*dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 518*dce49693SSebastian Grimberg break; 519*dce49693SSebastian Grimberg } 520*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 521*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 522*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyHostToDevice)); 523*dce49693SSebastian Grimberg } break; 524*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 525*dce49693SSebastian Grimberg switch (copy_mode) { 526*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 527*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 528*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 529*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyDeviceToDevice)); 530*dce49693SSebastian Grimberg break; 531*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 532*dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 533*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; 534*dce49693SSebastian Grimberg break; 535*dce49693SSebastian Grimberg case CEED_USE_POINTER: 536*dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 537*dce49693SSebastian Grimberg break; 538*dce49693SSebastian Grimberg } 539*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 540*dce49693SSebastian Grimberg CeedCallHip(ceed, hipMemcpy(impl->h_curl_orients_allocated, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyDeviceToHost)); 541*dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 542*dce49693SSebastian Grimberg } break; 543*dce49693SSebastian Grimberg } 544*dce49693SSebastian Grimberg } 5450d0321e0SJeremy L Thompson } 5460d0321e0SJeremy L Thompson 5470d0321e0SJeremy L Thompson // Compile HIP kernels 5482b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction.h", &restriction_kernel_path)); 54923d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 5502b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 55123d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 552*dce49693SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 8, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 553*dce49693SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "RSTR_STRIDE_NODES", 554*dce49693SSebastian Grimberg strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); 555eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedNoTranspose", &impl->StridedNoTranspose)); 556eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedTranspose", &impl->StridedTranspose)); 55758549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->OffsetNoTranspose)); 55858549094SSebastian Grimberg if (!is_deterministic) { 559eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->OffsetTranspose)); 56058549094SSebastian Grimberg } else { 56158549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTransposeDet", &impl->OffsetTransposeDet)); 56258549094SSebastian Grimberg } 563*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedNoTranspose", &impl->OrientedNoTranspose)); 564*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedTranspose", &impl->OrientedTranspose)); 565*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedNoTranspose", &impl->CurlOrientedNoTranspose)); 566*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->CurlOrientedUnsignedNoTranspose)); 567*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedTranspose", &impl->CurlOrientedTranspose)); 568*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->CurlOrientedUnsignedTranspose)); 5692b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&restriction_kernel_path)); 5702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&restriction_kernel_source)); 5710d0321e0SJeremy L Thompson 5720d0321e0SJeremy L Thompson // Register backend functions 573*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Hip)); 574*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Hip)); 575*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Hip)); 576*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Hip)); 577*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Hip)); 578*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Hip)); 579*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Hip)); 5800d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5810d0321e0SJeremy L Thompson } 5820d0321e0SJeremy L Thompson 5830d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 584