1ff1e7120SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2ff1e7120SSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3ff1e7120SSebastian Grimberg // 4ff1e7120SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5ff1e7120SSebastian Grimberg // 6ff1e7120SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7ff1e7120SSebastian Grimberg 8ff1e7120SSebastian Grimberg #include <ceed.h> 9ff1e7120SSebastian Grimberg #include <ceed/backend.h> 10ff1e7120SSebastian Grimberg #include <ceed/jit-tools.h> 11ff1e7120SSebastian Grimberg #include <cuda.h> 12ff1e7120SSebastian Grimberg #include <cuda_runtime.h> 13ff1e7120SSebastian Grimberg #include <stdbool.h> 14ff1e7120SSebastian Grimberg #include <stddef.h> 15ff1e7120SSebastian Grimberg #include <string.h> 16ff1e7120SSebastian Grimberg 17ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-common.h" 18ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-compile.h" 19ff1e7120SSebastian Grimberg #include "ceed-cuda-ref.h" 20ff1e7120SSebastian Grimberg 21ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 22dce49693SSebastian Grimberg // Core apply restriction code 23ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 24dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Cuda_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 25dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 26ff1e7120SSebastian Grimberg Ceed ceed; 27ca735530SJeremy L Thompson CeedInt num_elem, elem_size; 28dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 29ff1e7120SSebastian Grimberg const CeedScalar *d_u; 30ff1e7120SSebastian Grimberg CeedScalar *d_v; 31ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 32dce49693SSebastian Grimberg CUfunction kernel; 33ca735530SJeremy L Thompson 34dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 35dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 36dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 37dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 38dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 39ca735530SJeremy L Thompson const CeedInt num_nodes = impl->num_nodes; 40ca735530SJeremy L Thompson 41ca735530SJeremy L Thompson // Get vectors 42ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 43ff1e7120SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 44ff1e7120SSebastian Grimberg // Sum into for transpose mode, e-vec to l-vec 45ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 46ff1e7120SSebastian Grimberg } else { 47ff1e7120SSebastian Grimberg // Overwrite for notranspose mode, l-vec to e-vec 48ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 49ff1e7120SSebastian Grimberg } 50ff1e7120SSebastian Grimberg 51ff1e7120SSebastian Grimberg // Restrict 52ff1e7120SSebastian Grimberg if (t_mode == CEED_NOTRANSPOSE) { 53ff1e7120SSebastian Grimberg // L-vector -> E-vector 54dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; 55dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 5658549094SSebastian Grimberg 57dce49693SSebastian Grimberg switch (rstr_type) { 58dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 59ff1e7120SSebastian Grimberg kernel = impl->StridedNoTranspose; 60ff1e7120SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 6158549094SSebastian Grimberg 62dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 63dce49693SSebastian Grimberg } break; 64dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 65dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 66dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 67dce49693SSebastian Grimberg 68dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 69dce49693SSebastian Grimberg } break; 70dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 71dce49693SSebastian Grimberg if (use_signs) { 72dce49693SSebastian Grimberg kernel = impl->OrientedNoTranspose; 73dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 74dce49693SSebastian Grimberg 75dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 76dce49693SSebastian Grimberg } else { 77dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 78dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 79dce49693SSebastian Grimberg 80dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 81dce49693SSebastian Grimberg } 82dce49693SSebastian Grimberg } break; 83dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 84dce49693SSebastian Grimberg if (use_signs && use_orients) { 85dce49693SSebastian Grimberg kernel = impl->CurlOrientedNoTranspose; 86dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 87dce49693SSebastian Grimberg 88dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 89dce49693SSebastian Grimberg } else if (use_orients) { 90dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedNoTranspose; 91dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 92dce49693SSebastian Grimberg 93dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 94dce49693SSebastian Grimberg } else { 95dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 96dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 97dce49693SSebastian Grimberg 98dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 99dce49693SSebastian Grimberg } 100dce49693SSebastian Grimberg } break; 101*b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 102*b3d03e38SSebastian Grimberg // LCOV_EXCL_START 103*b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 104*b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 105*b3d03e38SSebastian Grimberg } break; 106ff1e7120SSebastian Grimberg } 107ff1e7120SSebastian Grimberg } else { 108ff1e7120SSebastian Grimberg // E-vector -> L-vector 109dce49693SSebastian Grimberg const CeedInt block_size = 32; 110dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 111ca735530SJeremy L Thompson 112dce49693SSebastian Grimberg switch (rstr_type) { 113dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 114dce49693SSebastian Grimberg kernel = impl->StridedTranspose; 115dce49693SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 116dce49693SSebastian Grimberg 117dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 118dce49693SSebastian Grimberg } break; 119dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 12058549094SSebastian Grimberg if (impl->OffsetTranspose) { 121ff1e7120SSebastian Grimberg kernel = impl->OffsetTranspose; 12258549094SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 12358549094SSebastian Grimberg 124dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 125ff1e7120SSebastian Grimberg } else { 12658549094SSebastian Grimberg kernel = impl->OffsetTransposeDet; 12758549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 12858549094SSebastian Grimberg 129dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 13058549094SSebastian Grimberg } 131dce49693SSebastian Grimberg } break; 132dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 133dce49693SSebastian Grimberg if (use_signs) { 134dce49693SSebastian Grimberg kernel = impl->OrientedTranspose; 135dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 13658549094SSebastian Grimberg 137dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 138dce49693SSebastian Grimberg } else { 139dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 140dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 141dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 142dce49693SSebastian Grimberg 143dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 144dce49693SSebastian Grimberg } else { 145dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 146dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 147dce49693SSebastian Grimberg 148dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 149dce49693SSebastian Grimberg } 150dce49693SSebastian Grimberg } 151dce49693SSebastian Grimberg } break; 152dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 153dce49693SSebastian Grimberg if (use_signs && use_orients) { 154dce49693SSebastian Grimberg kernel = impl->CurlOrientedTranspose; 155dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 156dce49693SSebastian Grimberg 157dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 158dce49693SSebastian Grimberg } else if (use_orients) { 159dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedTranspose; 160dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 161dce49693SSebastian Grimberg 162dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 163dce49693SSebastian Grimberg } else { 164dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 165dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 166dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 167dce49693SSebastian Grimberg 168dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 169dce49693SSebastian Grimberg } else { 170dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 171dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 172dce49693SSebastian Grimberg 173dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 174dce49693SSebastian Grimberg } 175dce49693SSebastian Grimberg } 176dce49693SSebastian Grimberg } break; 177*b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 178*b3d03e38SSebastian Grimberg // LCOV_EXCL_START 179*b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 180*b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 181*b3d03e38SSebastian Grimberg } break; 182ff1e7120SSebastian Grimberg } 183ff1e7120SSebastian Grimberg } 184ff1e7120SSebastian Grimberg 185ff1e7120SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 186ff1e7120SSebastian Grimberg 187ff1e7120SSebastian Grimberg // Restore arrays 188ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 189ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 190ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 191ff1e7120SSebastian Grimberg } 192ff1e7120SSebastian Grimberg 193ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 194dce49693SSebastian Grimberg // Apply restriction 195dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 196dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 197dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, true, true, u, v, request); 198dce49693SSebastian Grimberg } 199dce49693SSebastian Grimberg 200dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 201dce49693SSebastian Grimberg // Apply unsigned restriction 202dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 203dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 204dce49693SSebastian Grimberg CeedRequest *request) { 205dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, true, u, v, request); 206dce49693SSebastian Grimberg } 207dce49693SSebastian Grimberg 208dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 209dce49693SSebastian Grimberg // Apply unoriented restriction 210dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 211dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 212dce49693SSebastian Grimberg CeedRequest *request) { 213dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, false, u, v, request); 214dce49693SSebastian Grimberg } 215dce49693SSebastian Grimberg 216dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 217ff1e7120SSebastian Grimberg // Get offsets 218ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 219ff1e7120SSebastian Grimberg static int CeedElemRestrictionGetOffsets_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 220ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 221ff1e7120SSebastian Grimberg 222ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 223ff1e7120SSebastian Grimberg switch (mem_type) { 224ff1e7120SSebastian Grimberg case CEED_MEM_HOST: 225ff1e7120SSebastian Grimberg *offsets = impl->h_ind; 226ff1e7120SSebastian Grimberg break; 227ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: 228ff1e7120SSebastian Grimberg *offsets = impl->d_ind; 229ff1e7120SSebastian Grimberg break; 230ff1e7120SSebastian Grimberg } 231ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 232ff1e7120SSebastian Grimberg } 233ff1e7120SSebastian Grimberg 234ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 235dce49693SSebastian Grimberg // Get orientations 236dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 237dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 238dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 239dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 240dce49693SSebastian Grimberg 241dce49693SSebastian Grimberg switch (mem_type) { 242dce49693SSebastian Grimberg case CEED_MEM_HOST: 243dce49693SSebastian Grimberg *orients = impl->h_orients; 244dce49693SSebastian Grimberg break; 245dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 246dce49693SSebastian Grimberg *orients = impl->d_orients; 247dce49693SSebastian Grimberg break; 248dce49693SSebastian Grimberg } 249dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 250dce49693SSebastian Grimberg } 251dce49693SSebastian Grimberg 252dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 253dce49693SSebastian Grimberg // Get curl-conforming orientations 254dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 255dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 256dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 257dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 258dce49693SSebastian Grimberg 259dce49693SSebastian Grimberg switch (mem_type) { 260dce49693SSebastian Grimberg case CEED_MEM_HOST: 261dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 262dce49693SSebastian Grimberg break; 263dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 264dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 265dce49693SSebastian Grimberg break; 266dce49693SSebastian Grimberg } 267dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 268dce49693SSebastian Grimberg } 269dce49693SSebastian Grimberg 270dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 271ff1e7120SSebastian Grimberg // Destroy restriction 272ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 273dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Cuda(CeedElemRestriction rstr) { 274ff1e7120SSebastian Grimberg Ceed ceed; 275ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 276ca735530SJeremy L Thompson 277dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 278dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 279ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cuModuleUnload(impl->module)); 280ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_ind_allocated)); 281ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_ind_allocated)); 282ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_offsets)); 283ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_indices)); 284ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_l_vec_indices)); 285dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_orients_allocated)); 286dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_orients_allocated)); 287dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_curl_orients_allocated)); 288dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_curl_orients_allocated)); 289ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl)); 290ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 291ff1e7120SSebastian Grimberg } 292ff1e7120SSebastian Grimberg 293ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 294ff1e7120SSebastian Grimberg // Create transpose offsets and indices 295ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 296dce49693SSebastian Grimberg static int CeedElemRestrictionOffset_Cuda(const CeedElemRestriction rstr, const CeedInt *indices) { 297ff1e7120SSebastian Grimberg Ceed ceed; 298ca735530SJeremy L Thompson bool *is_node; 299ff1e7120SSebastian Grimberg CeedSize l_size; 300ca735530SJeremy L Thompson CeedInt num_elem, elem_size, num_comp, num_nodes = 0; 301ca735530SJeremy L Thompson CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 302ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 303ca735530SJeremy L Thompson 304dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 305dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 306dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 307dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 308dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 309dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 310ca735530SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 311ff1e7120SSebastian Grimberg 312ff1e7120SSebastian Grimberg // Count num_nodes 313ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &is_node)); 314ca735530SJeremy L Thompson 315ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 316ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 317ff1e7120SSebastian Grimberg impl->num_nodes = num_nodes; 318ff1e7120SSebastian Grimberg 319ff1e7120SSebastian Grimberg // L-vector offsets array 320ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 321ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 322ca735530SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 323ff1e7120SSebastian Grimberg if (is_node[i]) { 324ff1e7120SSebastian Grimberg l_vec_indices[j] = i; 325ff1e7120SSebastian Grimberg ind_to_offset[i] = j++; 326ff1e7120SSebastian Grimberg } 327ff1e7120SSebastian Grimberg } 328ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&is_node)); 329ff1e7120SSebastian Grimberg 330ff1e7120SSebastian Grimberg // Compute transpose offsets and indices 331ff1e7120SSebastian Grimberg const CeedInt size_offsets = num_nodes + 1; 332ca735530SJeremy L Thompson 333ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 334ff1e7120SSebastian Grimberg CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 335ff1e7120SSebastian Grimberg // Count node multiplicity 336ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 337ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 338ff1e7120SSebastian Grimberg } 339ff1e7120SSebastian Grimberg // Convert to running sum 340ff1e7120SSebastian Grimberg for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 341ff1e7120SSebastian Grimberg // List all E-vec indices associated with L-vec node 342ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 343ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) { 344ff1e7120SSebastian Grimberg const CeedInt lid = elem_size * e + i; 345ff1e7120SSebastian Grimberg const CeedInt gid = indices[lid]; 346ca735530SJeremy L Thompson 347ff1e7120SSebastian Grimberg t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 348ff1e7120SSebastian Grimberg } 349ff1e7120SSebastian Grimberg } 350ff1e7120SSebastian Grimberg // Reset running sum 351ff1e7120SSebastian Grimberg for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 352ff1e7120SSebastian Grimberg t_offsets[0] = 0; 353ff1e7120SSebastian Grimberg 354ff1e7120SSebastian Grimberg // Copy data to device 355ff1e7120SSebastian Grimberg // -- L-vector indices 356ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 357ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), cudaMemcpyHostToDevice)); 358ff1e7120SSebastian Grimberg // -- Transpose offsets 359ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 360ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), cudaMemcpyHostToDevice)); 361ff1e7120SSebastian Grimberg // -- Transpose indices 362ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 363ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), cudaMemcpyHostToDevice)); 364ff1e7120SSebastian Grimberg 365ff1e7120SSebastian Grimberg // Cleanup 366ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&ind_to_offset)); 367ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&l_vec_indices)); 368ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_offsets)); 369ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_indices)); 370ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 371ff1e7120SSebastian Grimberg } 372ff1e7120SSebastian Grimberg 373ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 374ff1e7120SSebastian Grimberg // Create restriction 375ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 376fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, const bool *orients, 377dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 378ca735530SJeremy L Thompson Ceed ceed, ceed_parent; 379dce49693SSebastian Grimberg bool is_deterministic; 380ca735530SJeremy L Thompson CeedInt num_elem, num_comp, elem_size, comp_stride = 1; 381ca735530SJeremy L Thompson CeedRestrictionType rstr_type; 382dce49693SSebastian Grimberg char *restriction_kernel_path, *restriction_kernel_source; 383ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 384ca735530SJeremy L Thompson 385dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 386ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 387ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 388dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 389dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 390dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 391ca735530SJeremy L Thompson const CeedInt size = num_elem * elem_size; 392ff1e7120SSebastian Grimberg CeedInt strides[3] = {1, size, elem_size}; 393ca735530SJeremy L Thompson CeedInt layout[3] = {1, elem_size * num_elem, elem_size}; 394ff1e7120SSebastian Grimberg 395ff1e7120SSebastian Grimberg // Stride data 396dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 397dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_STRIDED) { 398ff1e7120SSebastian Grimberg bool has_backend_strides; 399ca735530SJeremy L Thompson 400dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 401ff1e7120SSebastian Grimberg if (!has_backend_strides) { 402dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 403ff1e7120SSebastian Grimberg } 404ff1e7120SSebastian Grimberg } else { 405dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 406ff1e7120SSebastian Grimberg } 407ff1e7120SSebastian Grimberg 408dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 409dce49693SSebastian Grimberg impl->num_nodes = size; 410ff1e7120SSebastian Grimberg impl->h_ind = NULL; 411ff1e7120SSebastian Grimberg impl->h_ind_allocated = NULL; 412ff1e7120SSebastian Grimberg impl->d_ind = NULL; 413ff1e7120SSebastian Grimberg impl->d_ind_allocated = NULL; 414ff1e7120SSebastian Grimberg impl->d_t_indices = NULL; 415ff1e7120SSebastian Grimberg impl->d_t_offsets = NULL; 416dce49693SSebastian Grimberg impl->h_orients = NULL; 417dce49693SSebastian Grimberg impl->h_orients_allocated = NULL; 418dce49693SSebastian Grimberg impl->d_orients = NULL; 419dce49693SSebastian Grimberg impl->d_orients_allocated = NULL; 420dce49693SSebastian Grimberg impl->h_curl_orients = NULL; 421dce49693SSebastian Grimberg impl->h_curl_orients_allocated = NULL; 422dce49693SSebastian Grimberg impl->d_curl_orients = NULL; 423dce49693SSebastian Grimberg impl->d_curl_orients_allocated = NULL; 424dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 425dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 426ff1e7120SSebastian Grimberg 427dce49693SSebastian Grimberg // Set up device offset/orientation arrays 428dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 429ff1e7120SSebastian Grimberg switch (mem_type) { 430ff1e7120SSebastian Grimberg case CEED_MEM_HOST: { 431ff1e7120SSebastian Grimberg switch (copy_mode) { 432ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 433ff1e7120SSebastian Grimberg impl->h_ind_allocated = (CeedInt *)indices; 434ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 435ff1e7120SSebastian Grimberg break; 436ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 437ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 438ff1e7120SSebastian Grimberg break; 439ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 440dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 441dce49693SSebastian Grimberg memcpy(impl->h_ind_allocated, indices, size * sizeof(CeedInt)); 442ff1e7120SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 443ff1e7120SSebastian Grimberg break; 444ff1e7120SSebastian Grimberg } 445ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 446ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 447ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyHostToDevice)); 448dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 449dce49693SSebastian Grimberg } break; 450ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: { 451ff1e7120SSebastian Grimberg switch (copy_mode) { 452ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 453ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 454ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 455ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyDeviceToDevice)); 456ff1e7120SSebastian Grimberg break; 457ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 458ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 459ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; 460ff1e7120SSebastian Grimberg break; 461ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 462ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 463ff1e7120SSebastian Grimberg break; 464ff1e7120SSebastian Grimberg } 465dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 466dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_ind_allocated, impl->d_ind, size * sizeof(CeedInt), cudaMemcpyDeviceToHost)); 467dce49693SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 468dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 469dce49693SSebastian Grimberg } break; 470ff1e7120SSebastian Grimberg } 471ff1e7120SSebastian Grimberg 472dce49693SSebastian Grimberg // Orientation data 473dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 474dce49693SSebastian Grimberg switch (mem_type) { 475dce49693SSebastian Grimberg case CEED_MEM_HOST: { 476dce49693SSebastian Grimberg switch (copy_mode) { 477dce49693SSebastian Grimberg case CEED_OWN_POINTER: 478dce49693SSebastian Grimberg impl->h_orients_allocated = (bool *)orients; 479dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 480dce49693SSebastian Grimberg break; 481dce49693SSebastian Grimberg case CEED_USE_POINTER: 482dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 483dce49693SSebastian Grimberg break; 484dce49693SSebastian Grimberg case CEED_COPY_VALUES: 485dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 486dce49693SSebastian Grimberg memcpy(impl->h_orients_allocated, orients, size * sizeof(bool)); 487dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 488dce49693SSebastian Grimberg break; 489dce49693SSebastian Grimberg } 490dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 491dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 492dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyHostToDevice)); 493dce49693SSebastian Grimberg } break; 494dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 495dce49693SSebastian Grimberg switch (copy_mode) { 496dce49693SSebastian Grimberg case CEED_COPY_VALUES: 497dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 498dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 499dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyDeviceToDevice)); 500dce49693SSebastian Grimberg break; 501dce49693SSebastian Grimberg case CEED_OWN_POINTER: 502dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 503dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; 504dce49693SSebastian Grimberg break; 505dce49693SSebastian Grimberg case CEED_USE_POINTER: 506dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 507dce49693SSebastian Grimberg break; 508dce49693SSebastian Grimberg } 509dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 510dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_orients_allocated, impl->d_orients, size * sizeof(bool), cudaMemcpyDeviceToHost)); 511dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 512dce49693SSebastian Grimberg } break; 513dce49693SSebastian Grimberg } 514dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 515dce49693SSebastian Grimberg switch (mem_type) { 516dce49693SSebastian Grimberg case CEED_MEM_HOST: { 517dce49693SSebastian Grimberg switch (copy_mode) { 518dce49693SSebastian Grimberg case CEED_OWN_POINTER: 519dce49693SSebastian Grimberg impl->h_curl_orients_allocated = (CeedInt8 *)curl_orients; 520dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 521dce49693SSebastian Grimberg break; 522dce49693SSebastian Grimberg case CEED_USE_POINTER: 523dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 524dce49693SSebastian Grimberg break; 525dce49693SSebastian Grimberg case CEED_COPY_VALUES: 526dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 527dce49693SSebastian Grimberg memcpy(impl->h_curl_orients_allocated, curl_orients, 3 * size * sizeof(CeedInt8)); 528dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 529dce49693SSebastian Grimberg break; 530dce49693SSebastian Grimberg } 531dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 532dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 533dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyHostToDevice)); 534dce49693SSebastian Grimberg } break; 535dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 536dce49693SSebastian Grimberg switch (copy_mode) { 537dce49693SSebastian Grimberg case CEED_COPY_VALUES: 538dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 539dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 540dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToDevice)); 541dce49693SSebastian Grimberg break; 542dce49693SSebastian Grimberg case CEED_OWN_POINTER: 543dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 544dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; 545dce49693SSebastian Grimberg break; 546dce49693SSebastian Grimberg case CEED_USE_POINTER: 547dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 548dce49693SSebastian Grimberg break; 549dce49693SSebastian Grimberg } 550dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 551dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_curl_orients_allocated, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToHost)); 552dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 553dce49693SSebastian Grimberg } break; 554dce49693SSebastian Grimberg } 555dce49693SSebastian Grimberg } 556dce49693SSebastian Grimberg } 557ca735530SJeremy L Thompson 558dce49693SSebastian Grimberg // Compile CUDA kernels 559ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction.h", &restriction_kernel_path)); 56023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 561ff1e7120SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 56223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 563ca735530SJeremy L Thompson CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 8, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 564dce49693SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "RSTR_STRIDE_NODES", 565ca735530SJeremy L Thompson strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); 566ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedNoTranspose", &impl->StridedNoTranspose)); 56758549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedTranspose", &impl->StridedTranspose)); 568ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->OffsetNoTranspose)); 56958549094SSebastian Grimberg if (!is_deterministic) { 57058549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->OffsetTranspose)); 57158549094SSebastian Grimberg } else { 57258549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTransposeDet", &impl->OffsetTransposeDet)); 57358549094SSebastian Grimberg } 574dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedNoTranspose", &impl->OrientedNoTranspose)); 575dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTranspose", &impl->OrientedTranspose)); 576dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedNoTranspose", &impl->CurlOrientedNoTranspose)); 577dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->CurlOrientedUnsignedNoTranspose)); 578dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTranspose", &impl->CurlOrientedTranspose)); 579dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->CurlOrientedUnsignedTranspose)); 580ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_path)); 581ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_source)); 582ff1e7120SSebastian Grimberg 583ff1e7120SSebastian Grimberg // Register backend functions 584dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Cuda)); 585dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Cuda)); 586dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Cuda)); 587dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Cuda)); 588dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Cuda)); 589dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Cuda)); 590dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Cuda)); 591ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 592ff1e7120SSebastian Grimberg } 593ff1e7120SSebastian Grimberg 594ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 595