1ff1e7120SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2ff1e7120SSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3ff1e7120SSebastian Grimberg // 4ff1e7120SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5ff1e7120SSebastian Grimberg // 6ff1e7120SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7ff1e7120SSebastian Grimberg 8ff1e7120SSebastian Grimberg #include <ceed.h> 9ff1e7120SSebastian Grimberg #include <ceed/backend.h> 10ff1e7120SSebastian Grimberg #include <ceed/jit-tools.h> 11ff1e7120SSebastian Grimberg #include <cuda.h> 12ff1e7120SSebastian Grimberg #include <cuda_runtime.h> 13ff1e7120SSebastian Grimberg #include <stdbool.h> 14ff1e7120SSebastian Grimberg #include <stddef.h> 15ff1e7120SSebastian Grimberg #include <string.h> 16ff1e7120SSebastian Grimberg 17ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-common.h" 18ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-compile.h" 19ff1e7120SSebastian Grimberg #include "ceed-cuda-ref.h" 20ff1e7120SSebastian Grimberg 21ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 22*dce49693SSebastian Grimberg // Core apply restriction code 23ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 24*dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Cuda_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 25*dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 26ff1e7120SSebastian Grimberg Ceed ceed; 27ca735530SJeremy L Thompson CeedInt num_elem, elem_size; 28*dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 29ff1e7120SSebastian Grimberg const CeedScalar *d_u; 30ff1e7120SSebastian Grimberg CeedScalar *d_v; 31ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 32*dce49693SSebastian Grimberg CUfunction kernel; 33ca735530SJeremy L Thompson 34*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 35*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 36*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 37*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 38*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 39ca735530SJeremy L Thompson const CeedInt num_nodes = impl->num_nodes; 40ca735530SJeremy L Thompson 41ca735530SJeremy L Thompson // Get vectors 42ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 43ff1e7120SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 44ff1e7120SSebastian Grimberg // Sum into for transpose mode, e-vec to l-vec 45ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 46ff1e7120SSebastian Grimberg } else { 47ff1e7120SSebastian Grimberg // Overwrite for notranspose mode, l-vec to e-vec 48ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 49ff1e7120SSebastian Grimberg } 50ff1e7120SSebastian Grimberg 51ff1e7120SSebastian Grimberg // Restrict 52ff1e7120SSebastian Grimberg if (t_mode == CEED_NOTRANSPOSE) { 53ff1e7120SSebastian Grimberg // L-vector -> E-vector 54*dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; 55*dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 5658549094SSebastian Grimberg 57*dce49693SSebastian Grimberg switch (rstr_type) { 58*dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 59ff1e7120SSebastian Grimberg kernel = impl->StridedNoTranspose; 60ff1e7120SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 6158549094SSebastian Grimberg 62*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 63*dce49693SSebastian Grimberg } break; 64*dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 65*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 66*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 67*dce49693SSebastian Grimberg 68*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 69*dce49693SSebastian Grimberg } break; 70*dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 71*dce49693SSebastian Grimberg if (use_signs) { 72*dce49693SSebastian Grimberg kernel = impl->OrientedNoTranspose; 73*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 74*dce49693SSebastian Grimberg 75*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 76*dce49693SSebastian Grimberg } else { 77*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 78*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 79*dce49693SSebastian Grimberg 80*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 81*dce49693SSebastian Grimberg } 82*dce49693SSebastian Grimberg } break; 83*dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 84*dce49693SSebastian Grimberg if (use_signs && use_orients) { 85*dce49693SSebastian Grimberg kernel = impl->CurlOrientedNoTranspose; 86*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 87*dce49693SSebastian Grimberg 88*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 89*dce49693SSebastian Grimberg } else if (use_orients) { 90*dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedNoTranspose; 91*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 92*dce49693SSebastian Grimberg 93*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 94*dce49693SSebastian Grimberg } else { 95*dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 96*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 97*dce49693SSebastian Grimberg 98*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 99*dce49693SSebastian Grimberg } 100*dce49693SSebastian Grimberg } break; 101ff1e7120SSebastian Grimberg } 102ff1e7120SSebastian Grimberg } else { 103ff1e7120SSebastian Grimberg // E-vector -> L-vector 104*dce49693SSebastian Grimberg const CeedInt block_size = 32; 105*dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 106ca735530SJeremy L Thompson 107*dce49693SSebastian Grimberg switch (rstr_type) { 108*dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 109*dce49693SSebastian Grimberg kernel = impl->StridedTranspose; 110*dce49693SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 111*dce49693SSebastian Grimberg 112*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 113*dce49693SSebastian Grimberg } break; 114*dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 11558549094SSebastian Grimberg if (impl->OffsetTranspose) { 116ff1e7120SSebastian Grimberg kernel = impl->OffsetTranspose; 11758549094SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 11858549094SSebastian Grimberg 119*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 120ff1e7120SSebastian Grimberg } else { 12158549094SSebastian Grimberg kernel = impl->OffsetTransposeDet; 12258549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 12358549094SSebastian Grimberg 124*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 12558549094SSebastian Grimberg } 126*dce49693SSebastian Grimberg } break; 127*dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 128*dce49693SSebastian Grimberg if (use_signs) { 129*dce49693SSebastian Grimberg kernel = impl->OrientedTranspose; 130*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 13158549094SSebastian Grimberg 132*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 133*dce49693SSebastian Grimberg } else { 134*dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 135*dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 136*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 137*dce49693SSebastian Grimberg 138*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 139*dce49693SSebastian Grimberg } else { 140*dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 141*dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 142*dce49693SSebastian Grimberg 143*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 144*dce49693SSebastian Grimberg } 145*dce49693SSebastian Grimberg } 146*dce49693SSebastian Grimberg } break; 147*dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 148*dce49693SSebastian Grimberg if (use_signs && use_orients) { 149*dce49693SSebastian Grimberg kernel = impl->CurlOrientedTranspose; 150*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 151*dce49693SSebastian Grimberg 152*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 153*dce49693SSebastian Grimberg } else if (use_orients) { 154*dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedTranspose; 155*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 156*dce49693SSebastian Grimberg 157*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 158*dce49693SSebastian Grimberg } else { 159*dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 160*dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 161*dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 162*dce49693SSebastian Grimberg 163*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 164*dce49693SSebastian Grimberg } else { 165*dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 166*dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 167*dce49693SSebastian Grimberg 168*dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 169*dce49693SSebastian Grimberg } 170*dce49693SSebastian Grimberg } 171*dce49693SSebastian Grimberg } break; 172ff1e7120SSebastian Grimberg } 173ff1e7120SSebastian Grimberg } 174ff1e7120SSebastian Grimberg 175ff1e7120SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 176ff1e7120SSebastian Grimberg 177ff1e7120SSebastian Grimberg // Restore arrays 178ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 179ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 180ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 181ff1e7120SSebastian Grimberg } 182ff1e7120SSebastian Grimberg 183ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 184*dce49693SSebastian Grimberg // Apply restriction 185*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 186*dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 187*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, true, true, u, v, request); 188*dce49693SSebastian Grimberg } 189*dce49693SSebastian Grimberg 190*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 191*dce49693SSebastian Grimberg // Apply unsigned restriction 192*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 193*dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 194*dce49693SSebastian Grimberg CeedRequest *request) { 195*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, true, u, v, request); 196*dce49693SSebastian Grimberg } 197*dce49693SSebastian Grimberg 198*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 199*dce49693SSebastian Grimberg // Apply unoriented restriction 200*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 201*dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 202*dce49693SSebastian Grimberg CeedRequest *request) { 203*dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, false, u, v, request); 204*dce49693SSebastian Grimberg } 205*dce49693SSebastian Grimberg 206*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 207ff1e7120SSebastian Grimberg // Get offsets 208ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 209ff1e7120SSebastian Grimberg static int CeedElemRestrictionGetOffsets_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 210ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 211ff1e7120SSebastian Grimberg 212ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 213ff1e7120SSebastian Grimberg switch (mem_type) { 214ff1e7120SSebastian Grimberg case CEED_MEM_HOST: 215ff1e7120SSebastian Grimberg *offsets = impl->h_ind; 216ff1e7120SSebastian Grimberg break; 217ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: 218ff1e7120SSebastian Grimberg *offsets = impl->d_ind; 219ff1e7120SSebastian Grimberg break; 220ff1e7120SSebastian Grimberg } 221ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 222ff1e7120SSebastian Grimberg } 223ff1e7120SSebastian Grimberg 224ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 225*dce49693SSebastian Grimberg // Get orientations 226*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 227*dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 228*dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 229*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 230*dce49693SSebastian Grimberg 231*dce49693SSebastian Grimberg switch (mem_type) { 232*dce49693SSebastian Grimberg case CEED_MEM_HOST: 233*dce49693SSebastian Grimberg *orients = impl->h_orients; 234*dce49693SSebastian Grimberg break; 235*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 236*dce49693SSebastian Grimberg *orients = impl->d_orients; 237*dce49693SSebastian Grimberg break; 238*dce49693SSebastian Grimberg } 239*dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 240*dce49693SSebastian Grimberg } 241*dce49693SSebastian Grimberg 242*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 243*dce49693SSebastian Grimberg // Get curl-conforming orientations 244*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 245*dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 246*dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 247*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 248*dce49693SSebastian Grimberg 249*dce49693SSebastian Grimberg switch (mem_type) { 250*dce49693SSebastian Grimberg case CEED_MEM_HOST: 251*dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 252*dce49693SSebastian Grimberg break; 253*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 254*dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 255*dce49693SSebastian Grimberg break; 256*dce49693SSebastian Grimberg } 257*dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 258*dce49693SSebastian Grimberg } 259*dce49693SSebastian Grimberg 260*dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 261ff1e7120SSebastian Grimberg // Destroy restriction 262ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 263*dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Cuda(CeedElemRestriction rstr) { 264ff1e7120SSebastian Grimberg Ceed ceed; 265ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 266ca735530SJeremy L Thompson 267*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 268*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 269ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cuModuleUnload(impl->module)); 270ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_ind_allocated)); 271ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_ind_allocated)); 272ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_offsets)); 273ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_indices)); 274ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_l_vec_indices)); 275*dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_orients_allocated)); 276*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_orients_allocated)); 277*dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_curl_orients_allocated)); 278*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_curl_orients_allocated)); 279ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl)); 280ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 281ff1e7120SSebastian Grimberg } 282ff1e7120SSebastian Grimberg 283ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 284ff1e7120SSebastian Grimberg // Create transpose offsets and indices 285ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 286*dce49693SSebastian Grimberg static int CeedElemRestrictionOffset_Cuda(const CeedElemRestriction rstr, const CeedInt *indices) { 287ff1e7120SSebastian Grimberg Ceed ceed; 288ca735530SJeremy L Thompson bool *is_node; 289ff1e7120SSebastian Grimberg CeedSize l_size; 290ca735530SJeremy L Thompson CeedInt num_elem, elem_size, num_comp, num_nodes = 0; 291ca735530SJeremy L Thompson CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 292ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 293ca735530SJeremy L Thompson 294*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 295*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 296*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 297*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 298*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 299*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 300ca735530SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 301ff1e7120SSebastian Grimberg 302ff1e7120SSebastian Grimberg // Count num_nodes 303ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &is_node)); 304ca735530SJeremy L Thompson 305ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 306ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 307ff1e7120SSebastian Grimberg impl->num_nodes = num_nodes; 308ff1e7120SSebastian Grimberg 309ff1e7120SSebastian Grimberg // L-vector offsets array 310ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 311ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 312ca735530SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 313ff1e7120SSebastian Grimberg if (is_node[i]) { 314ff1e7120SSebastian Grimberg l_vec_indices[j] = i; 315ff1e7120SSebastian Grimberg ind_to_offset[i] = j++; 316ff1e7120SSebastian Grimberg } 317ff1e7120SSebastian Grimberg } 318ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&is_node)); 319ff1e7120SSebastian Grimberg 320ff1e7120SSebastian Grimberg // Compute transpose offsets and indices 321ff1e7120SSebastian Grimberg const CeedInt size_offsets = num_nodes + 1; 322ca735530SJeremy L Thompson 323ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 324ff1e7120SSebastian Grimberg CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 325ff1e7120SSebastian Grimberg // Count node multiplicity 326ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 327ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 328ff1e7120SSebastian Grimberg } 329ff1e7120SSebastian Grimberg // Convert to running sum 330ff1e7120SSebastian Grimberg for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 331ff1e7120SSebastian Grimberg // List all E-vec indices associated with L-vec node 332ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 333ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) { 334ff1e7120SSebastian Grimberg const CeedInt lid = elem_size * e + i; 335ff1e7120SSebastian Grimberg const CeedInt gid = indices[lid]; 336ca735530SJeremy L Thompson 337ff1e7120SSebastian Grimberg t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 338ff1e7120SSebastian Grimberg } 339ff1e7120SSebastian Grimberg } 340ff1e7120SSebastian Grimberg // Reset running sum 341ff1e7120SSebastian Grimberg for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 342ff1e7120SSebastian Grimberg t_offsets[0] = 0; 343ff1e7120SSebastian Grimberg 344ff1e7120SSebastian Grimberg // Copy data to device 345ff1e7120SSebastian Grimberg // -- L-vector indices 346ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 347ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), cudaMemcpyHostToDevice)); 348ff1e7120SSebastian Grimberg // -- Transpose offsets 349ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 350ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), cudaMemcpyHostToDevice)); 351ff1e7120SSebastian Grimberg // -- Transpose indices 352ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 353ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), cudaMemcpyHostToDevice)); 354ff1e7120SSebastian Grimberg 355ff1e7120SSebastian Grimberg // Cleanup 356ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&ind_to_offset)); 357ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&l_vec_indices)); 358ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_offsets)); 359ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_indices)); 360ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 361ff1e7120SSebastian Grimberg } 362ff1e7120SSebastian Grimberg 363ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 364ff1e7120SSebastian Grimberg // Create restriction 365ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 366fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, const bool *orients, 367*dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 368ca735530SJeremy L Thompson Ceed ceed, ceed_parent; 369*dce49693SSebastian Grimberg bool is_deterministic; 370ca735530SJeremy L Thompson CeedInt num_elem, num_comp, elem_size, comp_stride = 1; 371ca735530SJeremy L Thompson CeedRestrictionType rstr_type; 372*dce49693SSebastian Grimberg char *restriction_kernel_path, *restriction_kernel_source; 373ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 374ca735530SJeremy L Thompson 375*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 376ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 377ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 378*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 379*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 380*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 381ca735530SJeremy L Thompson const CeedInt size = num_elem * elem_size; 382ff1e7120SSebastian Grimberg CeedInt strides[3] = {1, size, elem_size}; 383ca735530SJeremy L Thompson CeedInt layout[3] = {1, elem_size * num_elem, elem_size}; 384ff1e7120SSebastian Grimberg 385ff1e7120SSebastian Grimberg // Stride data 386*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 387*dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_STRIDED) { 388ff1e7120SSebastian Grimberg bool has_backend_strides; 389ca735530SJeremy L Thompson 390*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 391ff1e7120SSebastian Grimberg if (!has_backend_strides) { 392*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 393ff1e7120SSebastian Grimberg } 394ff1e7120SSebastian Grimberg } else { 395*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 396ff1e7120SSebastian Grimberg } 397ff1e7120SSebastian Grimberg 398*dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 399*dce49693SSebastian Grimberg impl->num_nodes = size; 400ff1e7120SSebastian Grimberg impl->h_ind = NULL; 401ff1e7120SSebastian Grimberg impl->h_ind_allocated = NULL; 402ff1e7120SSebastian Grimberg impl->d_ind = NULL; 403ff1e7120SSebastian Grimberg impl->d_ind_allocated = NULL; 404ff1e7120SSebastian Grimberg impl->d_t_indices = NULL; 405ff1e7120SSebastian Grimberg impl->d_t_offsets = NULL; 406*dce49693SSebastian Grimberg impl->h_orients = NULL; 407*dce49693SSebastian Grimberg impl->h_orients_allocated = NULL; 408*dce49693SSebastian Grimberg impl->d_orients = NULL; 409*dce49693SSebastian Grimberg impl->d_orients_allocated = NULL; 410*dce49693SSebastian Grimberg impl->h_curl_orients = NULL; 411*dce49693SSebastian Grimberg impl->h_curl_orients_allocated = NULL; 412*dce49693SSebastian Grimberg impl->d_curl_orients = NULL; 413*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = NULL; 414*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 415*dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 416ff1e7120SSebastian Grimberg 417*dce49693SSebastian Grimberg // Set up device offset/orientation arrays 418*dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 419ff1e7120SSebastian Grimberg switch (mem_type) { 420ff1e7120SSebastian Grimberg case CEED_MEM_HOST: { 421ff1e7120SSebastian Grimberg switch (copy_mode) { 422ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 423ff1e7120SSebastian Grimberg impl->h_ind_allocated = (CeedInt *)indices; 424ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 425ff1e7120SSebastian Grimberg break; 426ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 427ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 428ff1e7120SSebastian Grimberg break; 429ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 430*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 431*dce49693SSebastian Grimberg memcpy(impl->h_ind_allocated, indices, size * sizeof(CeedInt)); 432ff1e7120SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 433ff1e7120SSebastian Grimberg break; 434ff1e7120SSebastian Grimberg } 435ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 436ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 437ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyHostToDevice)); 438*dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 439*dce49693SSebastian Grimberg } break; 440ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: { 441ff1e7120SSebastian Grimberg switch (copy_mode) { 442ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 443ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 444ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 445ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyDeviceToDevice)); 446ff1e7120SSebastian Grimberg break; 447ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 448ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 449ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; 450ff1e7120SSebastian Grimberg break; 451ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 452ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 453ff1e7120SSebastian Grimberg break; 454ff1e7120SSebastian Grimberg } 455*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 456*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_ind_allocated, impl->d_ind, size * sizeof(CeedInt), cudaMemcpyDeviceToHost)); 457*dce49693SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 458*dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 459*dce49693SSebastian Grimberg } break; 460ff1e7120SSebastian Grimberg } 461ff1e7120SSebastian Grimberg 462*dce49693SSebastian Grimberg // Orientation data 463*dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 464*dce49693SSebastian Grimberg switch (mem_type) { 465*dce49693SSebastian Grimberg case CEED_MEM_HOST: { 466*dce49693SSebastian Grimberg switch (copy_mode) { 467*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 468*dce49693SSebastian Grimberg impl->h_orients_allocated = (bool *)orients; 469*dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 470*dce49693SSebastian Grimberg break; 471*dce49693SSebastian Grimberg case CEED_USE_POINTER: 472*dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 473*dce49693SSebastian Grimberg break; 474*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 475*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 476*dce49693SSebastian Grimberg memcpy(impl->h_orients_allocated, orients, size * sizeof(bool)); 477*dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 478*dce49693SSebastian Grimberg break; 479*dce49693SSebastian Grimberg } 480*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 481*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 482*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyHostToDevice)); 483*dce49693SSebastian Grimberg } break; 484*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 485*dce49693SSebastian Grimberg switch (copy_mode) { 486*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 487*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 488*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 489*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyDeviceToDevice)); 490*dce49693SSebastian Grimberg break; 491*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 492*dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 493*dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; 494*dce49693SSebastian Grimberg break; 495*dce49693SSebastian Grimberg case CEED_USE_POINTER: 496*dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 497*dce49693SSebastian Grimberg break; 498*dce49693SSebastian Grimberg } 499*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 500*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_orients_allocated, impl->d_orients, size * sizeof(bool), cudaMemcpyDeviceToHost)); 501*dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 502*dce49693SSebastian Grimberg } break; 503*dce49693SSebastian Grimberg } 504*dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 505*dce49693SSebastian Grimberg switch (mem_type) { 506*dce49693SSebastian Grimberg case CEED_MEM_HOST: { 507*dce49693SSebastian Grimberg switch (copy_mode) { 508*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 509*dce49693SSebastian Grimberg impl->h_curl_orients_allocated = (CeedInt8 *)curl_orients; 510*dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 511*dce49693SSebastian Grimberg break; 512*dce49693SSebastian Grimberg case CEED_USE_POINTER: 513*dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 514*dce49693SSebastian Grimberg break; 515*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 516*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 517*dce49693SSebastian Grimberg memcpy(impl->h_curl_orients_allocated, curl_orients, 3 * size * sizeof(CeedInt8)); 518*dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 519*dce49693SSebastian Grimberg break; 520*dce49693SSebastian Grimberg } 521*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 522*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 523*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyHostToDevice)); 524*dce49693SSebastian Grimberg } break; 525*dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 526*dce49693SSebastian Grimberg switch (copy_mode) { 527*dce49693SSebastian Grimberg case CEED_COPY_VALUES: 528*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 529*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 530*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToDevice)); 531*dce49693SSebastian Grimberg break; 532*dce49693SSebastian Grimberg case CEED_OWN_POINTER: 533*dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 534*dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; 535*dce49693SSebastian Grimberg break; 536*dce49693SSebastian Grimberg case CEED_USE_POINTER: 537*dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 538*dce49693SSebastian Grimberg break; 539*dce49693SSebastian Grimberg } 540*dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 541*dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_curl_orients_allocated, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToHost)); 542*dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 543*dce49693SSebastian Grimberg } break; 544*dce49693SSebastian Grimberg } 545*dce49693SSebastian Grimberg } 546*dce49693SSebastian Grimberg } 547ca735530SJeremy L Thompson 548*dce49693SSebastian Grimberg // Compile CUDA kernels 549ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction.h", &restriction_kernel_path)); 55023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 551ff1e7120SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 55223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 553ca735530SJeremy L Thompson CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 8, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 554*dce49693SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "RSTR_STRIDE_NODES", 555ca735530SJeremy L Thompson strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); 556ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedNoTranspose", &impl->StridedNoTranspose)); 55758549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedTranspose", &impl->StridedTranspose)); 558ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->OffsetNoTranspose)); 55958549094SSebastian Grimberg if (!is_deterministic) { 56058549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->OffsetTranspose)); 56158549094SSebastian Grimberg } else { 56258549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTransposeDet", &impl->OffsetTransposeDet)); 56358549094SSebastian Grimberg } 564*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedNoTranspose", &impl->OrientedNoTranspose)); 565*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTranspose", &impl->OrientedTranspose)); 566*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedNoTranspose", &impl->CurlOrientedNoTranspose)); 567*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->CurlOrientedUnsignedNoTranspose)); 568*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTranspose", &impl->CurlOrientedTranspose)); 569*dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->CurlOrientedUnsignedTranspose)); 570ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_path)); 571ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_source)); 572ff1e7120SSebastian Grimberg 573ff1e7120SSebastian Grimberg // Register backend functions 574*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Cuda)); 575*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Cuda)); 576*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Cuda)); 577*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Cuda)); 578*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Cuda)); 579*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Cuda)); 580*dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Cuda)); 581ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 582ff1e7120SSebastian Grimberg } 583ff1e7120SSebastian Grimberg 584ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 585