1ff1e7120SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2ff1e7120SSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3ff1e7120SSebastian Grimberg // 4ff1e7120SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5ff1e7120SSebastian Grimberg // 6ff1e7120SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7ff1e7120SSebastian Grimberg 8ff1e7120SSebastian Grimberg #include <ceed.h> 9ff1e7120SSebastian Grimberg #include <ceed/backend.h> 10ff1e7120SSebastian Grimberg #include <ceed/jit-tools.h> 11ff1e7120SSebastian Grimberg #include <cuda.h> 12ff1e7120SSebastian Grimberg #include <cuda_runtime.h> 13ff1e7120SSebastian Grimberg #include <stdbool.h> 14ff1e7120SSebastian Grimberg #include <stddef.h> 15ff1e7120SSebastian Grimberg #include <string.h> 16ff1e7120SSebastian Grimberg 17ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-common.h" 18ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-compile.h" 19ff1e7120SSebastian Grimberg #include "ceed-cuda-ref.h" 20ff1e7120SSebastian Grimberg 21ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 22dce49693SSebastian Grimberg // Core apply restriction code 23ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 24dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Cuda_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 25dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 26ff1e7120SSebastian Grimberg Ceed ceed; 27ca735530SJeremy L Thompson CeedInt num_elem, elem_size; 28dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 29ff1e7120SSebastian Grimberg const CeedScalar *d_u; 30ff1e7120SSebastian Grimberg CeedScalar *d_v; 31ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 32dce49693SSebastian Grimberg CUfunction kernel; 33ca735530SJeremy L Thompson 34dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 35dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 36dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 37dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 38dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 39ca735530SJeremy L Thompson const CeedInt num_nodes = impl->num_nodes; 40ca735530SJeremy L Thompson 41ca735530SJeremy L Thompson // Get vectors 42ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 43ff1e7120SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 44ff1e7120SSebastian Grimberg // Sum into for transpose mode, e-vec to l-vec 45ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 46ff1e7120SSebastian Grimberg } else { 47ff1e7120SSebastian Grimberg // Overwrite for notranspose mode, l-vec to e-vec 48ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 49ff1e7120SSebastian Grimberg } 50ff1e7120SSebastian Grimberg 51ff1e7120SSebastian Grimberg // Restrict 52ff1e7120SSebastian Grimberg if (t_mode == CEED_NOTRANSPOSE) { 53ff1e7120SSebastian Grimberg // L-vector -> E-vector 54dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; 55dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 5658549094SSebastian Grimberg 57dce49693SSebastian Grimberg switch (rstr_type) { 58dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 59ff1e7120SSebastian Grimberg kernel = impl->StridedNoTranspose; 60ff1e7120SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 6158549094SSebastian Grimberg 62dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 63dce49693SSebastian Grimberg } break; 64dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 65dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 66dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 67dce49693SSebastian Grimberg 68dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 69dce49693SSebastian Grimberg } break; 70dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 71dce49693SSebastian Grimberg if (use_signs) { 72dce49693SSebastian Grimberg kernel = impl->OrientedNoTranspose; 73dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 74dce49693SSebastian Grimberg 75dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 76dce49693SSebastian Grimberg } else { 77dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 78dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 79dce49693SSebastian Grimberg 80dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 81dce49693SSebastian Grimberg } 82dce49693SSebastian Grimberg } break; 83dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 84dce49693SSebastian Grimberg if (use_signs && use_orients) { 85dce49693SSebastian Grimberg kernel = impl->CurlOrientedNoTranspose; 86dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 87dce49693SSebastian Grimberg 88dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 89dce49693SSebastian Grimberg } else if (use_orients) { 90dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedNoTranspose; 91dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 92dce49693SSebastian Grimberg 93dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 94dce49693SSebastian Grimberg } else { 95dce49693SSebastian Grimberg kernel = impl->OffsetNoTranspose; 96dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 97dce49693SSebastian Grimberg 98dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 99dce49693SSebastian Grimberg } 100dce49693SSebastian Grimberg } break; 101b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 102b3d03e38SSebastian Grimberg // LCOV_EXCL_START 103b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 104b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 105b3d03e38SSebastian Grimberg } break; 106ff1e7120SSebastian Grimberg } 107ff1e7120SSebastian Grimberg } else { 108ff1e7120SSebastian Grimberg // E-vector -> L-vector 109dce49693SSebastian Grimberg const CeedInt block_size = 32; 110dce49693SSebastian Grimberg const CeedInt grid = CeedDivUpInt(num_nodes, block_size); 111ca735530SJeremy L Thompson 112dce49693SSebastian Grimberg switch (rstr_type) { 113dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 114dce49693SSebastian Grimberg kernel = impl->StridedTranspose; 115dce49693SSebastian Grimberg void *args[] = {&num_elem, &d_u, &d_v}; 116dce49693SSebastian Grimberg 117dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 118dce49693SSebastian Grimberg } break; 119dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 12058549094SSebastian Grimberg if (impl->OffsetTranspose) { 121ff1e7120SSebastian Grimberg kernel = impl->OffsetTranspose; 12258549094SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 12358549094SSebastian Grimberg 124dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 125ff1e7120SSebastian Grimberg } else { 12658549094SSebastian Grimberg kernel = impl->OffsetTransposeDet; 12758549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 12858549094SSebastian Grimberg 129dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 13058549094SSebastian Grimberg } 131dce49693SSebastian Grimberg } break; 132dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 133dce49693SSebastian Grimberg if (use_signs) { 134*7aa91133SSebastian Grimberg if (impl->OrientedTranspose) { 135dce49693SSebastian Grimberg kernel = impl->OrientedTranspose; 136dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_orients, &d_u, &d_v}; 13758549094SSebastian Grimberg 138dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 139dce49693SSebastian Grimberg } else { 140*7aa91133SSebastian Grimberg kernel = impl->OrientedTransposeDet; 141*7aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_orients, &d_u, &d_v}; 142*7aa91133SSebastian Grimberg 143*7aa91133SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 144*7aa91133SSebastian Grimberg } 145*7aa91133SSebastian Grimberg } else { 146dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 147dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 148dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 149dce49693SSebastian Grimberg 150dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 151dce49693SSebastian Grimberg } else { 152dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 153dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 154dce49693SSebastian Grimberg 155dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 156dce49693SSebastian Grimberg } 157dce49693SSebastian Grimberg } 158dce49693SSebastian Grimberg } break; 159dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 160dce49693SSebastian Grimberg if (use_signs && use_orients) { 161*7aa91133SSebastian Grimberg if (impl->CurlOrientedTranspose) { 162dce49693SSebastian Grimberg kernel = impl->CurlOrientedTranspose; 163dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 164dce49693SSebastian Grimberg 165dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 166*7aa91133SSebastian Grimberg } else { 167*7aa91133SSebastian Grimberg kernel = impl->CurlOrientedTransposeDet; 168*7aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 169*7aa91133SSebastian Grimberg 170*7aa91133SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 171*7aa91133SSebastian Grimberg } 172dce49693SSebastian Grimberg } else if (use_orients) { 173*7aa91133SSebastian Grimberg if (impl->CurlOrientedUnsignedTranspose) { 174dce49693SSebastian Grimberg kernel = impl->CurlOrientedUnsignedTranspose; 175dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &impl->d_curl_orients, &d_u, &d_v}; 176dce49693SSebastian Grimberg 177dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 178dce49693SSebastian Grimberg } else { 179*7aa91133SSebastian Grimberg kernel = impl->CurlOrientedUnsignedTransposeDet; 180*7aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 181*7aa91133SSebastian Grimberg 182*7aa91133SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 183*7aa91133SSebastian Grimberg } 184*7aa91133SSebastian Grimberg } else { 185dce49693SSebastian Grimberg if (impl->OffsetTranspose) { 186dce49693SSebastian Grimberg kernel = impl->OffsetTranspose; 187dce49693SSebastian Grimberg void *args[] = {&num_elem, &impl->d_ind, &d_u, &d_v}; 188dce49693SSebastian Grimberg 189dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 190dce49693SSebastian Grimberg } else { 191dce49693SSebastian Grimberg kernel = impl->OffsetTransposeDet; 192dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 193dce49693SSebastian Grimberg 194dce49693SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, grid, block_size, args)); 195dce49693SSebastian Grimberg } 196dce49693SSebastian Grimberg } 197dce49693SSebastian Grimberg } break; 198b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 199b3d03e38SSebastian Grimberg // LCOV_EXCL_START 200b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 201b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 202b3d03e38SSebastian Grimberg } break; 203ff1e7120SSebastian Grimberg } 204ff1e7120SSebastian Grimberg } 205ff1e7120SSebastian Grimberg 206ff1e7120SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 207ff1e7120SSebastian Grimberg 208ff1e7120SSebastian Grimberg // Restore arrays 209ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 210ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 211ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 212ff1e7120SSebastian Grimberg } 213ff1e7120SSebastian Grimberg 214ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 215dce49693SSebastian Grimberg // Apply restriction 216dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 217dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 218dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, true, true, u, v, request); 219dce49693SSebastian Grimberg } 220dce49693SSebastian Grimberg 221dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 222dce49693SSebastian Grimberg // Apply unsigned restriction 223dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 224dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 225dce49693SSebastian Grimberg CeedRequest *request) { 226dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, true, u, v, request); 227dce49693SSebastian Grimberg } 228dce49693SSebastian Grimberg 229dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 230dce49693SSebastian Grimberg // Apply unoriented restriction 231dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 232dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 233dce49693SSebastian Grimberg CeedRequest *request) { 234dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, false, u, v, request); 235dce49693SSebastian Grimberg } 236dce49693SSebastian Grimberg 237dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 238ff1e7120SSebastian Grimberg // Get offsets 239ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 240ff1e7120SSebastian Grimberg static int CeedElemRestrictionGetOffsets_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 241ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 242ff1e7120SSebastian Grimberg 243ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 244ff1e7120SSebastian Grimberg switch (mem_type) { 245ff1e7120SSebastian Grimberg case CEED_MEM_HOST: 246ff1e7120SSebastian Grimberg *offsets = impl->h_ind; 247ff1e7120SSebastian Grimberg break; 248ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: 249ff1e7120SSebastian Grimberg *offsets = impl->d_ind; 250ff1e7120SSebastian Grimberg break; 251ff1e7120SSebastian Grimberg } 252ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 253ff1e7120SSebastian Grimberg } 254ff1e7120SSebastian Grimberg 255ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 256dce49693SSebastian Grimberg // Get orientations 257dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 258dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 259dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 260dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 261dce49693SSebastian Grimberg 262dce49693SSebastian Grimberg switch (mem_type) { 263dce49693SSebastian Grimberg case CEED_MEM_HOST: 264dce49693SSebastian Grimberg *orients = impl->h_orients; 265dce49693SSebastian Grimberg break; 266dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 267dce49693SSebastian Grimberg *orients = impl->d_orients; 268dce49693SSebastian Grimberg break; 269dce49693SSebastian Grimberg } 270dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 271dce49693SSebastian Grimberg } 272dce49693SSebastian Grimberg 273dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 274dce49693SSebastian Grimberg // Get curl-conforming orientations 275dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 276dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 277dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 278dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 279dce49693SSebastian Grimberg 280dce49693SSebastian Grimberg switch (mem_type) { 281dce49693SSebastian Grimberg case CEED_MEM_HOST: 282dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 283dce49693SSebastian Grimberg break; 284dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 285dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 286dce49693SSebastian Grimberg break; 287dce49693SSebastian Grimberg } 288dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 289dce49693SSebastian Grimberg } 290dce49693SSebastian Grimberg 291dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 292ff1e7120SSebastian Grimberg // Destroy restriction 293ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 294dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Cuda(CeedElemRestriction rstr) { 295ff1e7120SSebastian Grimberg Ceed ceed; 296ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 297ca735530SJeremy L Thompson 298dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 299dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 300ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cuModuleUnload(impl->module)); 301ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_ind_allocated)); 302ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_ind_allocated)); 303ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_offsets)); 304ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_t_indices)); 305ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_l_vec_indices)); 306dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_orients_allocated)); 307dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_orients_allocated)); 308dce49693SSebastian Grimberg CeedCallBackend(CeedFree(&impl->h_curl_orients_allocated)); 309dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaFree(impl->d_curl_orients_allocated)); 310ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl)); 311ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 312ff1e7120SSebastian Grimberg } 313ff1e7120SSebastian Grimberg 314ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 315ff1e7120SSebastian Grimberg // Create transpose offsets and indices 316ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 317dce49693SSebastian Grimberg static int CeedElemRestrictionOffset_Cuda(const CeedElemRestriction rstr, const CeedInt *indices) { 318ff1e7120SSebastian Grimberg Ceed ceed; 319ca735530SJeremy L Thompson bool *is_node; 320ff1e7120SSebastian Grimberg CeedSize l_size; 321ca735530SJeremy L Thompson CeedInt num_elem, elem_size, num_comp, num_nodes = 0; 322ca735530SJeremy L Thompson CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 323ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 324ca735530SJeremy L Thompson 325dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 326dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 327dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 328dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 329dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 330dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 331ca735530SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 332ff1e7120SSebastian Grimberg 333ff1e7120SSebastian Grimberg // Count num_nodes 334ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &is_node)); 335ca735530SJeremy L Thompson 336ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 337ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 338ff1e7120SSebastian Grimberg impl->num_nodes = num_nodes; 339ff1e7120SSebastian Grimberg 340ff1e7120SSebastian Grimberg // L-vector offsets array 341ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 342ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 343ca735530SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 344ff1e7120SSebastian Grimberg if (is_node[i]) { 345ff1e7120SSebastian Grimberg l_vec_indices[j] = i; 346ff1e7120SSebastian Grimberg ind_to_offset[i] = j++; 347ff1e7120SSebastian Grimberg } 348ff1e7120SSebastian Grimberg } 349ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&is_node)); 350ff1e7120SSebastian Grimberg 351ff1e7120SSebastian Grimberg // Compute transpose offsets and indices 352ff1e7120SSebastian Grimberg const CeedInt size_offsets = num_nodes + 1; 353ca735530SJeremy L Thompson 354ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 355ff1e7120SSebastian Grimberg CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 356ff1e7120SSebastian Grimberg // Count node multiplicity 357ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 358ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 359ff1e7120SSebastian Grimberg } 360ff1e7120SSebastian Grimberg // Convert to running sum 361ff1e7120SSebastian Grimberg for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 362ff1e7120SSebastian Grimberg // List all E-vec indices associated with L-vec node 363ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 364ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) { 365ff1e7120SSebastian Grimberg const CeedInt lid = elem_size * e + i; 366ff1e7120SSebastian Grimberg const CeedInt gid = indices[lid]; 367ca735530SJeremy L Thompson 368ff1e7120SSebastian Grimberg t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 369ff1e7120SSebastian Grimberg } 370ff1e7120SSebastian Grimberg } 371ff1e7120SSebastian Grimberg // Reset running sum 372ff1e7120SSebastian Grimberg for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 373ff1e7120SSebastian Grimberg t_offsets[0] = 0; 374ff1e7120SSebastian Grimberg 375ff1e7120SSebastian Grimberg // Copy data to device 376ff1e7120SSebastian Grimberg // -- L-vector indices 377ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 378ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), cudaMemcpyHostToDevice)); 379ff1e7120SSebastian Grimberg // -- Transpose offsets 380ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 381ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), cudaMemcpyHostToDevice)); 382ff1e7120SSebastian Grimberg // -- Transpose indices 383ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 384ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), cudaMemcpyHostToDevice)); 385ff1e7120SSebastian Grimberg 386ff1e7120SSebastian Grimberg // Cleanup 387ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&ind_to_offset)); 388ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&l_vec_indices)); 389ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_offsets)); 390ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_indices)); 391ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 392ff1e7120SSebastian Grimberg } 393ff1e7120SSebastian Grimberg 394ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 395ff1e7120SSebastian Grimberg // Create restriction 396ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 397fcbe8c06SSebastian Grimberg int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *indices, const bool *orients, 398dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 399ca735530SJeremy L Thompson Ceed ceed, ceed_parent; 400dce49693SSebastian Grimberg bool is_deterministic; 401ca735530SJeremy L Thompson CeedInt num_elem, num_comp, elem_size, comp_stride = 1; 402ca735530SJeremy L Thompson CeedRestrictionType rstr_type; 403dce49693SSebastian Grimberg char *restriction_kernel_path, *restriction_kernel_source; 404ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 405ca735530SJeremy L Thompson 406dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 407ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 408ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 409dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 410dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 411dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 412ca735530SJeremy L Thompson const CeedInt size = num_elem * elem_size; 413ff1e7120SSebastian Grimberg CeedInt strides[3] = {1, size, elem_size}; 414ca735530SJeremy L Thompson CeedInt layout[3] = {1, elem_size * num_elem, elem_size}; 415ff1e7120SSebastian Grimberg 416ff1e7120SSebastian Grimberg // Stride data 417dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 418dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_STRIDED) { 419ff1e7120SSebastian Grimberg bool has_backend_strides; 420ca735530SJeremy L Thompson 421dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 422ff1e7120SSebastian Grimberg if (!has_backend_strides) { 423dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 424ff1e7120SSebastian Grimberg } 425ff1e7120SSebastian Grimberg } else { 426dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 427ff1e7120SSebastian Grimberg } 428ff1e7120SSebastian Grimberg 429dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 430dce49693SSebastian Grimberg impl->num_nodes = size; 431ff1e7120SSebastian Grimberg impl->h_ind = NULL; 432ff1e7120SSebastian Grimberg impl->h_ind_allocated = NULL; 433ff1e7120SSebastian Grimberg impl->d_ind = NULL; 434ff1e7120SSebastian Grimberg impl->d_ind_allocated = NULL; 435ff1e7120SSebastian Grimberg impl->d_t_indices = NULL; 436ff1e7120SSebastian Grimberg impl->d_t_offsets = NULL; 437dce49693SSebastian Grimberg impl->h_orients = NULL; 438dce49693SSebastian Grimberg impl->h_orients_allocated = NULL; 439dce49693SSebastian Grimberg impl->d_orients = NULL; 440dce49693SSebastian Grimberg impl->d_orients_allocated = NULL; 441dce49693SSebastian Grimberg impl->h_curl_orients = NULL; 442dce49693SSebastian Grimberg impl->h_curl_orients_allocated = NULL; 443dce49693SSebastian Grimberg impl->d_curl_orients = NULL; 444dce49693SSebastian Grimberg impl->d_curl_orients_allocated = NULL; 445dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 446dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 447ff1e7120SSebastian Grimberg 448dce49693SSebastian Grimberg // Set up device offset/orientation arrays 449dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 450ff1e7120SSebastian Grimberg switch (mem_type) { 451ff1e7120SSebastian Grimberg case CEED_MEM_HOST: { 452ff1e7120SSebastian Grimberg switch (copy_mode) { 453ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 454ff1e7120SSebastian Grimberg impl->h_ind_allocated = (CeedInt *)indices; 455ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 456ff1e7120SSebastian Grimberg break; 457ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 458ff1e7120SSebastian Grimberg impl->h_ind = (CeedInt *)indices; 459ff1e7120SSebastian Grimberg break; 460ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 461dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 462dce49693SSebastian Grimberg memcpy(impl->h_ind_allocated, indices, size * sizeof(CeedInt)); 463ff1e7120SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 464ff1e7120SSebastian Grimberg break; 465ff1e7120SSebastian Grimberg } 466ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 467ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 468ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyHostToDevice)); 469dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 470dce49693SSebastian Grimberg } break; 471ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: { 472ff1e7120SSebastian Grimberg switch (copy_mode) { 473ff1e7120SSebastian Grimberg case CEED_COPY_VALUES: 474ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_ind, size * sizeof(CeedInt))); 475ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; // We own the device memory 476ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), cudaMemcpyDeviceToDevice)); 477ff1e7120SSebastian Grimberg break; 478ff1e7120SSebastian Grimberg case CEED_OWN_POINTER: 479ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 480ff1e7120SSebastian Grimberg impl->d_ind_allocated = impl->d_ind; 481ff1e7120SSebastian Grimberg break; 482ff1e7120SSebastian Grimberg case CEED_USE_POINTER: 483ff1e7120SSebastian Grimberg impl->d_ind = (CeedInt *)indices; 484ff1e7120SSebastian Grimberg break; 485ff1e7120SSebastian Grimberg } 486dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_ind_allocated)); 487dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_ind_allocated, impl->d_ind, size * sizeof(CeedInt), cudaMemcpyDeviceToHost)); 488dce49693SSebastian Grimberg impl->h_ind = impl->h_ind_allocated; 489dce49693SSebastian Grimberg if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, indices)); 490dce49693SSebastian Grimberg } break; 491ff1e7120SSebastian Grimberg } 492ff1e7120SSebastian Grimberg 493dce49693SSebastian Grimberg // Orientation data 494dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 495dce49693SSebastian Grimberg switch (mem_type) { 496dce49693SSebastian Grimberg case CEED_MEM_HOST: { 497dce49693SSebastian Grimberg switch (copy_mode) { 498dce49693SSebastian Grimberg case CEED_OWN_POINTER: 499dce49693SSebastian Grimberg impl->h_orients_allocated = (bool *)orients; 500dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 501dce49693SSebastian Grimberg break; 502dce49693SSebastian Grimberg case CEED_USE_POINTER: 503dce49693SSebastian Grimberg impl->h_orients = (bool *)orients; 504dce49693SSebastian Grimberg break; 505dce49693SSebastian Grimberg case CEED_COPY_VALUES: 506dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 507dce49693SSebastian Grimberg memcpy(impl->h_orients_allocated, orients, size * sizeof(bool)); 508dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 509dce49693SSebastian Grimberg break; 510dce49693SSebastian Grimberg } 511dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 512dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 513dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyHostToDevice)); 514dce49693SSebastian Grimberg } break; 515dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 516dce49693SSebastian Grimberg switch (copy_mode) { 517dce49693SSebastian Grimberg case CEED_COPY_VALUES: 518dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients, size * sizeof(bool))); 519dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; // We own the device memory 520dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_orients, orients, size * sizeof(bool), cudaMemcpyDeviceToDevice)); 521dce49693SSebastian Grimberg break; 522dce49693SSebastian Grimberg case CEED_OWN_POINTER: 523dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 524dce49693SSebastian Grimberg impl->d_orients_allocated = impl->d_orients; 525dce49693SSebastian Grimberg break; 526dce49693SSebastian Grimberg case CEED_USE_POINTER: 527dce49693SSebastian Grimberg impl->d_orients = (bool *)orients; 528dce49693SSebastian Grimberg break; 529dce49693SSebastian Grimberg } 530dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(size, &impl->h_orients_allocated)); 531dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_orients_allocated, impl->d_orients, size * sizeof(bool), cudaMemcpyDeviceToHost)); 532dce49693SSebastian Grimberg impl->h_orients = impl->h_orients_allocated; 533dce49693SSebastian Grimberg } break; 534dce49693SSebastian Grimberg } 535dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 536dce49693SSebastian Grimberg switch (mem_type) { 537dce49693SSebastian Grimberg case CEED_MEM_HOST: { 538dce49693SSebastian Grimberg switch (copy_mode) { 539dce49693SSebastian Grimberg case CEED_OWN_POINTER: 540dce49693SSebastian Grimberg impl->h_curl_orients_allocated = (CeedInt8 *)curl_orients; 541dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 542dce49693SSebastian Grimberg break; 543dce49693SSebastian Grimberg case CEED_USE_POINTER: 544dce49693SSebastian Grimberg impl->h_curl_orients = (CeedInt8 *)curl_orients; 545dce49693SSebastian Grimberg break; 546dce49693SSebastian Grimberg case CEED_COPY_VALUES: 547dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 548dce49693SSebastian Grimberg memcpy(impl->h_curl_orients_allocated, curl_orients, 3 * size * sizeof(CeedInt8)); 549dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 550dce49693SSebastian Grimberg break; 551dce49693SSebastian Grimberg } 552dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 553dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 554dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyHostToDevice)); 555dce49693SSebastian Grimberg } break; 556dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 557dce49693SSebastian Grimberg switch (copy_mode) { 558dce49693SSebastian Grimberg case CEED_COPY_VALUES: 559dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients, 3 * size * sizeof(CeedInt8))); 560dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; // We own the device memory 561dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->d_curl_orients, curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToDevice)); 562dce49693SSebastian Grimberg break; 563dce49693SSebastian Grimberg case CEED_OWN_POINTER: 564dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 565dce49693SSebastian Grimberg impl->d_curl_orients_allocated = impl->d_curl_orients; 566dce49693SSebastian Grimberg break; 567dce49693SSebastian Grimberg case CEED_USE_POINTER: 568dce49693SSebastian Grimberg impl->d_curl_orients = (CeedInt8 *)curl_orients; 569dce49693SSebastian Grimberg break; 570dce49693SSebastian Grimberg } 571dce49693SSebastian Grimberg CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_allocated)); 572dce49693SSebastian Grimberg CeedCallCuda(ceed, cudaMemcpy(impl->h_curl_orients_allocated, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToHost)); 573dce49693SSebastian Grimberg impl->h_curl_orients = impl->h_curl_orients_allocated; 574dce49693SSebastian Grimberg } break; 575dce49693SSebastian Grimberg } 576dce49693SSebastian Grimberg } 577dce49693SSebastian Grimberg } 578ca735530SJeremy L Thompson 579dce49693SSebastian Grimberg // Compile CUDA kernels 580ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction.h", &restriction_kernel_path)); 58123d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 582ff1e7120SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 58323d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 584ca735530SJeremy L Thompson CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 8, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 585dce49693SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, "RSTR_STRIDE_NODES", 586ca735530SJeremy L Thompson strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", strides[2])); 587ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedNoTranspose", &impl->StridedNoTranspose)); 58858549094SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedTranspose", &impl->StridedTranspose)); 589ff1e7120SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->OffsetNoTranspose)); 590dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedNoTranspose", &impl->OrientedNoTranspose)); 591dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedNoTranspose", &impl->CurlOrientedNoTranspose)); 592dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->CurlOrientedUnsignedNoTranspose)); 593*7aa91133SSebastian Grimberg if (!is_deterministic) { 594*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->OffsetTranspose)); 595*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTranspose", &impl->OrientedTranspose)); 596dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTranspose", &impl->CurlOrientedTranspose)); 597dce49693SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->CurlOrientedUnsignedTranspose)); 598*7aa91133SSebastian Grimberg } else { 599*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTransposeDet", &impl->OffsetTransposeDet)); 600*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTransposeDet", &impl->OrientedTransposeDet)); 601*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTransposeDet", &impl->CurlOrientedTransposeDet)); 602*7aa91133SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTransposeDet", &impl->CurlOrientedUnsignedTransposeDet)); 603*7aa91133SSebastian Grimberg } 604ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_path)); 605ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_source)); 606ff1e7120SSebastian Grimberg 607ff1e7120SSebastian Grimberg // Register backend functions 608dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Cuda)); 609dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Cuda)); 610dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Cuda)); 611dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Cuda)); 612dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Cuda)); 613dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Cuda)); 614dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Cuda)); 615ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 616ff1e7120SSebastian Grimberg } 617ff1e7120SSebastian Grimberg 618ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 619