15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2ff1e7120SSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3ff1e7120SSebastian Grimberg // 4ff1e7120SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5ff1e7120SSebastian Grimberg // 6ff1e7120SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7ff1e7120SSebastian Grimberg 8ff1e7120SSebastian Grimberg #include <ceed.h> 9ff1e7120SSebastian Grimberg #include <ceed/backend.h> 10ff1e7120SSebastian Grimberg #include <ceed/jit-tools.h> 11ff1e7120SSebastian Grimberg #include <cuda.h> 12ff1e7120SSebastian Grimberg #include <cuda_runtime.h> 13ff1e7120SSebastian Grimberg #include <stdbool.h> 14ff1e7120SSebastian Grimberg #include <stddef.h> 15ff1e7120SSebastian Grimberg #include <string.h> 16ff1e7120SSebastian Grimberg 17ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-common.h" 18ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-compile.h" 19ff1e7120SSebastian Grimberg #include "ceed-cuda-ref.h" 20ff1e7120SSebastian Grimberg 21ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 22cf8cbdd6SSebastian Grimberg // Compile restriction kernels 23cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 24cf8cbdd6SSebastian Grimberg static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr) { 25cf8cbdd6SSebastian Grimberg Ceed ceed; 26cf8cbdd6SSebastian Grimberg bool is_deterministic; 2722070f95SJeremy L Thompson char *restriction_kernel_source; 2822070f95SJeremy L Thompson const char *restriction_kernel_path; 29cf8cbdd6SSebastian Grimberg CeedInt num_elem, num_comp, elem_size, comp_stride; 30cf8cbdd6SSebastian Grimberg CeedRestrictionType rstr_type; 31cf8cbdd6SSebastian Grimberg CeedElemRestriction_Cuda *impl; 32cf8cbdd6SSebastian Grimberg 33cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 34cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 35cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 36cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 37cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 38cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 39cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 40cf8cbdd6SSebastian Grimberg is_deterministic = impl->d_l_vec_indices != NULL; 41cf8cbdd6SSebastian Grimberg 42cf8cbdd6SSebastian Grimberg // Compile CUDA kernels 43cf8cbdd6SSebastian Grimberg switch (rstr_type) { 44cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 45cf8cbdd6SSebastian Grimberg bool has_backend_strides; 46*509d4af6SJeremy L Thompson CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; 47cf8cbdd6SSebastian Grimberg 48cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 49cf8cbdd6SSebastian Grimberg if (!has_backend_strides) { 5056c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 51cf8cbdd6SSebastian Grimberg } 52cf8cbdd6SSebastian Grimberg 53cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-strided.h", &restriction_kernel_path)); 54cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 55cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 56cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 57cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 58cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", 59cf8cbdd6SSebastian Grimberg strides[2])); 60cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); 61cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); 62cf8cbdd6SSebastian Grimberg } break; 63cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 64cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &restriction_kernel_path)); 65cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 66cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source)); 67cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 68cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 69cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 70cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 71cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 72cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); 73cf8cbdd6SSebastian Grimberg } break; 74cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 7522070f95SJeremy L Thompson const char *offset_kernel_path; 76*509d4af6SJeremy L Thompson char **file_paths = NULL; 77*509d4af6SJeremy L Thompson CeedInt num_file_paths = 0; 78cf8cbdd6SSebastian Grimberg 79cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-oriented.h", &restriction_kernel_path)); 80cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 81*509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); 82cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &offset_kernel_path)); 83*509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); 84cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 85cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 86cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 87cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 88cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedNoTranspose", &impl->ApplyNoTranspose)); 89cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); 90cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); 91cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); 92*509d4af6SJeremy L Thompson // Cleanup 93cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedFree(&offset_kernel_path)); 94*509d4af6SJeremy L Thompson for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i])); 95*509d4af6SJeremy L Thompson CeedCall(CeedFree(&file_paths)); 96cf8cbdd6SSebastian Grimberg } break; 97cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 9822070f95SJeremy L Thompson const char *offset_kernel_path; 99*509d4af6SJeremy L Thompson char **file_paths = NULL; 100*509d4af6SJeremy L Thompson CeedInt num_file_paths = 0; 101cf8cbdd6SSebastian Grimberg 102cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-curl-oriented.h", &restriction_kernel_path)); 103cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source -----\n"); 104*509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, restriction_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); 105cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &offset_kernel_path)); 106*509d4af6SJeremy L Thompson CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, offset_kernel_path, &num_file_paths, &file_paths, &restriction_kernel_source)); 107cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 108cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Cuda(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 109cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 110cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 111cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedNoTranspose", &impl->ApplyNoTranspose)); 112cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->ApplyUnsignedNoTranspose)); 113cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnorientedNoTranspose)); 114cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); 115cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); 116cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); 117*509d4af6SJeremy L Thompson // Cleanup 118cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedFree(&offset_kernel_path)); 119*509d4af6SJeremy L Thompson for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i])); 120*509d4af6SJeremy L Thompson CeedCall(CeedFree(&file_paths)); 121cf8cbdd6SSebastian Grimberg } break; 122cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 123cf8cbdd6SSebastian Grimberg // LCOV_EXCL_START 124cf8cbdd6SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 125cf8cbdd6SSebastian Grimberg // LCOV_EXCL_STOP 126cf8cbdd6SSebastian Grimberg } break; 127cf8cbdd6SSebastian Grimberg } 128cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_path)); 129cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedFree(&restriction_kernel_source)); 130cf8cbdd6SSebastian Grimberg return CEED_ERROR_SUCCESS; 131cf8cbdd6SSebastian Grimberg } 132cf8cbdd6SSebastian Grimberg 133cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 134dce49693SSebastian Grimberg // Core apply restriction code 135ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 136dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Cuda_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 137dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 138ff1e7120SSebastian Grimberg Ceed ceed; 139dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 140ff1e7120SSebastian Grimberg const CeedScalar *d_u; 141ff1e7120SSebastian Grimberg CeedScalar *d_v; 142ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 143ca735530SJeremy L Thompson 144dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 145dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 146dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 147cf8cbdd6SSebastian Grimberg 148cf8cbdd6SSebastian Grimberg // Assemble kernel if needed 149cf8cbdd6SSebastian Grimberg if (!impl->module) { 150cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetupCompile_Cuda(rstr)); 151cf8cbdd6SSebastian Grimberg } 152ca735530SJeremy L Thompson 153ca735530SJeremy L Thompson // Get vectors 154ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 155ff1e7120SSebastian Grimberg if (t_mode == CEED_TRANSPOSE) { 156ff1e7120SSebastian Grimberg // Sum into for transpose mode, e-vec to l-vec 157ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 158ff1e7120SSebastian Grimberg } else { 159ff1e7120SSebastian Grimberg // Overwrite for notranspose mode, l-vec to e-vec 160ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 161ff1e7120SSebastian Grimberg } 162ff1e7120SSebastian Grimberg 163ff1e7120SSebastian Grimberg // Restrict 164ff1e7120SSebastian Grimberg if (t_mode == CEED_NOTRANSPOSE) { 165ff1e7120SSebastian Grimberg // L-vector -> E-vector 166cf8cbdd6SSebastian Grimberg CeedInt elem_size; 167cf8cbdd6SSebastian Grimberg 168cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 169dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 1024 ? (elem_size > 32 ? elem_size : 32) : 1024; 170cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 17158549094SSebastian Grimberg 172dce49693SSebastian Grimberg switch (rstr_type) { 173dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 174cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 17558549094SSebastian Grimberg 176cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 177dce49693SSebastian Grimberg } break; 178dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 179a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 180dce49693SSebastian Grimberg 181cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 182dce49693SSebastian Grimberg } break; 183dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 184dce49693SSebastian Grimberg if (use_signs) { 185a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 186dce49693SSebastian Grimberg 187cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 188dce49693SSebastian Grimberg } else { 189a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 190dce49693SSebastian Grimberg 191cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 192dce49693SSebastian Grimberg } 193dce49693SSebastian Grimberg } break; 194dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 195dce49693SSebastian Grimberg if (use_signs && use_orients) { 196a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 197dce49693SSebastian Grimberg 198cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 199dce49693SSebastian Grimberg } else if (use_orients) { 200a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 201dce49693SSebastian Grimberg 202cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 203dce49693SSebastian Grimberg } else { 204a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 205dce49693SSebastian Grimberg 206cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); 207dce49693SSebastian Grimberg } 208dce49693SSebastian Grimberg } break; 209b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 210b3d03e38SSebastian Grimberg // LCOV_EXCL_START 211b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 212b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 213b3d03e38SSebastian Grimberg } break; 214ff1e7120SSebastian Grimberg } 215ff1e7120SSebastian Grimberg } else { 216ff1e7120SSebastian Grimberg // E-vector -> L-vector 217cf8cbdd6SSebastian Grimberg const bool is_deterministic = impl->d_l_vec_indices != NULL; 218dce49693SSebastian Grimberg const CeedInt block_size = 32; 219cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 220ca735530SJeremy L Thompson 221dce49693SSebastian Grimberg switch (rstr_type) { 222dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 223cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 224dce49693SSebastian Grimberg 225cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 226dce49693SSebastian Grimberg } break; 227dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 228cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 229a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 23058549094SSebastian Grimberg 231cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 232ff1e7120SSebastian Grimberg } else { 23358549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 23458549094SSebastian Grimberg 235cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 23658549094SSebastian Grimberg } 237dce49693SSebastian Grimberg } break; 238dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 239dce49693SSebastian Grimberg if (use_signs) { 240cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 241a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 24258549094SSebastian Grimberg 243cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 244dce49693SSebastian Grimberg } else { 2457aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_orients, &d_u, &d_v}; 2467aa91133SSebastian Grimberg 247cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 2487aa91133SSebastian Grimberg } 2497aa91133SSebastian Grimberg } else { 250cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 251a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 252dce49693SSebastian Grimberg 253cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 254dce49693SSebastian Grimberg } else { 255dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 256dce49693SSebastian Grimberg 257cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 258dce49693SSebastian Grimberg } 259dce49693SSebastian Grimberg } 260dce49693SSebastian Grimberg } break; 261dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 262dce49693SSebastian Grimberg if (use_signs && use_orients) { 263cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 264a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 265dce49693SSebastian Grimberg 266cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 2677aa91133SSebastian Grimberg } else { 2687aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2697aa91133SSebastian Grimberg 270cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyTranspose, grid, block_size, args)); 2717aa91133SSebastian Grimberg } 272dce49693SSebastian Grimberg } else if (use_orients) { 273cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 274a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 275dce49693SSebastian Grimberg 276cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 277dce49693SSebastian Grimberg } else { 2787aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2797aa91133SSebastian Grimberg 280cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 2817aa91133SSebastian Grimberg } 2827aa91133SSebastian Grimberg } else { 283cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 284a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 285dce49693SSebastian Grimberg 286cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 287dce49693SSebastian Grimberg } else { 288dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 289dce49693SSebastian Grimberg 290cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Cuda(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 291dce49693SSebastian Grimberg } 292dce49693SSebastian Grimberg } 293dce49693SSebastian Grimberg } break; 294b3d03e38SSebastian Grimberg case CEED_RESTRICTION_POINTS: { 295b3d03e38SSebastian Grimberg // LCOV_EXCL_START 296b3d03e38SSebastian Grimberg return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement restriction CeedElemRestrictionAtPoints"); 297b3d03e38SSebastian Grimberg // LCOV_EXCL_STOP 298b3d03e38SSebastian Grimberg } break; 299ff1e7120SSebastian Grimberg } 300ff1e7120SSebastian Grimberg } 301ff1e7120SSebastian Grimberg 302ff1e7120SSebastian Grimberg if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 303ff1e7120SSebastian Grimberg 304ff1e7120SSebastian Grimberg // Restore arrays 305ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 306ff1e7120SSebastian Grimberg CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 307ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 308ff1e7120SSebastian Grimberg } 309ff1e7120SSebastian Grimberg 310ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 311dce49693SSebastian Grimberg // Apply restriction 312dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 313dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 314dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, true, true, u, v, request); 315dce49693SSebastian Grimberg } 316dce49693SSebastian Grimberg 317dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 318dce49693SSebastian Grimberg // Apply unsigned restriction 319dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 320dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 321dce49693SSebastian Grimberg CeedRequest *request) { 322dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, true, u, v, request); 323dce49693SSebastian Grimberg } 324dce49693SSebastian Grimberg 325dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 326dce49693SSebastian Grimberg // Apply unoriented restriction 327dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 328dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Cuda(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 329dce49693SSebastian Grimberg CeedRequest *request) { 330dce49693SSebastian Grimberg return CeedElemRestrictionApply_Cuda_Core(rstr, t_mode, false, false, u, v, request); 331dce49693SSebastian Grimberg } 332dce49693SSebastian Grimberg 333dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 334ff1e7120SSebastian Grimberg // Get offsets 335ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 336ff1e7120SSebastian Grimberg static int CeedElemRestrictionGetOffsets_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 337ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 338ff1e7120SSebastian Grimberg 339ca735530SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 340ff1e7120SSebastian Grimberg switch (mem_type) { 341ff1e7120SSebastian Grimberg case CEED_MEM_HOST: 342a267acd1SJeremy L Thompson *offsets = impl->h_offsets; 343ff1e7120SSebastian Grimberg break; 344ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: 345a267acd1SJeremy L Thompson *offsets = impl->d_offsets; 346ff1e7120SSebastian Grimberg break; 347ff1e7120SSebastian Grimberg } 348ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 349ff1e7120SSebastian Grimberg } 350ff1e7120SSebastian Grimberg 351ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 352dce49693SSebastian Grimberg // Get orientations 353dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 354dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 355dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 356dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 357dce49693SSebastian Grimberg 358dce49693SSebastian Grimberg switch (mem_type) { 359dce49693SSebastian Grimberg case CEED_MEM_HOST: 360dce49693SSebastian Grimberg *orients = impl->h_orients; 361dce49693SSebastian Grimberg break; 362dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 363dce49693SSebastian Grimberg *orients = impl->d_orients; 364dce49693SSebastian Grimberg break; 365dce49693SSebastian Grimberg } 366dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 367dce49693SSebastian Grimberg } 368dce49693SSebastian Grimberg 369dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 370dce49693SSebastian Grimberg // Get curl-conforming orientations 371dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 372dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Cuda(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 373dce49693SSebastian Grimberg CeedElemRestriction_Cuda *impl; 374dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 375dce49693SSebastian Grimberg 376dce49693SSebastian Grimberg switch (mem_type) { 377dce49693SSebastian Grimberg case CEED_MEM_HOST: 378dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 379dce49693SSebastian Grimberg break; 380dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 381dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 382dce49693SSebastian Grimberg break; 383dce49693SSebastian Grimberg } 384dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 385dce49693SSebastian Grimberg } 386dce49693SSebastian Grimberg 387dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 388ff1e7120SSebastian Grimberg // Destroy restriction 389ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 390dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Cuda(CeedElemRestriction rstr) { 391ff1e7120SSebastian Grimberg Ceed ceed; 392ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 393ca735530SJeremy L Thompson 394dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 395dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 396cf8cbdd6SSebastian Grimberg if (impl->module) { 397ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cuModuleUnload(impl->module)); 398cf8cbdd6SSebastian Grimberg } 399a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_owned)); 400f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaFree((CeedInt *)impl->d_offsets_owned)); 401081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaFree((CeedInt *)impl->d_t_offsets)); 402081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaFree((CeedInt *)impl->d_t_indices)); 403081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaFree((CeedInt *)impl->d_l_vec_indices)); 404a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_orients_owned)); 405f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaFree((bool *)impl->d_orients_owned)); 406a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_curl_orients_owned)); 407f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaFree((CeedInt8 *)impl->d_curl_orients_owned)); 408ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&impl)); 409ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 410ff1e7120SSebastian Grimberg } 411ff1e7120SSebastian Grimberg 412ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 413ff1e7120SSebastian Grimberg // Create transpose offsets and indices 414ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 415dce49693SSebastian Grimberg static int CeedElemRestrictionOffset_Cuda(const CeedElemRestriction rstr, const CeedInt *indices) { 416ff1e7120SSebastian Grimberg Ceed ceed; 417ca735530SJeremy L Thompson bool *is_node; 418ff1e7120SSebastian Grimberg CeedSize l_size; 419ca735530SJeremy L Thompson CeedInt num_elem, elem_size, num_comp, num_nodes = 0; 420ca735530SJeremy L Thompson CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 421ca735530SJeremy L Thompson CeedElemRestriction_Cuda *impl; 422ca735530SJeremy L Thompson 423dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 424dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 425dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 426dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 427dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 428dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 429ca735530SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 430ff1e7120SSebastian Grimberg 431ff1e7120SSebastian Grimberg // Count num_nodes 432ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &is_node)); 433ca735530SJeremy L Thompson 434ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 435ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 436ff1e7120SSebastian Grimberg impl->num_nodes = num_nodes; 437ff1e7120SSebastian Grimberg 438ff1e7120SSebastian Grimberg // L-vector offsets array 439ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 440ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 441ca735530SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 442ff1e7120SSebastian Grimberg if (is_node[i]) { 443ff1e7120SSebastian Grimberg l_vec_indices[j] = i; 444ff1e7120SSebastian Grimberg ind_to_offset[i] = j++; 445ff1e7120SSebastian Grimberg } 446ff1e7120SSebastian Grimberg } 447ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&is_node)); 448ff1e7120SSebastian Grimberg 449ff1e7120SSebastian Grimberg // Compute transpose offsets and indices 450ff1e7120SSebastian Grimberg const CeedInt size_offsets = num_nodes + 1; 451ca735530SJeremy L Thompson 452ff1e7120SSebastian Grimberg CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 453ff1e7120SSebastian Grimberg CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 454ff1e7120SSebastian Grimberg // Count node multiplicity 455ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 456ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 457ff1e7120SSebastian Grimberg } 458ff1e7120SSebastian Grimberg // Convert to running sum 459ff1e7120SSebastian Grimberg for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 460ff1e7120SSebastian Grimberg // List all E-vec indices associated with L-vec node 461ff1e7120SSebastian Grimberg for (CeedInt e = 0; e < num_elem; ++e) { 462ff1e7120SSebastian Grimberg for (CeedInt i = 0; i < elem_size; ++i) { 463ff1e7120SSebastian Grimberg const CeedInt lid = elem_size * e + i; 464ff1e7120SSebastian Grimberg const CeedInt gid = indices[lid]; 465ca735530SJeremy L Thompson 466ff1e7120SSebastian Grimberg t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 467ff1e7120SSebastian Grimberg } 468ff1e7120SSebastian Grimberg } 469ff1e7120SSebastian Grimberg // Reset running sum 470ff1e7120SSebastian Grimberg for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 471ff1e7120SSebastian Grimberg t_offsets[0] = 0; 472ff1e7120SSebastian Grimberg 473ff1e7120SSebastian Grimberg // Copy data to device 474ff1e7120SSebastian Grimberg // -- L-vector indices 475ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 476081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((CeedInt *)impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), cudaMemcpyHostToDevice)); 477ff1e7120SSebastian Grimberg // -- Transpose offsets 478ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 479081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((CeedInt *)impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), cudaMemcpyHostToDevice)); 480ff1e7120SSebastian Grimberg // -- Transpose indices 481ff1e7120SSebastian Grimberg CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 482081aa29dSJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((CeedInt *)impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), cudaMemcpyHostToDevice)); 483ff1e7120SSebastian Grimberg 484ff1e7120SSebastian Grimberg // Cleanup 485ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&ind_to_offset)); 486ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&l_vec_indices)); 487ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_offsets)); 488ff1e7120SSebastian Grimberg CeedCallBackend(CeedFree(&t_indices)); 489ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 490ff1e7120SSebastian Grimberg } 491ff1e7120SSebastian Grimberg 492ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 493ff1e7120SSebastian Grimberg // Create restriction 494ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 495a267acd1SJeremy L Thompson int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 496dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 497ca735530SJeremy L Thompson Ceed ceed, ceed_parent; 498dce49693SSebastian Grimberg bool is_deterministic; 499cf8cbdd6SSebastian Grimberg CeedInt num_elem, elem_size; 500ca735530SJeremy L Thompson CeedRestrictionType rstr_type; 501ff1e7120SSebastian Grimberg CeedElemRestriction_Cuda *impl; 502ca735530SJeremy L Thompson 503dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 504ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 505ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 506dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 507dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 50822eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 509ca735530SJeremy L Thompson const CeedInt size = num_elem * elem_size; 510ff1e7120SSebastian Grimberg 511dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 512dce49693SSebastian Grimberg impl->num_nodes = size; 513dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 51422eb1385SJeremy L Thompson 51522eb1385SJeremy L Thompson // Set layouts 51622eb1385SJeremy L Thompson { 51722eb1385SJeremy L Thompson bool has_backend_strides; 51822eb1385SJeremy L Thompson CeedInt layout[3] = {1, size, elem_size}; 51922eb1385SJeremy L Thompson 520dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 52122eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 52222eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 52322eb1385SJeremy L Thompson if (has_backend_strides) { 52422eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 52522eb1385SJeremy L Thompson } 52622eb1385SJeremy L Thompson } 52722eb1385SJeremy L Thompson } 528ff1e7120SSebastian Grimberg 529dce49693SSebastian Grimberg // Set up device offset/orientation arrays 530dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 531ff1e7120SSebastian Grimberg switch (mem_type) { 532ff1e7120SSebastian Grimberg case CEED_MEM_HOST: { 533f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, size, &impl->h_offsets_owned, &impl->h_offsets_borrowed, &impl->h_offsets)); 534a267acd1SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_offsets_owned, size * sizeof(CeedInt))); 535f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((CeedInt *)impl->d_offsets_owned, impl->h_offsets, size * sizeof(CeedInt), cudaMemcpyHostToDevice)); 536f5d1e504SJeremy L Thompson impl->d_offsets = (CeedInt *)impl->d_offsets_owned; 537a267acd1SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, offsets)); 538dce49693SSebastian Grimberg } break; 539ff1e7120SSebastian Grimberg case CEED_MEM_DEVICE: { 540f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedIntArray_Cuda(ceed, offsets, copy_mode, size, &impl->d_offsets_owned, &impl->d_offsets_borrowed, 541f5d1e504SJeremy L Thompson (const CeedInt **)&impl->d_offsets)); 542a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_offsets_owned)); 543f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((CeedInt *)impl->h_offsets_owned, impl->d_offsets, size * sizeof(CeedInt), cudaMemcpyDeviceToHost)); 544a267acd1SJeremy L Thompson impl->h_offsets = impl->h_offsets_owned; 545a267acd1SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Cuda(rstr, offsets)); 546dce49693SSebastian Grimberg } break; 547ff1e7120SSebastian Grimberg } 548ff1e7120SSebastian Grimberg 549dce49693SSebastian Grimberg // Orientation data 550dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 551dce49693SSebastian Grimberg switch (mem_type) { 552dce49693SSebastian Grimberg case CEED_MEM_HOST: { 553f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, size, &impl->h_orients_owned, &impl->h_orients_borrowed, &impl->h_orients)); 554a267acd1SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_orients_owned, size * sizeof(bool))); 555f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((bool *)impl->d_orients_owned, impl->h_orients, size * sizeof(bool), cudaMemcpyHostToDevice)); 556a267acd1SJeremy L Thompson impl->d_orients = impl->d_orients_owned; 557dce49693SSebastian Grimberg } break; 558dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 559f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceBoolArray_Cuda(ceed, orients, copy_mode, size, &impl->d_orients_owned, &impl->d_orients_borrowed, 560f5d1e504SJeremy L Thompson (const bool **)&impl->d_orients)); 561a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_orients_owned)); 562f5d1e504SJeremy L Thompson CeedCallCuda(ceed, cudaMemcpy((bool *)impl->h_orients_owned, impl->d_orients, size * sizeof(bool), cudaMemcpyDeviceToHost)); 563a267acd1SJeremy L Thompson impl->h_orients = impl->h_orients_owned; 564dce49693SSebastian Grimberg } break; 565dce49693SSebastian Grimberg } 566dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 567dce49693SSebastian Grimberg switch (mem_type) { 568dce49693SSebastian Grimberg case CEED_MEM_HOST: { 569f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * size, &impl->h_curl_orients_owned, &impl->h_curl_orients_borrowed, 570f5d1e504SJeremy L Thompson &impl->h_curl_orients)); 571a267acd1SJeremy L Thompson CeedCallCuda(ceed, cudaMalloc((void **)&impl->d_curl_orients_owned, 3 * size * sizeof(CeedInt8))); 572f5d1e504SJeremy L Thompson CeedCallCuda(ceed, 573f5d1e504SJeremy L Thompson cudaMemcpy((CeedInt8 *)impl->d_curl_orients_owned, impl->h_curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyHostToDevice)); 574a267acd1SJeremy L Thompson impl->d_curl_orients = impl->d_curl_orients_owned; 575dce49693SSebastian Grimberg } break; 576dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 577f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedInt8Array_Cuda(ceed, curl_orients, copy_mode, 3 * size, &impl->d_curl_orients_owned, 578f5d1e504SJeremy L Thompson &impl->d_curl_orients_borrowed, (const CeedInt8 **)&impl->d_curl_orients)); 579a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_owned)); 580f5d1e504SJeremy L Thompson CeedCallCuda(ceed, 581f5d1e504SJeremy L Thompson cudaMemcpy((CeedInt8 *)impl->h_curl_orients_owned, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), cudaMemcpyDeviceToHost)); 582a267acd1SJeremy L Thompson impl->h_curl_orients = impl->h_curl_orients_owned; 583dce49693SSebastian Grimberg } break; 584dce49693SSebastian Grimberg } 585dce49693SSebastian Grimberg } 586dce49693SSebastian Grimberg } 587ca735530SJeremy L Thompson 588ff1e7120SSebastian Grimberg // Register backend functions 589dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Cuda)); 590dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Cuda)); 591dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Cuda)); 592dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Cuda)); 593dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Cuda)); 594dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Cuda)); 595dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Cuda)); 596ff1e7120SSebastian Grimberg return CEED_ERROR_SUCCESS; 597ff1e7120SSebastian Grimberg } 598ff1e7120SSebastian Grimberg 599ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------ 600