15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <stdbool.h> 120d0321e0SJeremy L Thompson #include <stddef.h> 1344d7a66cSJeremy L Thompson #include <string.h> 14c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 152b730f8bSJeremy L Thompson 1649aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 170d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 182b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 190d0321e0SJeremy L Thompson 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 21cf8cbdd6SSebastian Grimberg // Compile restriction kernels 22cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 23cf8cbdd6SSebastian Grimberg static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) { 24cf8cbdd6SSebastian Grimberg Ceed ceed; 25cf8cbdd6SSebastian Grimberg bool is_deterministic; 26cf8cbdd6SSebastian Grimberg CeedInt num_elem, num_comp, elem_size, comp_stride; 27cf8cbdd6SSebastian Grimberg CeedRestrictionType rstr_type; 28cf8cbdd6SSebastian Grimberg CeedElemRestriction_Hip *impl; 29cf8cbdd6SSebastian Grimberg 30cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 31cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 32fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 33cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 34cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 35cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 36fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 37fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetMaxPointsInElement(rstr, &elem_size)); 38fe960054SJeremy L Thompson } else { 39fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 40fe960054SJeremy L Thompson } 41cf8cbdd6SSebastian Grimberg is_deterministic = impl->d_l_vec_indices != NULL; 42cf8cbdd6SSebastian Grimberg 43cf8cbdd6SSebastian Grimberg // Compile HIP kernels 44cf8cbdd6SSebastian Grimberg switch (rstr_type) { 45cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 469c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Strided restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-strided.h>\n"; 47cf8cbdd6SSebastian Grimberg bool has_backend_strides; 48509d4af6SJeremy L Thompson CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; 49cf8cbdd6SSebastian Grimberg 50cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 51cf8cbdd6SSebastian Grimberg if (!has_backend_strides) { 5256c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 53cf8cbdd6SSebastian Grimberg } 54cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 55cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", 56cf8cbdd6SSebastian Grimberg strides[2])); 57cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); 58cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); 59cf8cbdd6SSebastian Grimberg } break; 60cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 619c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 629c25dd66SJeremy L Thompson 63cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 64cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 65cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 66cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 67cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); 68cf8cbdd6SSebastian Grimberg } break; 699c25dd66SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 709c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 719c25dd66SJeremy L Thompson "// AtPoints restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-at-points.h>\n\n" 729c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 73cf8cbdd6SSebastian Grimberg 74cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 75cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 76cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 77cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 789c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 799c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); 809c25dd66SJeremy L Thompson } break; 819c25dd66SJeremy L Thompson case CEED_RESTRICTION_ORIENTED: { 829c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 839c25dd66SJeremy L Thompson "// Oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-oriented.h>\n\n" 849c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 859c25dd66SJeremy L Thompson 869c25dd66SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 879c25dd66SJeremy L Thompson "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 889c25dd66SJeremy L Thompson "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 89cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedNoTranspose", &impl->ApplyNoTranspose)); 90cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); 91cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); 92cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); 93cf8cbdd6SSebastian Grimberg } break; 94cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 959c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 969c25dd66SJeremy L Thompson "// Curl oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h>\n\n" 979c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 98cf8cbdd6SSebastian Grimberg 99cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 100cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 101cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 102cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedNoTranspose", &impl->ApplyNoTranspose)); 103cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->ApplyUnsignedNoTranspose)); 104cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnorientedNoTranspose)); 105cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); 106cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); 107cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); 1089c25dd66SJeremy L Thompson 109cf8cbdd6SSebastian Grimberg } break; 110cf8cbdd6SSebastian Grimberg } 111*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 112cf8cbdd6SSebastian Grimberg return CEED_ERROR_SUCCESS; 113cf8cbdd6SSebastian Grimberg } 114cf8cbdd6SSebastian Grimberg 115cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 116dce49693SSebastian Grimberg // Core apply restriction code 1170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 118dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Hip_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 119dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 1200d0321e0SJeremy L Thompson Ceed ceed; 121dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 1220d0321e0SJeremy L Thompson const CeedScalar *d_u; 1230d0321e0SJeremy L Thompson CeedScalar *d_v; 124b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 125b7453713SJeremy L Thompson 126dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 127dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 128dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 129cf8cbdd6SSebastian Grimberg 130cf8cbdd6SSebastian Grimberg // Assemble kernel if needed 131cf8cbdd6SSebastian Grimberg if (!impl->module) { 132cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetupCompile_Hip(rstr)); 133cf8cbdd6SSebastian Grimberg } 134b7453713SJeremy L Thompson 135b7453713SJeremy L Thompson // Get vectors 1362b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 137437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1380d0321e0SJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 1392b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 1400d0321e0SJeremy L Thompson } else { 1410d0321e0SJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 1422b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 1430d0321e0SJeremy L Thompson } 1440d0321e0SJeremy L Thompson 1450d0321e0SJeremy L Thompson // Restrict 146437930d1SJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 1470d0321e0SJeremy L Thompson // L-vector -> E-vector 148cf8cbdd6SSebastian Grimberg CeedInt elem_size; 149cf8cbdd6SSebastian Grimberg 150cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 151dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; 152cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 15358549094SSebastian Grimberg 154dce49693SSebastian Grimberg switch (rstr_type) { 155dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 156cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 15758549094SSebastian Grimberg 158cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 159dce49693SSebastian Grimberg } break; 160fe960054SJeremy L Thompson case CEED_RESTRICTION_POINTS: 161dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 162a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 163dce49693SSebastian Grimberg 164cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 165dce49693SSebastian Grimberg } break; 166dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 167dce49693SSebastian Grimberg if (use_signs) { 168a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 169dce49693SSebastian Grimberg 170cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 171dce49693SSebastian Grimberg } else { 172a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 173dce49693SSebastian Grimberg 174cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 175dce49693SSebastian Grimberg } 176dce49693SSebastian Grimberg } break; 177dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 178dce49693SSebastian Grimberg if (use_signs && use_orients) { 179a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 180dce49693SSebastian Grimberg 181cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 182dce49693SSebastian Grimberg } else if (use_orients) { 183a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 184dce49693SSebastian Grimberg 185cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 186dce49693SSebastian Grimberg } else { 187a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 188dce49693SSebastian Grimberg 189cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); 190dce49693SSebastian Grimberg } 191dce49693SSebastian Grimberg } break; 1920d0321e0SJeremy L Thompson } 1930d0321e0SJeremy L Thompson } else { 1940d0321e0SJeremy L Thompson // E-vector -> L-vector 195cf8cbdd6SSebastian Grimberg const bool is_deterministic = impl->d_l_vec_indices != NULL; 196dce49693SSebastian Grimberg const CeedInt block_size = 64; 197cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 198b7453713SJeremy L Thompson 199dce49693SSebastian Grimberg switch (rstr_type) { 200dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 201cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 202dce49693SSebastian Grimberg 203cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 204dce49693SSebastian Grimberg } break; 2050b63de31SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 2060b63de31SJeremy L Thompson if (!is_deterministic) { 2070b63de31SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_points_per_elem, &d_u, &d_v}; 2080b63de31SJeremy L Thompson 2090b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2100b63de31SJeremy L Thompson } else { 2110b63de31SJeremy L Thompson void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_points_per_elem, &impl->d_t_offsets, &d_u, &d_v}; 2120b63de31SJeremy L Thompson 2130b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2140b63de31SJeremy L Thompson } 2150b63de31SJeremy L Thompson } break; 216dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 217cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 218a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 21958549094SSebastian Grimberg 220cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2210d0321e0SJeremy L Thompson } else { 22258549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 22358549094SSebastian Grimberg 224cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 22558549094SSebastian Grimberg } 226dce49693SSebastian Grimberg } break; 227dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 228dce49693SSebastian Grimberg if (use_signs) { 229cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 230a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 23158549094SSebastian Grimberg 232cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 233dce49693SSebastian Grimberg } else { 2347aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_orients, &d_u, &d_v}; 2357aa91133SSebastian Grimberg 236cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2377aa91133SSebastian Grimberg } 2387aa91133SSebastian Grimberg } else { 239cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 240a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 241dce49693SSebastian Grimberg 242cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 243dce49693SSebastian Grimberg } else { 244dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 245dce49693SSebastian Grimberg 246cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 247dce49693SSebastian Grimberg } 248dce49693SSebastian Grimberg } 249dce49693SSebastian Grimberg } break; 250dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 251dce49693SSebastian Grimberg if (use_signs && use_orients) { 252cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 253a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 254dce49693SSebastian Grimberg 255cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2567aa91133SSebastian Grimberg } else { 2577aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2587aa91133SSebastian Grimberg 259cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2607aa91133SSebastian Grimberg } 261dce49693SSebastian Grimberg } else if (use_orients) { 262cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 263a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 264dce49693SSebastian Grimberg 265cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 266dce49693SSebastian Grimberg } else { 2677aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2687aa91133SSebastian Grimberg 269cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 2707aa91133SSebastian Grimberg } 2717aa91133SSebastian Grimberg } else { 272cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 273a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 274dce49693SSebastian Grimberg 275cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 276dce49693SSebastian Grimberg } else { 277dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 278dce49693SSebastian Grimberg 279cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 280dce49693SSebastian Grimberg } 281dce49693SSebastian Grimberg } 282dce49693SSebastian Grimberg } break; 2830d0321e0SJeremy L Thompson } 2840d0321e0SJeremy L Thompson } 2850d0321e0SJeremy L Thompson 2862b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 2870d0321e0SJeremy L Thompson 2880d0321e0SJeremy L Thompson // Restore arrays 2892b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2902b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 291*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 2920d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2930d0321e0SJeremy L Thompson } 2940d0321e0SJeremy L Thompson 2950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 296dce49693SSebastian Grimberg // Apply restriction 297dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 298dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 299dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, true, true, u, v, request); 300dce49693SSebastian Grimberg } 301dce49693SSebastian Grimberg 302dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 303dce49693SSebastian Grimberg // Apply unsigned restriction 304dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 305dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 306dce49693SSebastian Grimberg CeedRequest *request) { 307dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, true, u, v, request); 308dce49693SSebastian Grimberg } 309dce49693SSebastian Grimberg 310dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 311dce49693SSebastian Grimberg // Apply unoriented restriction 312dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 313dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 314dce49693SSebastian Grimberg CeedRequest *request) { 315dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, false, u, v, request); 316dce49693SSebastian Grimberg } 317dce49693SSebastian Grimberg 318dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 3190d0321e0SJeremy L Thompson // Get offsets 3200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 321472941f0SJeremy L Thompson static int CeedElemRestrictionGetOffsets_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 3220d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 323fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 3240d0321e0SJeremy L Thompson 325b7453713SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 326fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 327472941f0SJeremy L Thompson switch (mem_type) { 3280d0321e0SJeremy L Thompson case CEED_MEM_HOST: 329fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->h_offsets_at_points : impl->h_offsets; 3300d0321e0SJeremy L Thompson break; 3310d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 332fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->d_offsets_at_points : impl->d_offsets; 3330d0321e0SJeremy L Thompson break; 3340d0321e0SJeremy L Thompson } 3350d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3360d0321e0SJeremy L Thompson } 3370d0321e0SJeremy L Thompson 3380d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 339dce49693SSebastian Grimberg // Get orientations 340dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 341dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 342dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 343dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 344dce49693SSebastian Grimberg 345dce49693SSebastian Grimberg switch (mem_type) { 346dce49693SSebastian Grimberg case CEED_MEM_HOST: 347dce49693SSebastian Grimberg *orients = impl->h_orients; 348dce49693SSebastian Grimberg break; 349dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 350dce49693SSebastian Grimberg *orients = impl->d_orients; 351dce49693SSebastian Grimberg break; 352dce49693SSebastian Grimberg } 353dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 354dce49693SSebastian Grimberg } 355dce49693SSebastian Grimberg 356dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 357dce49693SSebastian Grimberg // Get curl-conforming orientations 358dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 359dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 360dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 361dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 362dce49693SSebastian Grimberg 363dce49693SSebastian Grimberg switch (mem_type) { 364dce49693SSebastian Grimberg case CEED_MEM_HOST: 365dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 366dce49693SSebastian Grimberg break; 367dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 368dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 369dce49693SSebastian Grimberg break; 370dce49693SSebastian Grimberg } 371dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 372dce49693SSebastian Grimberg } 373dce49693SSebastian Grimberg 374dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 375fe960054SJeremy L Thompson // Get offset for padded AtPoints E-layout 376fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 377fe960054SJeremy L Thompson static int CeedElemRestrictionGetAtPointsElementOffset_Hip(CeedElemRestriction rstr, CeedInt elem, CeedSize *elem_offset) { 378fe960054SJeremy L Thompson CeedInt layout[3]; 379fe960054SJeremy L Thompson 380fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetELayout(rstr, layout)); 381fe960054SJeremy L Thompson *elem_offset = 0 * layout[0] + 0 * layout[1] + elem * layout[2]; 382fe960054SJeremy L Thompson return CEED_ERROR_SUCCESS; 383fe960054SJeremy L Thompson } 384fe960054SJeremy L Thompson 385fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 3860d0321e0SJeremy L Thompson // Destroy restriction 3870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 388dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Hip(CeedElemRestriction rstr) { 3890d0321e0SJeremy L Thompson Ceed ceed; 390b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 391b7453713SJeremy L Thompson 392dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 393dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 394cf8cbdd6SSebastian Grimberg if (impl->module) { 3952b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->module)); 396cf8cbdd6SSebastian Grimberg } 397a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_owned)); 398f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_offsets_owned)); 399081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_offsets)); 400081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_indices)); 401081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_l_vec_indices)); 402a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_orients_owned)); 403f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((bool *)impl->d_orients_owned)); 404a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_curl_orients_owned)); 405f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_curl_orients_owned)); 406fe960054SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_at_points_owned)); 407fe960054SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_offsets_at_points_owned)); 4080b63de31SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_points_per_elem_owned)); 4090b63de31SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_points_per_elem_owned)); 4102b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 411*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 4120d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4130d0321e0SJeremy L Thompson } 4140d0321e0SJeremy L Thompson 4150d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4160d0321e0SJeremy L Thompson // Create transpose offsets and indices 4170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 418fe960054SJeremy L Thompson static int CeedElemRestrictionOffset_Hip(const CeedElemRestriction rstr, const CeedInt elem_size, const CeedInt *indices) { 4190d0321e0SJeremy L Thompson Ceed ceed; 420b7453713SJeremy L Thompson bool *is_node; 421e79b91d9SJeremy L Thompson CeedSize l_size; 422fe960054SJeremy L Thompson CeedInt num_elem, num_comp, num_nodes = 0; 423dce49693SSebastian Grimberg CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 424fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 425b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 426b7453713SJeremy L Thompson 427dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 428dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 429dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 430fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 431dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 432dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 433b7453713SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 4340d0321e0SJeremy L Thompson 435437930d1SJeremy L Thompson // Count num_nodes 4362b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &is_node)); 437dce49693SSebastian Grimberg 4382b730f8bSJeremy L Thompson for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 4392b730f8bSJeremy L Thompson for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 440437930d1SJeremy L Thompson impl->num_nodes = num_nodes; 4410d0321e0SJeremy L Thompson 4420d0321e0SJeremy L Thompson // L-vector offsets array 4432b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 4442b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 445b7453713SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 446437930d1SJeremy L Thompson if (is_node[i]) { 447437930d1SJeremy L Thompson l_vec_indices[j] = i; 4480d0321e0SJeremy L Thompson ind_to_offset[i] = j++; 4490d0321e0SJeremy L Thompson } 4502b730f8bSJeremy L Thompson } 4512b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&is_node)); 4520d0321e0SJeremy L Thompson 4530d0321e0SJeremy L Thompson // Compute transpose offsets and indices 454437930d1SJeremy L Thompson const CeedInt size_offsets = num_nodes + 1; 455b7453713SJeremy L Thompson 4562b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 4572b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 4580d0321e0SJeremy L Thompson // Count node multiplicity 4592b730f8bSJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 4602b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 4612b730f8bSJeremy L Thompson } 4620d0321e0SJeremy L Thompson // Convert to running sum 4632b730f8bSJeremy L Thompson for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 4640d0321e0SJeremy L Thompson // List all E-vec indices associated with L-vec node 465437930d1SJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 466437930d1SJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) { 467437930d1SJeremy L Thompson const CeedInt lid = elem_size * e + i; 4680d0321e0SJeremy L Thompson const CeedInt gid = indices[lid]; 469b7453713SJeremy L Thompson 470437930d1SJeremy L Thompson t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 4710d0321e0SJeremy L Thompson } 4720d0321e0SJeremy L Thompson } 4730d0321e0SJeremy L Thompson // Reset running sum 4742b730f8bSJeremy L Thompson for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 475437930d1SJeremy L Thompson t_offsets[0] = 0; 4760d0321e0SJeremy L Thompson 4770d0321e0SJeremy L Thompson // Copy data to device 4780d0321e0SJeremy L Thompson // -- L-vector indices 4792b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 480081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), hipMemcpyHostToDevice)); 4810d0321e0SJeremy L Thompson // -- Transpose offsets 4822b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 483081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), hipMemcpyHostToDevice)); 4840d0321e0SJeremy L Thompson // -- Transpose indices 4852b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 486081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), hipMemcpyHostToDevice)); 4870d0321e0SJeremy L Thompson 4880d0321e0SJeremy L Thompson // Cleanup 4892b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&ind_to_offset)); 4902b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&l_vec_indices)); 4912b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_offsets)); 4922b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_indices)); 493*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 4940d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4950d0321e0SJeremy L Thompson } 4960d0321e0SJeremy L Thompson 4970d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4980d0321e0SJeremy L Thompson // Create restriction 4990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 500a267acd1SJeremy L Thompson int CeedElemRestrictionCreate_Hip(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 501dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 502b7453713SJeremy L Thompson Ceed ceed, ceed_parent; 503dce49693SSebastian Grimberg bool is_deterministic; 504ff1bc20eSJeremy L Thompson CeedInt num_elem, num_comp, elem_size; 505b7453713SJeremy L Thompson CeedRestrictionType rstr_type; 5060d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 507b7453713SJeremy L Thompson 508dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 509ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 510ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 511*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_parent)); 512dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 513ff1bc20eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 514dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 51522eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 516fe960054SJeremy L Thompson // Use max number of points as elem size for AtPoints restrictions 517fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 518fe960054SJeremy L Thompson CeedInt max_points = 0; 519fe960054SJeremy L Thompson 520fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 521fe960054SJeremy L Thompson max_points = CeedIntMax(max_points, offsets[i + 1] - offsets[i]); 522fe960054SJeremy L Thompson } 523fe960054SJeremy L Thompson elem_size = max_points; 524fe960054SJeremy L Thompson } 525dce49693SSebastian Grimberg const CeedInt size = num_elem * elem_size; 5260d0321e0SJeremy L Thompson 527dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 528dce49693SSebastian Grimberg impl->num_nodes = size; 529dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 53022eb1385SJeremy L Thompson 53122eb1385SJeremy L Thompson // Set layouts 53222eb1385SJeremy L Thompson { 53322eb1385SJeremy L Thompson bool has_backend_strides; 53422eb1385SJeremy L Thompson CeedInt layout[3] = {1, size, elem_size}; 53522eb1385SJeremy L Thompson 536dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 53722eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 53822eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 53922eb1385SJeremy L Thompson if (has_backend_strides) { 54022eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 54122eb1385SJeremy L Thompson } 54222eb1385SJeremy L Thompson } 54322eb1385SJeremy L Thompson } 5440d0321e0SJeremy L Thompson 545fe960054SJeremy L Thompson // Pad AtPoints indices 546fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 547fe960054SJeremy L Thompson CeedSize offsets_len = elem_size * num_elem, at_points_size = num_elem + 1; 5480b63de31SJeremy L Thompson CeedInt max_points = elem_size, *offsets_padded, *points_per_elem; 549fe960054SJeremy L Thompson 550fe960054SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "only MemType Host supported when creating AtPoints restriction"); 551fe960054SJeremy L Thompson CeedCallBackend(CeedMalloc(offsets_len, &offsets_padded)); 5520b63de31SJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem, &points_per_elem)); 553fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 554fe960054SJeremy L Thompson CeedInt num_points = offsets[i + 1] - offsets[i]; 555fe960054SJeremy L Thompson 5560b63de31SJeremy L Thompson points_per_elem[i] = num_points; 557fe960054SJeremy L Thompson at_points_size += num_points; 558fe960054SJeremy L Thompson // -- Copy all points in element 559fe960054SJeremy L Thompson for (CeedInt j = 0; j < num_points; j++) { 5608be297eeSJeremy L Thompson offsets_padded[i * max_points + j] = offsets[offsets[i] + j] * num_comp; 561fe960054SJeremy L Thompson } 562fe960054SJeremy L Thompson // -- Replicate out last point in element 563fe960054SJeremy L Thompson for (CeedInt j = num_points; j < max_points; j++) { 5648be297eeSJeremy L Thompson offsets_padded[i * max_points + j] = offsets[offsets[i] + num_points - 1] * num_comp; 565fe960054SJeremy L Thompson } 566fe960054SJeremy L Thompson } 567fe960054SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, at_points_size, &impl->h_offsets_at_points_owned, &impl->h_offsets_at_points_borrowed, 568fe960054SJeremy L Thompson &impl->h_offsets_at_points)); 569fe960054SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_at_points_owned, at_points_size * sizeof(CeedInt))); 570fe960054SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_at_points_owned, impl->h_offsets_at_points, at_points_size * sizeof(CeedInt), 571fe960054SJeremy L Thompson hipMemcpyHostToDevice)); 572fe960054SJeremy L Thompson impl->d_offsets_at_points = (CeedInt *)impl->d_offsets_at_points_owned; 573ff1bc20eSJeremy L Thompson 574fe960054SJeremy L Thompson // -- Use padded offsets for the rest of the setup 575fe960054SJeremy L Thompson offsets = (const CeedInt *)offsets_padded; 576fe960054SJeremy L Thompson copy_mode = CEED_OWN_POINTER; 5772e88d319SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, elem_size * num_elem * num_comp)); 5780b63de31SJeremy L Thompson 5790b63de31SJeremy L Thompson // -- Points per element 5800b63de31SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(points_per_elem, CEED_OWN_POINTER, num_elem, &impl->h_points_per_elem_owned, 5810b63de31SJeremy L Thompson &impl->h_points_per_elem_borrowed, &impl->h_points_per_elem)); 5820b63de31SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_points_per_elem_owned, num_elem * sizeof(CeedInt))); 5830b63de31SJeremy L Thompson CeedCallHip(ceed, 5840b63de31SJeremy L Thompson hipMemcpy((CeedInt **)impl->d_points_per_elem_owned, impl->h_points_per_elem, num_elem * sizeof(CeedInt), hipMemcpyHostToDevice)); 5850b63de31SJeremy L Thompson impl->d_points_per_elem = (CeedInt *)impl->d_points_per_elem_owned; 586fe960054SJeremy L Thompson } 587fe960054SJeremy L Thompson 588dce49693SSebastian Grimberg // Set up device offset/orientation arrays 589dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 590472941f0SJeremy L Thompson switch (mem_type) { 5916574a04fSJeremy L Thompson case CEED_MEM_HOST: { 592f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, size, &impl->h_offsets_owned, &impl->h_offsets_borrowed, &impl->h_offsets)); 593a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_owned, size * sizeof(CeedInt))); 594f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_owned, impl->h_offsets, size * sizeof(CeedInt), hipMemcpyHostToDevice)); 595f5d1e504SJeremy L Thompson impl->d_offsets = (CeedInt *)impl->d_offsets_owned; 596fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 597dce49693SSebastian Grimberg } break; 5986574a04fSJeremy L Thompson case CEED_MEM_DEVICE: { 599f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedIntArray_Hip(ceed, offsets, copy_mode, size, &impl->d_offsets_owned, &impl->d_offsets_borrowed, 600f5d1e504SJeremy L Thompson (const CeedInt **)&impl->d_offsets)); 601a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_offsets_owned)); 602f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->h_offsets_owned, impl->d_offsets, size * sizeof(CeedInt), hipMemcpyDeviceToHost)); 603a267acd1SJeremy L Thompson impl->h_offsets = impl->h_offsets_owned; 604fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 605dce49693SSebastian Grimberg } break; 606dce49693SSebastian Grimberg } 607dce49693SSebastian Grimberg 608dce49693SSebastian Grimberg // Orientation data 609dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 610dce49693SSebastian Grimberg switch (mem_type) { 611dce49693SSebastian Grimberg case CEED_MEM_HOST: { 612f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, size, &impl->h_orients_owned, &impl->h_orients_borrowed, &impl->h_orients)); 613a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_orients_owned, size * sizeof(bool))); 614f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->d_orients_owned, impl->h_orients, size * sizeof(bool), hipMemcpyHostToDevice)); 615a267acd1SJeremy L Thompson impl->d_orients = impl->d_orients_owned; 616dce49693SSebastian Grimberg } break; 617dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 618f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceBoolArray_Hip(ceed, orients, copy_mode, size, &impl->d_orients_owned, &impl->d_orients_borrowed, 619f5d1e504SJeremy L Thompson (const bool **)&impl->d_orients)); 620a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_orients_owned)); 621f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->h_orients_owned, impl->d_orients, size * sizeof(bool), hipMemcpyDeviceToHost)); 622a267acd1SJeremy L Thompson impl->h_orients = impl->h_orients_owned; 623dce49693SSebastian Grimberg } break; 624dce49693SSebastian Grimberg } 625dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 626dce49693SSebastian Grimberg switch (mem_type) { 627dce49693SSebastian Grimberg case CEED_MEM_HOST: { 628f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * size, &impl->h_curl_orients_owned, &impl->h_curl_orients_borrowed, 629f5d1e504SJeremy L Thompson &impl->h_curl_orients)); 630a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_curl_orients_owned, 3 * size * sizeof(CeedInt8))); 631f5d1e504SJeremy L Thompson CeedCallHip(ceed, 632f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->d_curl_orients_owned, impl->h_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyHostToDevice)); 633a267acd1SJeremy L Thompson impl->d_curl_orients = impl->d_curl_orients_owned; 634dce49693SSebastian Grimberg } break; 635dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 636f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedInt8Array_Hip(ceed, curl_orients, copy_mode, 3 * size, &impl->d_curl_orients_owned, 637f5d1e504SJeremy L Thompson &impl->d_curl_orients_borrowed, (const CeedInt8 **)&impl->d_curl_orients)); 638a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_owned)); 639f5d1e504SJeremy L Thompson CeedCallHip(ceed, 640f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->h_curl_orients_owned, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyDeviceToHost)); 641a267acd1SJeremy L Thompson impl->h_curl_orients = impl->h_curl_orients_owned; 642dce49693SSebastian Grimberg } break; 643dce49693SSebastian Grimberg } 644dce49693SSebastian Grimberg } 6450d0321e0SJeremy L Thompson } 6460d0321e0SJeremy L Thompson 6470d0321e0SJeremy L Thompson // Register backend functions 648dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Hip)); 649dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Hip)); 650dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Hip)); 651dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Hip)); 652dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Hip)); 653dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Hip)); 654fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 655fe960054SJeremy L Thompson CeedCallBackend( 656fe960054SJeremy L Thompson CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetAtPointsElementOffset", CeedElemRestrictionGetAtPointsElementOffset_Hip)); 657fe960054SJeremy L Thompson } 658dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Hip)); 659*9bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 6600d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6610d0321e0SJeremy L Thompson } 6620d0321e0SJeremy L Thompson 6630d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 664