1d275d636SJeremy L Thompson // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <stdbool.h> 120d0321e0SJeremy L Thompson #include <stddef.h> 1344d7a66cSJeremy L Thompson #include <string.h> 14c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 152b730f8bSJeremy L Thompson 1649aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 170d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 182b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 190d0321e0SJeremy L Thompson 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 21cf8cbdd6SSebastian Grimberg // Compile restriction kernels 22cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 23cf8cbdd6SSebastian Grimberg static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) { 24cf8cbdd6SSebastian Grimberg Ceed ceed; 25cf8cbdd6SSebastian Grimberg bool is_deterministic; 26cf8cbdd6SSebastian Grimberg CeedInt num_elem, num_comp, elem_size, comp_stride; 27cf8cbdd6SSebastian Grimberg CeedRestrictionType rstr_type; 28cf8cbdd6SSebastian Grimberg CeedElemRestriction_Hip *impl; 29cf8cbdd6SSebastian Grimberg 30cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 31cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 32fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 33cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 34cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 35cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 36fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 37fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetMaxPointsInElement(rstr, &elem_size)); 38fe960054SJeremy L Thompson } else { 39fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 40fe960054SJeremy L Thompson } 41cf8cbdd6SSebastian Grimberg is_deterministic = impl->d_l_vec_indices != NULL; 42cf8cbdd6SSebastian Grimberg 43cf8cbdd6SSebastian Grimberg // Compile HIP kernels 44cf8cbdd6SSebastian Grimberg switch (rstr_type) { 45cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 469c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Strided restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-strided.h>\n"; 47cf8cbdd6SSebastian Grimberg bool has_backend_strides; 48509d4af6SJeremy L Thompson CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; 49cf8cbdd6SSebastian Grimberg 50cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 51cf8cbdd6SSebastian Grimberg if (!has_backend_strides) { 5256c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 53cf8cbdd6SSebastian Grimberg } 54cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 55cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", 56cf8cbdd6SSebastian Grimberg strides[2])); 57cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); 58cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); 59cf8cbdd6SSebastian Grimberg } break; 60cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 619c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 629c25dd66SJeremy L Thompson 63cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 64cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 65cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 66cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 67cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); 68cf8cbdd6SSebastian Grimberg } break; 699c25dd66SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 709c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 719c25dd66SJeremy L Thompson "// AtPoints restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-at-points.h>\n\n" 729c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 73cf8cbdd6SSebastian Grimberg 74cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 75cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 76cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 779c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 789c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); 799c25dd66SJeremy L Thompson } break; 809c25dd66SJeremy L Thompson case CEED_RESTRICTION_ORIENTED: { 819c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 829c25dd66SJeremy L Thompson "// Oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-oriented.h>\n\n" 839c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 849c25dd66SJeremy L Thompson 859c25dd66SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 869c25dd66SJeremy L Thompson "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 879c25dd66SJeremy L Thompson "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 88cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedNoTranspose", &impl->ApplyNoTranspose)); 89cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); 90cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); 91cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); 92cf8cbdd6SSebastian Grimberg } break; 93cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 949c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 959c25dd66SJeremy L Thompson "// Curl oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h>\n\n" 969c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 97cf8cbdd6SSebastian Grimberg 98cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 99cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 100cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 101cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedNoTranspose", &impl->ApplyNoTranspose)); 102cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->ApplyUnsignedNoTranspose)); 103cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnorientedNoTranspose)); 104cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); 105cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); 106cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); 1079c25dd66SJeremy L Thompson 108cf8cbdd6SSebastian Grimberg } break; 109cf8cbdd6SSebastian Grimberg } 1109bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 111cf8cbdd6SSebastian Grimberg return CEED_ERROR_SUCCESS; 112cf8cbdd6SSebastian Grimberg } 113cf8cbdd6SSebastian Grimberg 114cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 115dce49693SSebastian Grimberg // Core apply restriction code 1160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 117dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Hip_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 118dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 1190d0321e0SJeremy L Thompson Ceed ceed; 120dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 1210d0321e0SJeremy L Thompson const CeedScalar *d_u; 1220d0321e0SJeremy L Thompson CeedScalar *d_v; 123b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 124b7453713SJeremy L Thompson 125dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 126dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 127dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 128cf8cbdd6SSebastian Grimberg 129cf8cbdd6SSebastian Grimberg // Assemble kernel if needed 130cf8cbdd6SSebastian Grimberg if (!impl->module) { 131cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetupCompile_Hip(rstr)); 132cf8cbdd6SSebastian Grimberg } 133b7453713SJeremy L Thompson 134b7453713SJeremy L Thompson // Get vectors 1352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 136437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1370d0321e0SJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 1382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 1390d0321e0SJeremy L Thompson } else { 1400d0321e0SJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 1412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 1420d0321e0SJeremy L Thompson } 1430d0321e0SJeremy L Thompson 1440d0321e0SJeremy L Thompson // Restrict 145437930d1SJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 1460d0321e0SJeremy L Thompson // L-vector -> E-vector 147cf8cbdd6SSebastian Grimberg CeedInt elem_size; 148cf8cbdd6SSebastian Grimberg 149cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 150dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; 151cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 15258549094SSebastian Grimberg 153dce49693SSebastian Grimberg switch (rstr_type) { 154dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 155cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 15658549094SSebastian Grimberg 157cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 158dce49693SSebastian Grimberg } break; 159fe960054SJeremy L Thompson case CEED_RESTRICTION_POINTS: 160dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 161a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 162dce49693SSebastian Grimberg 163cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 164dce49693SSebastian Grimberg } break; 165dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 166dce49693SSebastian Grimberg if (use_signs) { 167a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 168dce49693SSebastian Grimberg 169cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 170dce49693SSebastian Grimberg } else { 171a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 172dce49693SSebastian Grimberg 173cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 174dce49693SSebastian Grimberg } 175dce49693SSebastian Grimberg } break; 176dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 177dce49693SSebastian Grimberg if (use_signs && use_orients) { 178a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 179dce49693SSebastian Grimberg 180cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 181dce49693SSebastian Grimberg } else if (use_orients) { 182a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 183dce49693SSebastian Grimberg 184cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 185dce49693SSebastian Grimberg } else { 186a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 187dce49693SSebastian Grimberg 188cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); 189dce49693SSebastian Grimberg } 190dce49693SSebastian Grimberg } break; 1910d0321e0SJeremy L Thompson } 1920d0321e0SJeremy L Thompson } else { 1930d0321e0SJeremy L Thompson // E-vector -> L-vector 194cf8cbdd6SSebastian Grimberg const bool is_deterministic = impl->d_l_vec_indices != NULL; 195dce49693SSebastian Grimberg const CeedInt block_size = 64; 196cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 197b7453713SJeremy L Thompson 198dce49693SSebastian Grimberg switch (rstr_type) { 199dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 200cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 201dce49693SSebastian Grimberg 202cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 203dce49693SSebastian Grimberg } break; 2040b63de31SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 2050b63de31SJeremy L Thompson if (!is_deterministic) { 2060b63de31SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_points_per_elem, &d_u, &d_v}; 2070b63de31SJeremy L Thompson 2080b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2090b63de31SJeremy L Thompson } else { 2100b63de31SJeremy L Thompson void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_points_per_elem, &impl->d_t_offsets, &d_u, &d_v}; 2110b63de31SJeremy L Thompson 2120b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2130b63de31SJeremy L Thompson } 2140b63de31SJeremy L Thompson } break; 215dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 216cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 217a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 21858549094SSebastian Grimberg 219cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2200d0321e0SJeremy L Thompson } else { 22158549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 22258549094SSebastian Grimberg 223cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 22458549094SSebastian Grimberg } 225dce49693SSebastian Grimberg } break; 226dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 227dce49693SSebastian Grimberg if (use_signs) { 228cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 229a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 23058549094SSebastian Grimberg 231cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 232dce49693SSebastian Grimberg } else { 2337aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_orients, &d_u, &d_v}; 2347aa91133SSebastian Grimberg 235cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2367aa91133SSebastian Grimberg } 2377aa91133SSebastian Grimberg } else { 238cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 239a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 240dce49693SSebastian Grimberg 241cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 242dce49693SSebastian Grimberg } else { 243dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 244dce49693SSebastian Grimberg 245cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 246dce49693SSebastian Grimberg } 247dce49693SSebastian Grimberg } 248dce49693SSebastian Grimberg } break; 249dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 250dce49693SSebastian Grimberg if (use_signs && use_orients) { 251cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 252a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 253dce49693SSebastian Grimberg 254cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2557aa91133SSebastian Grimberg } else { 2567aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2577aa91133SSebastian Grimberg 258cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2597aa91133SSebastian Grimberg } 260dce49693SSebastian Grimberg } else if (use_orients) { 261cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 262a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 263dce49693SSebastian Grimberg 264cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 265dce49693SSebastian Grimberg } else { 2667aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2677aa91133SSebastian Grimberg 268cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 2697aa91133SSebastian Grimberg } 2707aa91133SSebastian Grimberg } else { 271cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 272a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 273dce49693SSebastian Grimberg 274cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 275dce49693SSebastian Grimberg } else { 276dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 277dce49693SSebastian Grimberg 278cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 279dce49693SSebastian Grimberg } 280dce49693SSebastian Grimberg } 281dce49693SSebastian Grimberg } break; 2820d0321e0SJeremy L Thompson } 2830d0321e0SJeremy L Thompson } 2840d0321e0SJeremy L Thompson 2852b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 2860d0321e0SJeremy L Thompson 2870d0321e0SJeremy L Thompson // Restore arrays 2882b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2892b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2909bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 2910d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2920d0321e0SJeremy L Thompson } 2930d0321e0SJeremy L Thompson 2940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 295dce49693SSebastian Grimberg // Apply restriction 296dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 297dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 298dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, true, true, u, v, request); 299dce49693SSebastian Grimberg } 300dce49693SSebastian Grimberg 301dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 302dce49693SSebastian Grimberg // Apply unsigned restriction 303dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 304dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 305dce49693SSebastian Grimberg CeedRequest *request) { 306dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, true, u, v, request); 307dce49693SSebastian Grimberg } 308dce49693SSebastian Grimberg 309dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 310dce49693SSebastian Grimberg // Apply unoriented restriction 311dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 312dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 313dce49693SSebastian Grimberg CeedRequest *request) { 314dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, false, u, v, request); 315dce49693SSebastian Grimberg } 316dce49693SSebastian Grimberg 317dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 3180d0321e0SJeremy L Thompson // Get offsets 3190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 320472941f0SJeremy L Thompson static int CeedElemRestrictionGetOffsets_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 3210d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 322fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 3230d0321e0SJeremy L Thompson 324b7453713SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 325fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 326472941f0SJeremy L Thompson switch (mem_type) { 3270d0321e0SJeremy L Thompson case CEED_MEM_HOST: 328fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->h_offsets_at_points : impl->h_offsets; 3290d0321e0SJeremy L Thompson break; 3300d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 331fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->d_offsets_at_points : impl->d_offsets; 3320d0321e0SJeremy L Thompson break; 3330d0321e0SJeremy L Thompson } 3340d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3350d0321e0SJeremy L Thompson } 3360d0321e0SJeremy L Thompson 3370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 338dce49693SSebastian Grimberg // Get orientations 339dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 340dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 341dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 342dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 343dce49693SSebastian Grimberg 344dce49693SSebastian Grimberg switch (mem_type) { 345dce49693SSebastian Grimberg case CEED_MEM_HOST: 346dce49693SSebastian Grimberg *orients = impl->h_orients; 347dce49693SSebastian Grimberg break; 348dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 349dce49693SSebastian Grimberg *orients = impl->d_orients; 350dce49693SSebastian Grimberg break; 351dce49693SSebastian Grimberg } 352dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 353dce49693SSebastian Grimberg } 354dce49693SSebastian Grimberg 355dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 356dce49693SSebastian Grimberg // Get curl-conforming orientations 357dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 358dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 359dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 360dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 361dce49693SSebastian Grimberg 362dce49693SSebastian Grimberg switch (mem_type) { 363dce49693SSebastian Grimberg case CEED_MEM_HOST: 364dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 365dce49693SSebastian Grimberg break; 366dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 367dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 368dce49693SSebastian Grimberg break; 369dce49693SSebastian Grimberg } 370dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 371dce49693SSebastian Grimberg } 372dce49693SSebastian Grimberg 373dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 374fe960054SJeremy L Thompson // Get offset for padded AtPoints E-layout 375fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 376fe960054SJeremy L Thompson static int CeedElemRestrictionGetAtPointsElementOffset_Hip(CeedElemRestriction rstr, CeedInt elem, CeedSize *elem_offset) { 377fe960054SJeremy L Thompson CeedInt layout[3]; 378fe960054SJeremy L Thompson 379fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetELayout(rstr, layout)); 380fe960054SJeremy L Thompson *elem_offset = 0 * layout[0] + 0 * layout[1] + elem * layout[2]; 381fe960054SJeremy L Thompson return CEED_ERROR_SUCCESS; 382fe960054SJeremy L Thompson } 383fe960054SJeremy L Thompson 384fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 3850d0321e0SJeremy L Thompson // Destroy restriction 3860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 387dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Hip(CeedElemRestriction rstr) { 3880d0321e0SJeremy L Thompson Ceed ceed; 389b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 390b7453713SJeremy L Thompson 391dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 392dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 393cf8cbdd6SSebastian Grimberg if (impl->module) { 3942b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->module)); 395cf8cbdd6SSebastian Grimberg } 396a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_owned)); 397f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_offsets_owned)); 398081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_offsets)); 399081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_indices)); 400081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_l_vec_indices)); 401a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_orients_owned)); 402f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((bool *)impl->d_orients_owned)); 403a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_curl_orients_owned)); 404f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_curl_orients_owned)); 405fe960054SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_at_points_owned)); 406fe960054SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_offsets_at_points_owned)); 4070b63de31SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_points_per_elem_owned)); 4080b63de31SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_points_per_elem_owned)); 4092b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 4109bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 4110d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4120d0321e0SJeremy L Thompson } 4130d0321e0SJeremy L Thompson 4140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4150d0321e0SJeremy L Thompson // Create transpose offsets and indices 4160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 417fe960054SJeremy L Thompson static int CeedElemRestrictionOffset_Hip(const CeedElemRestriction rstr, const CeedInt elem_size, const CeedInt *indices) { 4180d0321e0SJeremy L Thompson Ceed ceed; 419b7453713SJeremy L Thompson bool *is_node; 420e79b91d9SJeremy L Thompson CeedSize l_size; 421fe960054SJeremy L Thompson CeedInt num_elem, num_comp, num_nodes = 0; 422dce49693SSebastian Grimberg CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 423fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 424b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 425b7453713SJeremy L Thompson 426dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 427dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 428dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 429fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 430dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 431dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 432b7453713SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 4330d0321e0SJeremy L Thompson 434437930d1SJeremy L Thompson // Count num_nodes 4352b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &is_node)); 436dce49693SSebastian Grimberg 4372b730f8bSJeremy L Thompson for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 4382b730f8bSJeremy L Thompson for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 439437930d1SJeremy L Thompson impl->num_nodes = num_nodes; 4400d0321e0SJeremy L Thompson 4410d0321e0SJeremy L Thompson // L-vector offsets array 4422b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 4432b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 444b7453713SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 445437930d1SJeremy L Thompson if (is_node[i]) { 446437930d1SJeremy L Thompson l_vec_indices[j] = i; 4470d0321e0SJeremy L Thompson ind_to_offset[i] = j++; 4480d0321e0SJeremy L Thompson } 4492b730f8bSJeremy L Thompson } 4502b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&is_node)); 4510d0321e0SJeremy L Thompson 4520d0321e0SJeremy L Thompson // Compute transpose offsets and indices 453437930d1SJeremy L Thompson const CeedInt size_offsets = num_nodes + 1; 454b7453713SJeremy L Thompson 4552b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 4562b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 4570d0321e0SJeremy L Thompson // Count node multiplicity 4582b730f8bSJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 4592b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 4602b730f8bSJeremy L Thompson } 4610d0321e0SJeremy L Thompson // Convert to running sum 4622b730f8bSJeremy L Thompson for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 4630d0321e0SJeremy L Thompson // List all E-vec indices associated with L-vec node 464437930d1SJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 465437930d1SJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) { 466437930d1SJeremy L Thompson const CeedInt lid = elem_size * e + i; 4670d0321e0SJeremy L Thompson const CeedInt gid = indices[lid]; 468b7453713SJeremy L Thompson 469437930d1SJeremy L Thompson t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 4700d0321e0SJeremy L Thompson } 4710d0321e0SJeremy L Thompson } 4720d0321e0SJeremy L Thompson // Reset running sum 4732b730f8bSJeremy L Thompson for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 474437930d1SJeremy L Thompson t_offsets[0] = 0; 4750d0321e0SJeremy L Thompson 4760d0321e0SJeremy L Thompson // Copy data to device 4770d0321e0SJeremy L Thompson // -- L-vector indices 4782b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 479081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), hipMemcpyHostToDevice)); 4800d0321e0SJeremy L Thompson // -- Transpose offsets 4812b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 482081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), hipMemcpyHostToDevice)); 4830d0321e0SJeremy L Thompson // -- Transpose indices 4842b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 485081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), hipMemcpyHostToDevice)); 4860d0321e0SJeremy L Thompson 4870d0321e0SJeremy L Thompson // Cleanup 4882b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&ind_to_offset)); 4892b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&l_vec_indices)); 4902b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_offsets)); 4912b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_indices)); 4929bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 4930d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4940d0321e0SJeremy L Thompson } 4950d0321e0SJeremy L Thompson 4960d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4970d0321e0SJeremy L Thompson // Create restriction 4980d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 499a267acd1SJeremy L Thompson int CeedElemRestrictionCreate_Hip(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 500dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 501b7453713SJeremy L Thompson Ceed ceed, ceed_parent; 502dce49693SSebastian Grimberg bool is_deterministic; 503ff1bc20eSJeremy L Thompson CeedInt num_elem, num_comp, elem_size; 504b7453713SJeremy L Thompson CeedRestrictionType rstr_type; 5050d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 506b7453713SJeremy L Thompson 507dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 508ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 509ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 5109bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed_parent)); 511dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 512ff1bc20eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 513dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 51422eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 515fe960054SJeremy L Thompson // Use max number of points as elem size for AtPoints restrictions 516fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 517fe960054SJeremy L Thompson CeedInt max_points = 0; 518fe960054SJeremy L Thompson 519fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 520fe960054SJeremy L Thompson max_points = CeedIntMax(max_points, offsets[i + 1] - offsets[i]); 521fe960054SJeremy L Thompson } 522fe960054SJeremy L Thompson elem_size = max_points; 523fe960054SJeremy L Thompson } 524dce49693SSebastian Grimberg const CeedInt size = num_elem * elem_size; 5250d0321e0SJeremy L Thompson 526dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 527dce49693SSebastian Grimberg impl->num_nodes = size; 528dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 52922eb1385SJeremy L Thompson 53022eb1385SJeremy L Thompson // Set layouts 53122eb1385SJeremy L Thompson { 53222eb1385SJeremy L Thompson bool has_backend_strides; 53322eb1385SJeremy L Thompson CeedInt layout[3] = {1, size, elem_size}; 53422eb1385SJeremy L Thompson 535dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 53622eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 53722eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 53822eb1385SJeremy L Thompson if (has_backend_strides) { 53922eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 54022eb1385SJeremy L Thompson } 54122eb1385SJeremy L Thompson } 54222eb1385SJeremy L Thompson } 5430d0321e0SJeremy L Thompson 544fe960054SJeremy L Thompson // Pad AtPoints indices 545fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 546fe960054SJeremy L Thompson CeedSize offsets_len = elem_size * num_elem, at_points_size = num_elem + 1; 5470b63de31SJeremy L Thompson CeedInt max_points = elem_size, *offsets_padded, *points_per_elem; 548fe960054SJeremy L Thompson 549fe960054SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "only MemType Host supported when creating AtPoints restriction"); 550fe960054SJeremy L Thompson CeedCallBackend(CeedMalloc(offsets_len, &offsets_padded)); 5510b63de31SJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem, &points_per_elem)); 552fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 553fe960054SJeremy L Thompson CeedInt num_points = offsets[i + 1] - offsets[i]; 5548c76f877SZach Atkins CeedInt last_point = offsets[offsets[i]] * num_comp; 555fe960054SJeremy L Thompson 5560b63de31SJeremy L Thompson points_per_elem[i] = num_points; 557fe960054SJeremy L Thompson at_points_size += num_points; 558fe960054SJeremy L Thompson // -- Copy all points in element 559fe960054SJeremy L Thompson for (CeedInt j = 0; j < num_points; j++) { 5608be297eeSJeremy L Thompson offsets_padded[i * max_points + j] = offsets[offsets[i] + j] * num_comp; 5618c76f877SZach Atkins last_point = offsets_padded[i * max_points + j]; 562fe960054SJeremy L Thompson } 563fe960054SJeremy L Thompson // -- Replicate out last point in element 564fe960054SJeremy L Thompson for (CeedInt j = num_points; j < max_points; j++) { 5658c76f877SZach Atkins offsets_padded[i * max_points + j] = last_point; 566fe960054SJeremy L Thompson } 567fe960054SJeremy L Thompson } 568fe960054SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, at_points_size, &impl->h_offsets_at_points_owned, &impl->h_offsets_at_points_borrowed, 569fe960054SJeremy L Thompson &impl->h_offsets_at_points)); 570fe960054SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_at_points_owned, at_points_size * sizeof(CeedInt))); 571fe960054SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_at_points_owned, impl->h_offsets_at_points, at_points_size * sizeof(CeedInt), 572fe960054SJeremy L Thompson hipMemcpyHostToDevice)); 573fe960054SJeremy L Thompson impl->d_offsets_at_points = (CeedInt *)impl->d_offsets_at_points_owned; 574ff1bc20eSJeremy L Thompson 575fe960054SJeremy L Thompson // -- Use padded offsets for the rest of the setup 576fe960054SJeremy L Thompson offsets = (const CeedInt *)offsets_padded; 577fe960054SJeremy L Thompson copy_mode = CEED_OWN_POINTER; 5782e88d319SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, elem_size * num_elem * num_comp)); 5790b63de31SJeremy L Thompson 5800b63de31SJeremy L Thompson // -- Points per element 5810b63de31SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(points_per_elem, CEED_OWN_POINTER, num_elem, &impl->h_points_per_elem_owned, 5820b63de31SJeremy L Thompson &impl->h_points_per_elem_borrowed, &impl->h_points_per_elem)); 5830b63de31SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_points_per_elem_owned, num_elem * sizeof(CeedInt))); 5840b63de31SJeremy L Thompson CeedCallHip(ceed, 5850b63de31SJeremy L Thompson hipMemcpy((CeedInt **)impl->d_points_per_elem_owned, impl->h_points_per_elem, num_elem * sizeof(CeedInt), hipMemcpyHostToDevice)); 5860b63de31SJeremy L Thompson impl->d_points_per_elem = (CeedInt *)impl->d_points_per_elem_owned; 587fe960054SJeremy L Thompson } 588fe960054SJeremy L Thompson 589dce49693SSebastian Grimberg // Set up device offset/orientation arrays 590dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 591472941f0SJeremy L Thompson switch (mem_type) { 5926574a04fSJeremy L Thompson case CEED_MEM_HOST: { 593f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, size, &impl->h_offsets_owned, &impl->h_offsets_borrowed, &impl->h_offsets)); 594a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_owned, size * sizeof(CeedInt))); 595f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_owned, impl->h_offsets, size * sizeof(CeedInt), hipMemcpyHostToDevice)); 596f5d1e504SJeremy L Thompson impl->d_offsets = (CeedInt *)impl->d_offsets_owned; 597fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 598dce49693SSebastian Grimberg } break; 5996574a04fSJeremy L Thompson case CEED_MEM_DEVICE: { 600f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedIntArray_Hip(ceed, offsets, copy_mode, size, &impl->d_offsets_owned, &impl->d_offsets_borrowed, 601f5d1e504SJeremy L Thompson (const CeedInt **)&impl->d_offsets)); 602a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_offsets_owned)); 603f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->h_offsets_owned, impl->d_offsets, size * sizeof(CeedInt), hipMemcpyDeviceToHost)); 604a267acd1SJeremy L Thompson impl->h_offsets = impl->h_offsets_owned; 605fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 606dce49693SSebastian Grimberg } break; 607dce49693SSebastian Grimberg } 608dce49693SSebastian Grimberg 609dce49693SSebastian Grimberg // Orientation data 610dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 611dce49693SSebastian Grimberg switch (mem_type) { 612dce49693SSebastian Grimberg case CEED_MEM_HOST: { 613f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, size, &impl->h_orients_owned, &impl->h_orients_borrowed, &impl->h_orients)); 614a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_orients_owned, size * sizeof(bool))); 615f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->d_orients_owned, impl->h_orients, size * sizeof(bool), hipMemcpyHostToDevice)); 616a267acd1SJeremy L Thompson impl->d_orients = impl->d_orients_owned; 617dce49693SSebastian Grimberg } break; 618dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 619f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceBoolArray_Hip(ceed, orients, copy_mode, size, &impl->d_orients_owned, &impl->d_orients_borrowed, 620f5d1e504SJeremy L Thompson (const bool **)&impl->d_orients)); 621a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_orients_owned)); 622f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->h_orients_owned, impl->d_orients, size * sizeof(bool), hipMemcpyDeviceToHost)); 623a267acd1SJeremy L Thompson impl->h_orients = impl->h_orients_owned; 624dce49693SSebastian Grimberg } break; 625dce49693SSebastian Grimberg } 626dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 627dce49693SSebastian Grimberg switch (mem_type) { 628dce49693SSebastian Grimberg case CEED_MEM_HOST: { 629f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * size, &impl->h_curl_orients_owned, &impl->h_curl_orients_borrowed, 630f5d1e504SJeremy L Thompson &impl->h_curl_orients)); 631a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_curl_orients_owned, 3 * size * sizeof(CeedInt8))); 632f5d1e504SJeremy L Thompson CeedCallHip(ceed, 633f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->d_curl_orients_owned, impl->h_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyHostToDevice)); 634a267acd1SJeremy L Thompson impl->d_curl_orients = impl->d_curl_orients_owned; 635dce49693SSebastian Grimberg } break; 636dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 637f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedInt8Array_Hip(ceed, curl_orients, copy_mode, 3 * size, &impl->d_curl_orients_owned, 638f5d1e504SJeremy L Thompson &impl->d_curl_orients_borrowed, (const CeedInt8 **)&impl->d_curl_orients)); 639a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_owned)); 640f5d1e504SJeremy L Thompson CeedCallHip(ceed, 641f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->h_curl_orients_owned, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyDeviceToHost)); 642a267acd1SJeremy L Thompson impl->h_curl_orients = impl->h_curl_orients_owned; 643dce49693SSebastian Grimberg } break; 644dce49693SSebastian Grimberg } 645dce49693SSebastian Grimberg } 6460d0321e0SJeremy L Thompson } 6470d0321e0SJeremy L Thompson 6480d0321e0SJeremy L Thompson // Register backend functions 649dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Hip)); 650dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Hip)); 651dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Hip)); 652dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Hip)); 653dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Hip)); 654dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Hip)); 655fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 656*1a8516d0SJames Wright CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetAtPointsElementOffset", 657*1a8516d0SJames Wright CeedElemRestrictionGetAtPointsElementOffset_Hip)); 658fe960054SJeremy L Thompson } 659dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Hip)); 6609bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 6610d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6620d0321e0SJeremy L Thompson } 6630d0321e0SJeremy L Thompson 6640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 665