15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <stdbool.h> 120d0321e0SJeremy L Thompson #include <stddef.h> 1344d7a66cSJeremy L Thompson #include <string.h> 14c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 152b730f8bSJeremy L Thompson 1649aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 170d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 182b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 190d0321e0SJeremy L Thompson 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 21cf8cbdd6SSebastian Grimberg // Compile restriction kernels 22cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 23cf8cbdd6SSebastian Grimberg static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr) { 24cf8cbdd6SSebastian Grimberg Ceed ceed; 25cf8cbdd6SSebastian Grimberg bool is_deterministic; 26cf8cbdd6SSebastian Grimberg CeedInt num_elem, num_comp, elem_size, comp_stride; 27cf8cbdd6SSebastian Grimberg CeedRestrictionType rstr_type; 28cf8cbdd6SSebastian Grimberg CeedElemRestriction_Hip *impl; 29cf8cbdd6SSebastian Grimberg 30cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 31cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 32fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 33cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 34cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 35cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 36fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 37fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetMaxPointsInElement(rstr, &elem_size)); 38fe960054SJeremy L Thompson } else { 39fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 40fe960054SJeremy L Thompson } 41cf8cbdd6SSebastian Grimberg is_deterministic = impl->d_l_vec_indices != NULL; 42cf8cbdd6SSebastian Grimberg 43cf8cbdd6SSebastian Grimberg // Compile HIP kernels 44cf8cbdd6SSebastian Grimberg switch (rstr_type) { 45cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 46*9c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Strided restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-strided.h>\n"; 47cf8cbdd6SSebastian Grimberg bool has_backend_strides; 48509d4af6SJeremy L Thompson CeedInt strides[3] = {1, num_elem * elem_size, elem_size}; 49cf8cbdd6SSebastian Grimberg 50cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 51cf8cbdd6SSebastian Grimberg if (!has_backend_strides) { 5256c48462SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 53cf8cbdd6SSebastian Grimberg } 54cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 55cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_STRIDE_NODES", strides[0], "RSTR_STRIDE_COMP", strides[1], "RSTR_STRIDE_ELEM", 56cf8cbdd6SSebastian Grimberg strides[2])); 57cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedNoTranspose", &impl->ApplyNoTranspose)); 58cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "StridedTranspose", &impl->ApplyTranspose)); 59cf8cbdd6SSebastian Grimberg } break; 60cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 61*9c25dd66SJeremy L Thompson const char restriction_kernel_source[] = "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 62*9c25dd66SJeremy L Thompson 63cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 64cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 65cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 66cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 67cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyTranspose)); 68cf8cbdd6SSebastian Grimberg } break; 69*9c25dd66SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 70*9c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 71*9c25dd66SJeremy L Thompson "// AtPoints restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-at-points.h>\n\n" 72*9c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 73cf8cbdd6SSebastian Grimberg 74cf8cbdd6SSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Restriction Kernel Source Complete! -----\n"); 75cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 76cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 77cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 78*9c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose)); 79*9c25dd66SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose)); 80*9c25dd66SJeremy L Thompson } break; 81*9c25dd66SJeremy L Thompson case CEED_RESTRICTION_ORIENTED: { 82*9c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 83*9c25dd66SJeremy L Thompson "// Oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-oriented.h>\n\n" 84*9c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 85*9c25dd66SJeremy L Thompson 86*9c25dd66SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 87*9c25dd66SJeremy L Thompson "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 88*9c25dd66SJeremy L Thompson "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 89cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedNoTranspose", &impl->ApplyNoTranspose)); 90cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnsignedNoTranspose)); 91cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OrientedTranspose", &impl->ApplyTranspose)); 92cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnsignedTranspose)); 93cf8cbdd6SSebastian Grimberg } break; 94cf8cbdd6SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 95*9c25dd66SJeremy L Thompson const char restriction_kernel_source[] = 96*9c25dd66SJeremy L Thompson "// Curl oriented restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h>\n\n" 97*9c25dd66SJeremy L Thompson "// Standard restriction source\n#include <ceed/jit-source/hip/hip-ref-restriction-offset.h>\n"; 98cf8cbdd6SSebastian Grimberg 99cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, restriction_kernel_source, &impl->module, 6, "RSTR_ELEM_SIZE", elem_size, "RSTR_NUM_ELEM", num_elem, 100cf8cbdd6SSebastian Grimberg "RSTR_NUM_COMP", num_comp, "RSTR_NUM_NODES", impl->num_nodes, "RSTR_COMP_STRIDE", comp_stride, 101cf8cbdd6SSebastian Grimberg "USE_DETERMINISTIC", is_deterministic ? 1 : 0)); 102cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedNoTranspose", &impl->ApplyNoTranspose)); 103cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedNoTranspose", &impl->ApplyUnsignedNoTranspose)); 104cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyUnorientedNoTranspose)); 105cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedTranspose", &impl->ApplyTranspose)); 106cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "CurlOrientedUnsignedTranspose", &impl->ApplyUnsignedTranspose)); 107cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetTranspose", &impl->ApplyUnorientedTranspose)); 108*9c25dd66SJeremy L Thompson 109cf8cbdd6SSebastian Grimberg } break; 110cf8cbdd6SSebastian Grimberg } 111cf8cbdd6SSebastian Grimberg return CEED_ERROR_SUCCESS; 112cf8cbdd6SSebastian Grimberg } 113cf8cbdd6SSebastian Grimberg 114cf8cbdd6SSebastian Grimberg //------------------------------------------------------------------------------ 115dce49693SSebastian Grimberg // Core apply restriction code 1160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 117dce49693SSebastian Grimberg static inline int CeedElemRestrictionApply_Hip_Core(CeedElemRestriction rstr, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 118dce49693SSebastian Grimberg CeedVector u, CeedVector v, CeedRequest *request) { 1190d0321e0SJeremy L Thompson Ceed ceed; 120dce49693SSebastian Grimberg CeedRestrictionType rstr_type; 1210d0321e0SJeremy L Thompson const CeedScalar *d_u; 1220d0321e0SJeremy L Thompson CeedScalar *d_v; 123b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 124b7453713SJeremy L Thompson 125dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 126dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 127dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 128cf8cbdd6SSebastian Grimberg 129cf8cbdd6SSebastian Grimberg // Assemble kernel if needed 130cf8cbdd6SSebastian Grimberg if (!impl->module) { 131cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetupCompile_Hip(rstr)); 132cf8cbdd6SSebastian Grimberg } 133b7453713SJeremy L Thompson 134b7453713SJeremy L Thompson // Get vectors 1352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 136437930d1SJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 1370d0321e0SJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 1382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 1390d0321e0SJeremy L Thompson } else { 1400d0321e0SJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 1412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 1420d0321e0SJeremy L Thompson } 1430d0321e0SJeremy L Thompson 1440d0321e0SJeremy L Thompson // Restrict 145437930d1SJeremy L Thompson if (t_mode == CEED_NOTRANSPOSE) { 1460d0321e0SJeremy L Thompson // L-vector -> E-vector 147cf8cbdd6SSebastian Grimberg CeedInt elem_size; 148cf8cbdd6SSebastian Grimberg 149cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 150dce49693SSebastian Grimberg const CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256; 151cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 15258549094SSebastian Grimberg 153dce49693SSebastian Grimberg switch (rstr_type) { 154dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 155cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 15658549094SSebastian Grimberg 157cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 158dce49693SSebastian Grimberg } break; 159fe960054SJeremy L Thompson case CEED_RESTRICTION_POINTS: 160dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 161a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 162dce49693SSebastian Grimberg 163cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 164dce49693SSebastian Grimberg } break; 165dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 166dce49693SSebastian Grimberg if (use_signs) { 167a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 168dce49693SSebastian Grimberg 169cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 170dce49693SSebastian Grimberg } else { 171a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 172dce49693SSebastian Grimberg 173cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 174dce49693SSebastian Grimberg } 175dce49693SSebastian Grimberg } break; 176dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 177dce49693SSebastian Grimberg if (use_signs && use_orients) { 178a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 179dce49693SSebastian Grimberg 180cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyNoTranspose, grid, block_size, args)); 181dce49693SSebastian Grimberg } else if (use_orients) { 182a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 183dce49693SSebastian Grimberg 184cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedNoTranspose, grid, block_size, args)); 185dce49693SSebastian Grimberg } else { 186a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 187dce49693SSebastian Grimberg 188cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedNoTranspose, grid, block_size, args)); 189dce49693SSebastian Grimberg } 190dce49693SSebastian Grimberg } break; 1910d0321e0SJeremy L Thompson } 1920d0321e0SJeremy L Thompson } else { 1930d0321e0SJeremy L Thompson // E-vector -> L-vector 194cf8cbdd6SSebastian Grimberg const bool is_deterministic = impl->d_l_vec_indices != NULL; 195dce49693SSebastian Grimberg const CeedInt block_size = 64; 196cf8cbdd6SSebastian Grimberg const CeedInt grid = CeedDivUpInt(impl->num_nodes, block_size); 197b7453713SJeremy L Thompson 198dce49693SSebastian Grimberg switch (rstr_type) { 199dce49693SSebastian Grimberg case CEED_RESTRICTION_STRIDED: { 200cf8cbdd6SSebastian Grimberg void *args[] = {&d_u, &d_v}; 201dce49693SSebastian Grimberg 202cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 203dce49693SSebastian Grimberg } break; 2040b63de31SJeremy L Thompson case CEED_RESTRICTION_POINTS: { 2050b63de31SJeremy L Thompson if (!is_deterministic) { 2060b63de31SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_points_per_elem, &d_u, &d_v}; 2070b63de31SJeremy L Thompson 2080b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2090b63de31SJeremy L Thompson } else { 2100b63de31SJeremy L Thompson void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_points_per_elem, &impl->d_t_offsets, &d_u, &d_v}; 2110b63de31SJeremy L Thompson 2120b63de31SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2130b63de31SJeremy L Thompson } 2140b63de31SJeremy L Thompson } break; 215dce49693SSebastian Grimberg case CEED_RESTRICTION_STANDARD: { 216cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 217a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 21858549094SSebastian Grimberg 219cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2200d0321e0SJeremy L Thompson } else { 22158549094SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 22258549094SSebastian Grimberg 223cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 22458549094SSebastian Grimberg } 225dce49693SSebastian Grimberg } break; 226dce49693SSebastian Grimberg case CEED_RESTRICTION_ORIENTED: { 227dce49693SSebastian Grimberg if (use_signs) { 228cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 229a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_orients, &d_u, &d_v}; 23058549094SSebastian Grimberg 231cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 232dce49693SSebastian Grimberg } else { 2337aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_orients, &d_u, &d_v}; 2347aa91133SSebastian Grimberg 235cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2367aa91133SSebastian Grimberg } 2377aa91133SSebastian Grimberg } else { 238cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 239a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 240dce49693SSebastian Grimberg 241cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 242dce49693SSebastian Grimberg } else { 243dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 244dce49693SSebastian Grimberg 245cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 246dce49693SSebastian Grimberg } 247dce49693SSebastian Grimberg } 248dce49693SSebastian Grimberg } break; 249dce49693SSebastian Grimberg case CEED_RESTRICTION_CURL_ORIENTED: { 250dce49693SSebastian Grimberg if (use_signs && use_orients) { 251cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 252a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 253dce49693SSebastian Grimberg 254cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2557aa91133SSebastian Grimberg } else { 2567aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2577aa91133SSebastian Grimberg 258cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyTranspose, grid, block_size, args)); 2597aa91133SSebastian Grimberg } 260dce49693SSebastian Grimberg } else if (use_orients) { 261cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 262a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &impl->d_curl_orients, &d_u, &d_v}; 263dce49693SSebastian Grimberg 264cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 265dce49693SSebastian Grimberg } else { 2667aa91133SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &impl->d_curl_orients, &d_u, &d_v}; 2677aa91133SSebastian Grimberg 268cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnsignedTranspose, grid, block_size, args)); 2697aa91133SSebastian Grimberg } 2707aa91133SSebastian Grimberg } else { 271cf8cbdd6SSebastian Grimberg if (!is_deterministic) { 272a267acd1SJeremy L Thompson void *args[] = {&impl->d_offsets, &d_u, &d_v}; 273dce49693SSebastian Grimberg 274cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 275dce49693SSebastian Grimberg } else { 276dce49693SSebastian Grimberg void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v}; 277dce49693SSebastian Grimberg 278cf8cbdd6SSebastian Grimberg CeedCallBackend(CeedRunKernel_Hip(ceed, impl->ApplyUnorientedTranspose, grid, block_size, args)); 279dce49693SSebastian Grimberg } 280dce49693SSebastian Grimberg } 281dce49693SSebastian Grimberg } break; 2820d0321e0SJeremy L Thompson } 2830d0321e0SJeremy L Thompson } 2840d0321e0SJeremy L Thompson 2852b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 2860d0321e0SJeremy L Thompson 2870d0321e0SJeremy L Thompson // Restore arrays 2882b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2892b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2900d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2910d0321e0SJeremy L Thompson } 2920d0321e0SJeremy L Thompson 2930d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 294dce49693SSebastian Grimberg // Apply restriction 295dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 296dce49693SSebastian Grimberg static int CeedElemRestrictionApply_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 297dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, true, true, u, v, request); 298dce49693SSebastian Grimberg } 299dce49693SSebastian Grimberg 300dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 301dce49693SSebastian Grimberg // Apply unsigned restriction 302dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 303dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 304dce49693SSebastian Grimberg CeedRequest *request) { 305dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, true, u, v, request); 306dce49693SSebastian Grimberg } 307dce49693SSebastian Grimberg 308dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 309dce49693SSebastian Grimberg // Apply unoriented restriction 310dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 311dce49693SSebastian Grimberg static int CeedElemRestrictionApplyUnoriented_Hip(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 312dce49693SSebastian Grimberg CeedRequest *request) { 313dce49693SSebastian Grimberg return CeedElemRestrictionApply_Hip_Core(rstr, t_mode, false, false, u, v, request); 314dce49693SSebastian Grimberg } 315dce49693SSebastian Grimberg 316dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 3170d0321e0SJeremy L Thompson // Get offsets 3180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 319472941f0SJeremy L Thompson static int CeedElemRestrictionGetOffsets_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 3200d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 321fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 3220d0321e0SJeremy L Thompson 323b7453713SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 324fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 325472941f0SJeremy L Thompson switch (mem_type) { 3260d0321e0SJeremy L Thompson case CEED_MEM_HOST: 327fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->h_offsets_at_points : impl->h_offsets; 3280d0321e0SJeremy L Thompson break; 3290d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 330fe960054SJeremy L Thompson *offsets = rstr_type == CEED_RESTRICTION_POINTS ? impl->d_offsets_at_points : impl->d_offsets; 3310d0321e0SJeremy L Thompson break; 3320d0321e0SJeremy L Thompson } 3330d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3340d0321e0SJeremy L Thompson } 3350d0321e0SJeremy L Thompson 3360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 337dce49693SSebastian Grimberg // Get orientations 338dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 339dce49693SSebastian Grimberg static int CeedElemRestrictionGetOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 340dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 341dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 342dce49693SSebastian Grimberg 343dce49693SSebastian Grimberg switch (mem_type) { 344dce49693SSebastian Grimberg case CEED_MEM_HOST: 345dce49693SSebastian Grimberg *orients = impl->h_orients; 346dce49693SSebastian Grimberg break; 347dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 348dce49693SSebastian Grimberg *orients = impl->d_orients; 349dce49693SSebastian Grimberg break; 350dce49693SSebastian Grimberg } 351dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 352dce49693SSebastian Grimberg } 353dce49693SSebastian Grimberg 354dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 355dce49693SSebastian Grimberg // Get curl-conforming orientations 356dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 357dce49693SSebastian Grimberg static int CeedElemRestrictionGetCurlOrientations_Hip(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 358dce49693SSebastian Grimberg CeedElemRestriction_Hip *impl; 359dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 360dce49693SSebastian Grimberg 361dce49693SSebastian Grimberg switch (mem_type) { 362dce49693SSebastian Grimberg case CEED_MEM_HOST: 363dce49693SSebastian Grimberg *curl_orients = impl->h_curl_orients; 364dce49693SSebastian Grimberg break; 365dce49693SSebastian Grimberg case CEED_MEM_DEVICE: 366dce49693SSebastian Grimberg *curl_orients = impl->d_curl_orients; 367dce49693SSebastian Grimberg break; 368dce49693SSebastian Grimberg } 369dce49693SSebastian Grimberg return CEED_ERROR_SUCCESS; 370dce49693SSebastian Grimberg } 371dce49693SSebastian Grimberg 372dce49693SSebastian Grimberg //------------------------------------------------------------------------------ 373fe960054SJeremy L Thompson // Get offset for padded AtPoints E-layout 374fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 375fe960054SJeremy L Thompson static int CeedElemRestrictionGetAtPointsElementOffset_Hip(CeedElemRestriction rstr, CeedInt elem, CeedSize *elem_offset) { 376fe960054SJeremy L Thompson CeedInt layout[3]; 377fe960054SJeremy L Thompson 378fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetELayout(rstr, layout)); 379fe960054SJeremy L Thompson *elem_offset = 0 * layout[0] + 0 * layout[1] + elem * layout[2]; 380fe960054SJeremy L Thompson return CEED_ERROR_SUCCESS; 381fe960054SJeremy L Thompson } 382fe960054SJeremy L Thompson 383fe960054SJeremy L Thompson //------------------------------------------------------------------------------ 3840d0321e0SJeremy L Thompson // Destroy restriction 3850d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 386dce49693SSebastian Grimberg static int CeedElemRestrictionDestroy_Hip(CeedElemRestriction rstr) { 3870d0321e0SJeremy L Thompson Ceed ceed; 388b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 389b7453713SJeremy L Thompson 390dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 391dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 392cf8cbdd6SSebastian Grimberg if (impl->module) { 3932b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(impl->module)); 394cf8cbdd6SSebastian Grimberg } 395a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_owned)); 396f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_offsets_owned)); 397081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_offsets)); 398081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_t_indices)); 399081aa29dSJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_l_vec_indices)); 400a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_orients_owned)); 401f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((bool *)impl->d_orients_owned)); 402a267acd1SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_curl_orients_owned)); 403f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_curl_orients_owned)); 404fe960054SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_offsets_at_points_owned)); 405fe960054SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt8 *)impl->d_offsets_at_points_owned)); 4060b63de31SJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_points_per_elem_owned)); 4070b63de31SJeremy L Thompson CeedCallHip(ceed, hipFree((CeedInt *)impl->d_points_per_elem_owned)); 4082b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 4090d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4100d0321e0SJeremy L Thompson } 4110d0321e0SJeremy L Thompson 4120d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4130d0321e0SJeremy L Thompson // Create transpose offsets and indices 4140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 415fe960054SJeremy L Thompson static int CeedElemRestrictionOffset_Hip(const CeedElemRestriction rstr, const CeedInt elem_size, const CeedInt *indices) { 4160d0321e0SJeremy L Thompson Ceed ceed; 417b7453713SJeremy L Thompson bool *is_node; 418e79b91d9SJeremy L Thompson CeedSize l_size; 419fe960054SJeremy L Thompson CeedInt num_elem, num_comp, num_nodes = 0; 420dce49693SSebastian Grimberg CeedInt *ind_to_offset, *l_vec_indices, *t_offsets, *t_indices; 421fe960054SJeremy L Thompson CeedRestrictionType rstr_type; 422b7453713SJeremy L Thompson CeedElemRestriction_Hip *impl; 423b7453713SJeremy L Thompson 424dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 425dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 426dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 427fe960054SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 428dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 429dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 430b7453713SJeremy L Thompson const CeedInt size_indices = num_elem * elem_size; 4310d0321e0SJeremy L Thompson 432437930d1SJeremy L Thompson // Count num_nodes 4332b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &is_node)); 434dce49693SSebastian Grimberg 4352b730f8bSJeremy L Thompson for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1; 4362b730f8bSJeremy L Thompson for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i]; 437437930d1SJeremy L Thompson impl->num_nodes = num_nodes; 4380d0321e0SJeremy L Thompson 4390d0321e0SJeremy L Thompson // L-vector offsets array 4402b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(l_size, &ind_to_offset)); 4412b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices)); 442b7453713SJeremy L Thompson for (CeedInt i = 0, j = 0; i < l_size; i++) { 443437930d1SJeremy L Thompson if (is_node[i]) { 444437930d1SJeremy L Thompson l_vec_indices[j] = i; 4450d0321e0SJeremy L Thompson ind_to_offset[i] = j++; 4460d0321e0SJeremy L Thompson } 4472b730f8bSJeremy L Thompson } 4482b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&is_node)); 4490d0321e0SJeremy L Thompson 4500d0321e0SJeremy L Thompson // Compute transpose offsets and indices 451437930d1SJeremy L Thompson const CeedInt size_offsets = num_nodes + 1; 452b7453713SJeremy L Thompson 4532b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(size_offsets, &t_offsets)); 4542b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(size_indices, &t_indices)); 4550d0321e0SJeremy L Thompson // Count node multiplicity 4562b730f8bSJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 4572b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1]; 4582b730f8bSJeremy L Thompson } 4590d0321e0SJeremy L Thompson // Convert to running sum 4602b730f8bSJeremy L Thompson for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1]; 4610d0321e0SJeremy L Thompson // List all E-vec indices associated with L-vec node 462437930d1SJeremy L Thompson for (CeedInt e = 0; e < num_elem; ++e) { 463437930d1SJeremy L Thompson for (CeedInt i = 0; i < elem_size; ++i) { 464437930d1SJeremy L Thompson const CeedInt lid = elem_size * e + i; 4650d0321e0SJeremy L Thompson const CeedInt gid = indices[lid]; 466b7453713SJeremy L Thompson 467437930d1SJeremy L Thompson t_indices[t_offsets[ind_to_offset[gid]]++] = lid; 4680d0321e0SJeremy L Thompson } 4690d0321e0SJeremy L Thompson } 4700d0321e0SJeremy L Thompson // Reset running sum 4712b730f8bSJeremy L Thompson for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1]; 472437930d1SJeremy L Thompson t_offsets[0] = 0; 4730d0321e0SJeremy L Thompson 4740d0321e0SJeremy L Thompson // Copy data to device 4750d0321e0SJeremy L Thompson // -- L-vector indices 4762b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt))); 477081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), hipMemcpyHostToDevice)); 4780d0321e0SJeremy L Thompson // -- Transpose offsets 4792b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt))); 480081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), hipMemcpyHostToDevice)); 4810d0321e0SJeremy L Thompson // -- Transpose indices 4822b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt))); 483081aa29dSJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt *)impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), hipMemcpyHostToDevice)); 4840d0321e0SJeremy L Thompson 4850d0321e0SJeremy L Thompson // Cleanup 4862b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&ind_to_offset)); 4872b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&l_vec_indices)); 4882b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_offsets)); 4892b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&t_indices)); 4900d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4910d0321e0SJeremy L Thompson } 4920d0321e0SJeremy L Thompson 4930d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4940d0321e0SJeremy L Thompson // Create restriction 4950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 496a267acd1SJeremy L Thompson int CeedElemRestrictionCreate_Hip(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 497dce49693SSebastian Grimberg const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 498b7453713SJeremy L Thompson Ceed ceed, ceed_parent; 499dce49693SSebastian Grimberg bool is_deterministic; 500ff1bc20eSJeremy L Thompson CeedInt num_elem, num_comp, elem_size; 501b7453713SJeremy L Thompson CeedRestrictionType rstr_type; 5020d0321e0SJeremy L Thompson CeedElemRestriction_Hip *impl; 503b7453713SJeremy L Thompson 504dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 505ca735530SJeremy L Thompson CeedCallBackend(CeedGetParent(ceed, &ceed_parent)); 506ca735530SJeremy L Thompson CeedCallBackend(CeedIsDeterministic(ceed_parent, &is_deterministic)); 507dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 508ff1bc20eSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 509dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 51022eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 511fe960054SJeremy L Thompson // Use max number of points as elem size for AtPoints restrictions 512fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 513fe960054SJeremy L Thompson CeedInt max_points = 0; 514fe960054SJeremy L Thompson 515fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 516fe960054SJeremy L Thompson max_points = CeedIntMax(max_points, offsets[i + 1] - offsets[i]); 517fe960054SJeremy L Thompson } 518fe960054SJeremy L Thompson elem_size = max_points; 519fe960054SJeremy L Thompson } 520dce49693SSebastian Grimberg const CeedInt size = num_elem * elem_size; 5210d0321e0SJeremy L Thompson 522dce49693SSebastian Grimberg CeedCallBackend(CeedCalloc(1, &impl)); 523dce49693SSebastian Grimberg impl->num_nodes = size; 524dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 52522eb1385SJeremy L Thompson 52622eb1385SJeremy L Thompson // Set layouts 52722eb1385SJeremy L Thompson { 52822eb1385SJeremy L Thompson bool has_backend_strides; 52922eb1385SJeremy L Thompson CeedInt layout[3] = {1, size, elem_size}; 53022eb1385SJeremy L Thompson 531dce49693SSebastian Grimberg CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); 53222eb1385SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_STRIDED) { 53322eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 53422eb1385SJeremy L Thompson if (has_backend_strides) { 53522eb1385SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); 53622eb1385SJeremy L Thompson } 53722eb1385SJeremy L Thompson } 53822eb1385SJeremy L Thompson } 5390d0321e0SJeremy L Thompson 540fe960054SJeremy L Thompson // Pad AtPoints indices 541fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 542fe960054SJeremy L Thompson CeedSize offsets_len = elem_size * num_elem, at_points_size = num_elem + 1; 5430b63de31SJeremy L Thompson CeedInt max_points = elem_size, *offsets_padded, *points_per_elem; 544fe960054SJeremy L Thompson 545fe960054SJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "only MemType Host supported when creating AtPoints restriction"); 546fe960054SJeremy L Thompson CeedCallBackend(CeedMalloc(offsets_len, &offsets_padded)); 5470b63de31SJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem, &points_per_elem)); 548fe960054SJeremy L Thompson for (CeedInt i = 0; i < num_elem; i++) { 549fe960054SJeremy L Thompson CeedInt num_points = offsets[i + 1] - offsets[i]; 550fe960054SJeremy L Thompson 5510b63de31SJeremy L Thompson points_per_elem[i] = num_points; 552fe960054SJeremy L Thompson at_points_size += num_points; 553fe960054SJeremy L Thompson // -- Copy all points in element 554fe960054SJeremy L Thompson for (CeedInt j = 0; j < num_points; j++) { 5558be297eeSJeremy L Thompson offsets_padded[i * max_points + j] = offsets[offsets[i] + j] * num_comp; 556fe960054SJeremy L Thompson } 557fe960054SJeremy L Thompson // -- Replicate out last point in element 558fe960054SJeremy L Thompson for (CeedInt j = num_points; j < max_points; j++) { 5598be297eeSJeremy L Thompson offsets_padded[i * max_points + j] = offsets[offsets[i] + num_points - 1] * num_comp; 560fe960054SJeremy L Thompson } 561fe960054SJeremy L Thompson } 562fe960054SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, at_points_size, &impl->h_offsets_at_points_owned, &impl->h_offsets_at_points_borrowed, 563fe960054SJeremy L Thompson &impl->h_offsets_at_points)); 564fe960054SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_at_points_owned, at_points_size * sizeof(CeedInt))); 565fe960054SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_at_points_owned, impl->h_offsets_at_points, at_points_size * sizeof(CeedInt), 566fe960054SJeremy L Thompson hipMemcpyHostToDevice)); 567fe960054SJeremy L Thompson impl->d_offsets_at_points = (CeedInt *)impl->d_offsets_at_points_owned; 568ff1bc20eSJeremy L Thompson 569fe960054SJeremy L Thompson // -- Use padded offsets for the rest of the setup 570fe960054SJeremy L Thompson offsets = (const CeedInt *)offsets_padded; 571fe960054SJeremy L Thompson copy_mode = CEED_OWN_POINTER; 5722e88d319SJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetAtPointsEVectorSize(rstr, elem_size * num_elem * num_comp)); 5730b63de31SJeremy L Thompson 5740b63de31SJeremy L Thompson // -- Points per element 5750b63de31SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(points_per_elem, CEED_OWN_POINTER, num_elem, &impl->h_points_per_elem_owned, 5760b63de31SJeremy L Thompson &impl->h_points_per_elem_borrowed, &impl->h_points_per_elem)); 5770b63de31SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_points_per_elem_owned, num_elem * sizeof(CeedInt))); 5780b63de31SJeremy L Thompson CeedCallHip(ceed, 5790b63de31SJeremy L Thompson hipMemcpy((CeedInt **)impl->d_points_per_elem_owned, impl->h_points_per_elem, num_elem * sizeof(CeedInt), hipMemcpyHostToDevice)); 5800b63de31SJeremy L Thompson impl->d_points_per_elem = (CeedInt *)impl->d_points_per_elem_owned; 581fe960054SJeremy L Thompson } 582fe960054SJeremy L Thompson 583dce49693SSebastian Grimberg // Set up device offset/orientation arrays 584dce49693SSebastian Grimberg if (rstr_type != CEED_RESTRICTION_STRIDED) { 585472941f0SJeremy L Thompson switch (mem_type) { 5866574a04fSJeremy L Thompson case CEED_MEM_HOST: { 587f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedIntArray(offsets, copy_mode, size, &impl->h_offsets_owned, &impl->h_offsets_borrowed, &impl->h_offsets)); 588a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_offsets_owned, size * sizeof(CeedInt))); 589f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->d_offsets_owned, impl->h_offsets, size * sizeof(CeedInt), hipMemcpyHostToDevice)); 590f5d1e504SJeremy L Thompson impl->d_offsets = (CeedInt *)impl->d_offsets_owned; 591fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 592dce49693SSebastian Grimberg } break; 5936574a04fSJeremy L Thompson case CEED_MEM_DEVICE: { 594f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedIntArray_Hip(ceed, offsets, copy_mode, size, &impl->d_offsets_owned, &impl->d_offsets_borrowed, 595f5d1e504SJeremy L Thompson (const CeedInt **)&impl->d_offsets)); 596a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_offsets_owned)); 597f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((CeedInt **)impl->h_offsets_owned, impl->d_offsets, size * sizeof(CeedInt), hipMemcpyDeviceToHost)); 598a267acd1SJeremy L Thompson impl->h_offsets = impl->h_offsets_owned; 599fe960054SJeremy L Thompson if (is_deterministic) CeedCallBackend(CeedElemRestrictionOffset_Hip(rstr, elem_size, offsets)); 600dce49693SSebastian Grimberg } break; 601dce49693SSebastian Grimberg } 602dce49693SSebastian Grimberg 603dce49693SSebastian Grimberg // Orientation data 604dce49693SSebastian Grimberg if (rstr_type == CEED_RESTRICTION_ORIENTED) { 605dce49693SSebastian Grimberg switch (mem_type) { 606dce49693SSebastian Grimberg case CEED_MEM_HOST: { 607f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostBoolArray(orients, copy_mode, size, &impl->h_orients_owned, &impl->h_orients_borrowed, &impl->h_orients)); 608a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_orients_owned, size * sizeof(bool))); 609f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->d_orients_owned, impl->h_orients, size * sizeof(bool), hipMemcpyHostToDevice)); 610a267acd1SJeremy L Thompson impl->d_orients = impl->d_orients_owned; 611dce49693SSebastian Grimberg } break; 612dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 613f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceBoolArray_Hip(ceed, orients, copy_mode, size, &impl->d_orients_owned, &impl->d_orients_borrowed, 614f5d1e504SJeremy L Thompson (const bool **)&impl->d_orients)); 615a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(size, &impl->h_orients_owned)); 616f5d1e504SJeremy L Thompson CeedCallHip(ceed, hipMemcpy((bool *)impl->h_orients_owned, impl->d_orients, size * sizeof(bool), hipMemcpyDeviceToHost)); 617a267acd1SJeremy L Thompson impl->h_orients = impl->h_orients_owned; 618dce49693SSebastian Grimberg } break; 619dce49693SSebastian Grimberg } 620dce49693SSebastian Grimberg } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 621dce49693SSebastian Grimberg switch (mem_type) { 622dce49693SSebastian Grimberg case CEED_MEM_HOST: { 623f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedInt8Array(curl_orients, copy_mode, 3 * size, &impl->h_curl_orients_owned, &impl->h_curl_orients_borrowed, 624f5d1e504SJeremy L Thompson &impl->h_curl_orients)); 625a267acd1SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&impl->d_curl_orients_owned, 3 * size * sizeof(CeedInt8))); 626f5d1e504SJeremy L Thompson CeedCallHip(ceed, 627f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->d_curl_orients_owned, impl->h_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyHostToDevice)); 628a267acd1SJeremy L Thompson impl->d_curl_orients = impl->d_curl_orients_owned; 629dce49693SSebastian Grimberg } break; 630dce49693SSebastian Grimberg case CEED_MEM_DEVICE: { 631f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedInt8Array_Hip(ceed, curl_orients, copy_mode, 3 * size, &impl->d_curl_orients_owned, 632f5d1e504SJeremy L Thompson &impl->d_curl_orients_borrowed, (const CeedInt8 **)&impl->d_curl_orients)); 633a267acd1SJeremy L Thompson CeedCallBackend(CeedMalloc(3 * size, &impl->h_curl_orients_owned)); 634f5d1e504SJeremy L Thompson CeedCallHip(ceed, 635f5d1e504SJeremy L Thompson hipMemcpy((CeedInt8 *)impl->h_curl_orients_owned, impl->d_curl_orients, 3 * size * sizeof(CeedInt8), hipMemcpyDeviceToHost)); 636a267acd1SJeremy L Thompson impl->h_curl_orients = impl->h_curl_orients_owned; 637dce49693SSebastian Grimberg } break; 638dce49693SSebastian Grimberg } 639dce49693SSebastian Grimberg } 6400d0321e0SJeremy L Thompson } 6410d0321e0SJeremy L Thompson 6420d0321e0SJeremy L Thompson // Register backend functions 643dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Hip)); 644dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Hip)); 645dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Hip)); 646dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Hip)); 647dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Hip)); 648dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Hip)); 649fe960054SJeremy L Thompson if (rstr_type == CEED_RESTRICTION_POINTS) { 650fe960054SJeremy L Thompson CeedCallBackend( 651fe960054SJeremy L Thompson CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetAtPointsElementOffset", CeedElemRestrictionGetAtPointsElementOffset_Hip)); 652fe960054SJeremy L Thompson } 653dce49693SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Hip)); 6540d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6550d0321e0SJeremy L Thompson } 6560d0321e0SJeremy L Thompson 6570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 658