xref: /libCEED/rust/libceed-sys/c-src/backends/hip-ref/ceed-hip-ref-restriction.c (revision 2b730f8b5a9c809740a0b3b302db43a719c636b1)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
80d0321e0SJeremy L Thompson #include <ceed/backend.h>
9*2b730f8bSJeremy L Thompson #include <ceed/ceed.h>
10437930d1SJeremy L Thompson #include <ceed/jit-tools.h>
110d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
120d0321e0SJeremy L Thompson #include <stdbool.h>
130d0321e0SJeremy L Thompson #include <stddef.h>
1444d7a66cSJeremy L Thompson #include <string.h>
15*2b730f8bSJeremy L Thompson 
160d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h"
17*2b730f8bSJeremy L Thompson #include "ceed-hip-ref.h"
180d0321e0SJeremy L Thompson 
190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
200d0321e0SJeremy L Thompson // Apply restriction
210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
22*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Hip(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
230d0321e0SJeremy L Thompson   CeedElemRestriction_Hip *impl;
24*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
250d0321e0SJeremy L Thompson   Ceed ceed;
26*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
270d0321e0SJeremy L Thompson   Ceed_Hip *data;
28*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
29437930d1SJeremy L Thompson   const CeedInt block_size = 64;
30437930d1SJeremy L Thompson   const CeedInt num_nodes  = impl->num_nodes;
31437930d1SJeremy L Thompson   CeedInt       num_elem, elem_size;
32437930d1SJeremy L Thompson   CeedElemRestrictionGetNumElements(r, &num_elem);
33*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
340d0321e0SJeremy L Thompson   hipFunction_t kernel;
350d0321e0SJeremy L Thompson 
360d0321e0SJeremy L Thompson   // Get vectors
370d0321e0SJeremy L Thompson   const CeedScalar *d_u;
380d0321e0SJeremy L Thompson   CeedScalar       *d_v;
39*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
40437930d1SJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
410d0321e0SJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
42*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
430d0321e0SJeremy L Thompson   } else {
440d0321e0SJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
45*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
460d0321e0SJeremy L Thompson   }
470d0321e0SJeremy L Thompson 
480d0321e0SJeremy L Thompson   // Restrict
49437930d1SJeremy L Thompson   if (t_mode == CEED_NOTRANSPOSE) {
500d0321e0SJeremy L Thompson     // L-vector -> E-vector
510d0321e0SJeremy L Thompson     if (impl->d_ind) {
520d0321e0SJeremy L Thompson       // -- Offsets provided
53437930d1SJeremy L Thompson       kernel             = impl->OffsetNoTranspose;
54437930d1SJeremy L Thompson       void   *args[]     = {&num_elem, &impl->d_ind, &d_u, &d_v};
55437930d1SJeremy L Thompson       CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256;
56*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedRunKernelHip(ceed, kernel, CeedDivUpInt(num_nodes, block_size), block_size, args));
570d0321e0SJeremy L Thompson     } else {
580d0321e0SJeremy L Thompson       // -- Strided restriction
59437930d1SJeremy L Thompson       kernel             = impl->StridedNoTranspose;
60437930d1SJeremy L Thompson       void   *args[]     = {&num_elem, &d_u, &d_v};
61437930d1SJeremy L Thompson       CeedInt block_size = elem_size < 256 ? (elem_size > 64 ? elem_size : 64) : 256;
62*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedRunKernelHip(ceed, kernel, CeedDivUpInt(num_nodes, block_size), block_size, args));
630d0321e0SJeremy L Thompson     }
640d0321e0SJeremy L Thompson   } else {
650d0321e0SJeremy L Thompson     // E-vector -> L-vector
660d0321e0SJeremy L Thompson     if (impl->d_ind) {
670d0321e0SJeremy L Thompson       // -- Offsets provided
68437930d1SJeremy L Thompson       kernel       = impl->OffsetTranspose;
69*2b730f8bSJeremy L Thompson       void *args[] = {&impl->d_l_vec_indices, &impl->d_t_indices, &impl->d_t_offsets, &d_u, &d_v};
70*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedRunKernelHip(ceed, kernel, CeedDivUpInt(num_nodes, block_size), block_size, args));
710d0321e0SJeremy L Thompson     } else {
720d0321e0SJeremy L Thompson       // -- Strided restriction
73437930d1SJeremy L Thompson       kernel       = impl->StridedTranspose;
74437930d1SJeremy L Thompson       void *args[] = {&num_elem, &d_u, &d_v};
75*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedRunKernelHip(ceed, kernel, CeedDivUpInt(num_nodes, block_size), block_size, args));
760d0321e0SJeremy L Thompson     }
770d0321e0SJeremy L Thompson   }
780d0321e0SJeremy L Thompson 
79*2b730f8bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
800d0321e0SJeremy L Thompson 
810d0321e0SJeremy L Thompson   // Restore arrays
82*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
83*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
840d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
850d0321e0SJeremy L Thompson }
860d0321e0SJeremy L Thompson 
870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
880d0321e0SJeremy L Thompson // Blocked not supported
890d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
90*2b730f8bSJeremy L Thompson int CeedElemRestrictionApplyBlock_Hip(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
91*2b730f8bSJeremy L Thompson                                       CeedRequest *request) {
920d0321e0SJeremy L Thompson   // LCOV_EXCL_START
930d0321e0SJeremy L Thompson   Ceed ceed;
94*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
95*2b730f8bSJeremy L Thompson   return CeedError(ceed, CEED_ERROR_BACKEND, "Backend does not implement blocked restrictions");
960d0321e0SJeremy L Thompson   // LCOV_EXCL_STOP
970d0321e0SJeremy L Thompson }
980d0321e0SJeremy L Thompson 
990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1000d0321e0SJeremy L Thompson // Get offsets
1010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
102*2b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Hip(CeedElemRestriction rstr, CeedMemType mtype, const CeedInt **offsets) {
1030d0321e0SJeremy L Thompson   CeedElemRestriction_Hip *impl;
104*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
1050d0321e0SJeremy L Thompson 
1060d0321e0SJeremy L Thompson   switch (mtype) {
1070d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
1080d0321e0SJeremy L Thompson       *offsets = impl->h_ind;
1090d0321e0SJeremy L Thompson       break;
1100d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
1110d0321e0SJeremy L Thompson       *offsets = impl->d_ind;
1120d0321e0SJeremy L Thompson       break;
1130d0321e0SJeremy L Thompson   }
1140d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1150d0321e0SJeremy L Thompson }
1160d0321e0SJeremy L Thompson 
1170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1180d0321e0SJeremy L Thompson // Destroy restriction
1190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1200d0321e0SJeremy L Thompson static int CeedElemRestrictionDestroy_Hip(CeedElemRestriction r) {
1210d0321e0SJeremy L Thompson   CeedElemRestriction_Hip *impl;
122*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
1230d0321e0SJeremy L Thompson 
1240d0321e0SJeremy L Thompson   Ceed ceed;
125*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
126*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipModuleUnload(impl->module));
127*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->h_ind_allocated));
128*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(impl->d_ind_allocated));
129*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(impl->d_t_offsets));
130*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(impl->d_t_indices));
131*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(impl->d_l_vec_indices));
132*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
133437930d1SJeremy L Thompson 
1340d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1350d0321e0SJeremy L Thompson }
1360d0321e0SJeremy L Thompson 
1370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1380d0321e0SJeremy L Thompson // Create transpose offsets and indices
1390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
140*2b730f8bSJeremy L Thompson static int CeedElemRestrictionOffset_Hip(const CeedElemRestriction r, const CeedInt *indices) {
1410d0321e0SJeremy L Thompson   Ceed ceed;
142*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
1430d0321e0SJeremy L Thompson   CeedElemRestriction_Hip *impl;
144*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
145e79b91d9SJeremy L Thompson   CeedSize l_size;
146e79b91d9SJeremy L Thompson   CeedInt  num_elem, elem_size, num_comp;
147*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
148*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
149*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
150*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
1510d0321e0SJeremy L Thompson 
152437930d1SJeremy L Thompson   // Count num_nodes
153437930d1SJeremy L Thompson   bool *is_node;
154*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(l_size, &is_node));
155437930d1SJeremy L Thompson   const CeedInt size_indices = num_elem * elem_size;
156*2b730f8bSJeremy L Thompson   for (CeedInt i = 0; i < size_indices; i++) is_node[indices[i]] = 1;
157437930d1SJeremy L Thompson   CeedInt num_nodes = 0;
158*2b730f8bSJeremy L Thompson   for (CeedInt i = 0; i < l_size; i++) num_nodes += is_node[i];
159437930d1SJeremy L Thompson   impl->num_nodes = num_nodes;
1600d0321e0SJeremy L Thompson 
1610d0321e0SJeremy L Thompson   // L-vector offsets array
162437930d1SJeremy L Thompson   CeedInt *ind_to_offset, *l_vec_indices;
163*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(l_size, &ind_to_offset));
164*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(num_nodes, &l_vec_indices));
1650d0321e0SJeremy L Thompson   CeedInt j = 0;
166*2b730f8bSJeremy L Thompson   for (CeedInt i = 0; i < l_size; i++) {
167437930d1SJeremy L Thompson     if (is_node[i]) {
168437930d1SJeremy L Thompson       l_vec_indices[j] = i;
1690d0321e0SJeremy L Thompson       ind_to_offset[i] = j++;
1700d0321e0SJeremy L Thompson     }
171*2b730f8bSJeremy L Thompson   }
172*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&is_node));
1730d0321e0SJeremy L Thompson 
1740d0321e0SJeremy L Thompson   // Compute transpose offsets and indices
175437930d1SJeremy L Thompson   const CeedInt size_offsets = num_nodes + 1;
176437930d1SJeremy L Thompson   CeedInt      *t_offsets;
177*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(size_offsets, &t_offsets));
178437930d1SJeremy L Thompson   CeedInt *t_indices;
179*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedMalloc(size_indices, &t_indices));
1800d0321e0SJeremy L Thompson   // Count node multiplicity
181*2b730f8bSJeremy L Thompson   for (CeedInt e = 0; e < num_elem; ++e) {
182*2b730f8bSJeremy L Thompson     for (CeedInt i = 0; i < elem_size; ++i) ++t_offsets[ind_to_offset[indices[elem_size * e + i]] + 1];
183*2b730f8bSJeremy L Thompson   }
1840d0321e0SJeremy L Thompson   // Convert to running sum
185*2b730f8bSJeremy L Thompson   for (CeedInt i = 1; i < size_offsets; ++i) t_offsets[i] += t_offsets[i - 1];
1860d0321e0SJeremy L Thompson   // List all E-vec indices associated with L-vec node
187437930d1SJeremy L Thompson   for (CeedInt e = 0; e < num_elem; ++e) {
188437930d1SJeremy L Thompson     for (CeedInt i = 0; i < elem_size; ++i) {
189437930d1SJeremy L Thompson       const CeedInt lid                          = elem_size * e + i;
1900d0321e0SJeremy L Thompson       const CeedInt gid                          = indices[lid];
191437930d1SJeremy L Thompson       t_indices[t_offsets[ind_to_offset[gid]]++] = lid;
1920d0321e0SJeremy L Thompson     }
1930d0321e0SJeremy L Thompson   }
1940d0321e0SJeremy L Thompson   // Reset running sum
195*2b730f8bSJeremy L Thompson   for (int i = size_offsets - 1; i > 0; --i) t_offsets[i] = t_offsets[i - 1];
196437930d1SJeremy L Thompson   t_offsets[0] = 0;
1970d0321e0SJeremy L Thompson 
1980d0321e0SJeremy L Thompson   // Copy data to device
1990d0321e0SJeremy L Thompson   // -- L-vector indices
200*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMalloc((void **)&impl->d_l_vec_indices, num_nodes * sizeof(CeedInt)));
201*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMemcpy(impl->d_l_vec_indices, l_vec_indices, num_nodes * sizeof(CeedInt), hipMemcpyHostToDevice));
2020d0321e0SJeremy L Thompson   // -- Transpose offsets
203*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_offsets, size_offsets * sizeof(CeedInt)));
204*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMemcpy(impl->d_t_offsets, t_offsets, size_offsets * sizeof(CeedInt), hipMemcpyHostToDevice));
2050d0321e0SJeremy L Thompson   // -- Transpose indices
206*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMalloc((void **)&impl->d_t_indices, size_indices * sizeof(CeedInt)));
207*2b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMemcpy(impl->d_t_indices, t_indices, size_indices * sizeof(CeedInt), hipMemcpyHostToDevice));
2080d0321e0SJeremy L Thompson 
2090d0321e0SJeremy L Thompson   // Cleanup
210*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&ind_to_offset));
211*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&l_vec_indices));
212*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&t_offsets));
213*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&t_indices));
214437930d1SJeremy L Thompson 
2150d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2160d0321e0SJeremy L Thompson }
2170d0321e0SJeremy L Thompson 
2180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2190d0321e0SJeremy L Thompson // Create restriction
2200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
221*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Hip(CeedMemType mtype, CeedCopyMode cmode, const CeedInt *indices, CeedElemRestriction r) {
2220d0321e0SJeremy L Thompson   Ceed ceed;
223*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
2240d0321e0SJeremy L Thompson   CeedElemRestriction_Hip *impl;
225*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
226437930d1SJeremy L Thompson   CeedInt num_elem, num_comp, elem_size;
227*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
228*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
229*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
230437930d1SJeremy L Thompson   CeedInt size        = num_elem * elem_size;
231437930d1SJeremy L Thompson   CeedInt strides[3]  = {1, size, elem_size};
232437930d1SJeremy L Thompson   CeedInt comp_stride = 1;
2330d0321e0SJeremy L Thompson 
2340d0321e0SJeremy L Thompson   // Stride data
235437930d1SJeremy L Thompson   bool is_strided;
236*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided));
237437930d1SJeremy L Thompson   if (is_strided) {
238437930d1SJeremy L Thompson     bool has_backend_strides;
239*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
240437930d1SJeremy L Thompson     if (!has_backend_strides) {
241*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
2420d0321e0SJeremy L Thompson     }
2430d0321e0SJeremy L Thompson   } else {
244*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2450d0321e0SJeremy L Thompson   }
2460d0321e0SJeremy L Thompson 
2470d0321e0SJeremy L Thompson   impl->h_ind           = NULL;
2480d0321e0SJeremy L Thompson   impl->h_ind_allocated = NULL;
2490d0321e0SJeremy L Thompson   impl->d_ind           = NULL;
2500d0321e0SJeremy L Thompson   impl->d_ind_allocated = NULL;
251437930d1SJeremy L Thompson   impl->d_t_indices     = NULL;
252437930d1SJeremy L Thompson   impl->d_t_offsets     = NULL;
253437930d1SJeremy L Thompson   impl->num_nodes       = size;
254*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
255437930d1SJeremy L Thompson   CeedInt layout[3] = {1, elem_size * num_elem, elem_size};
256*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
2570d0321e0SJeremy L Thompson 
2580d0321e0SJeremy L Thompson   // Set up device indices/offset arrays
2590d0321e0SJeremy L Thompson   if (mtype == CEED_MEM_HOST) {
2600d0321e0SJeremy L Thompson     switch (cmode) {
2610d0321e0SJeremy L Thompson       case CEED_OWN_POINTER:
2620d0321e0SJeremy L Thompson         impl->h_ind_allocated = (CeedInt *)indices;
2630d0321e0SJeremy L Thompson         impl->h_ind           = (CeedInt *)indices;
2640d0321e0SJeremy L Thompson         break;
2650d0321e0SJeremy L Thompson       case CEED_USE_POINTER:
2660d0321e0SJeremy L Thompson         impl->h_ind = (CeedInt *)indices;
2670d0321e0SJeremy L Thompson         break;
2680d0321e0SJeremy L Thompson       case CEED_COPY_VALUES:
26944d7a66cSJeremy L Thompson         if (indices != NULL) {
270*2b730f8bSJeremy L Thompson           CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated));
27144d7a66cSJeremy L Thompson           memcpy(impl->h_ind_allocated, indices, elem_size * num_elem * sizeof(CeedInt));
27244d7a66cSJeremy L Thompson           impl->h_ind = impl->h_ind_allocated;
27344d7a66cSJeremy L Thompson         }
2740d0321e0SJeremy L Thompson         break;
2750d0321e0SJeremy L Thompson     }
2760d0321e0SJeremy L Thompson     if (indices != NULL) {
277*2b730f8bSJeremy L Thompson       CeedCallHip(ceed, hipMalloc((void **)&impl->d_ind, size * sizeof(CeedInt)));
2780d0321e0SJeremy L Thompson       impl->d_ind_allocated = impl->d_ind;  // We own the device memory
279*2b730f8bSJeremy L Thompson       CeedCallHip(ceed, hipMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), hipMemcpyHostToDevice));
280*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionOffset_Hip(r, indices));
2810d0321e0SJeremy L Thompson     }
2820d0321e0SJeremy L Thompson   } else if (mtype == CEED_MEM_DEVICE) {
2830d0321e0SJeremy L Thompson     switch (cmode) {
2840d0321e0SJeremy L Thompson       case CEED_COPY_VALUES:
2850d0321e0SJeremy L Thompson         if (indices != NULL) {
286*2b730f8bSJeremy L Thompson           CeedCallHip(ceed, hipMalloc((void **)&impl->d_ind, size * sizeof(CeedInt)));
2870d0321e0SJeremy L Thompson           impl->d_ind_allocated = impl->d_ind;  // We own the device memory
288*2b730f8bSJeremy L Thompson           CeedCallHip(ceed, hipMemcpy(impl->d_ind, indices, size * sizeof(CeedInt), hipMemcpyDeviceToDevice));
2890d0321e0SJeremy L Thompson         }
2900d0321e0SJeremy L Thompson         break;
2910d0321e0SJeremy L Thompson       case CEED_OWN_POINTER:
2920d0321e0SJeremy L Thompson         impl->d_ind           = (CeedInt *)indices;
2930d0321e0SJeremy L Thompson         impl->d_ind_allocated = impl->d_ind;
2940d0321e0SJeremy L Thompson         break;
2950d0321e0SJeremy L Thompson       case CEED_USE_POINTER:
2960d0321e0SJeremy L Thompson         impl->d_ind = (CeedInt *)indices;
2970d0321e0SJeremy L Thompson     }
2980d0321e0SJeremy L Thompson     if (indices != NULL) {
299*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedMalloc(elem_size * num_elem, &impl->h_ind_allocated));
300*2b730f8bSJeremy L Thompson       CeedCallHip(ceed, hipMemcpy(impl->h_ind_allocated, impl->d_ind, elem_size * num_elem * sizeof(CeedInt), hipMemcpyDeviceToHost));
30144d7a66cSJeremy L Thompson       impl->h_ind = impl->h_ind_allocated;
302*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionOffset_Hip(r, indices));
3030d0321e0SJeremy L Thompson     }
3040d0321e0SJeremy L Thompson   } else {
3050d0321e0SJeremy L Thompson     // LCOV_EXCL_START
306*2b730f8bSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST or DEVICE supported");
3070d0321e0SJeremy L Thompson     // LCOV_EXCL_STOP
3080d0321e0SJeremy L Thompson   }
3090d0321e0SJeremy L Thompson 
3100d0321e0SJeremy L Thompson   // Compile HIP kernels
311437930d1SJeremy L Thompson   CeedInt num_nodes = impl->num_nodes;
312437930d1SJeremy L Thompson   char   *restriction_kernel_path, *restriction_kernel_source;
313*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction.h", &restriction_kernel_path));
31446dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source -----\n");
315*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedLoadSourceToBuffer(ceed, restriction_kernel_path, &restriction_kernel_source));
316*2b730f8bSJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading Restriction Kernel Source Complete! -----\n");
317*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCompileHip(ceed, restriction_kernel_source, &impl->module, 8, "RESTR_ELEM_SIZE", elem_size, "RESTR_NUM_ELEM", num_elem,
318*2b730f8bSJeremy L Thompson                                  "RESTR_NUM_COMP", num_comp, "RESTR_NUM_NODES", num_nodes, "RESTR_COMP_STRIDE", comp_stride, "RESTR_STRIDE_NODES",
319*2b730f8bSJeremy L Thompson                                  strides[0], "RESTR_STRIDE_COMP", strides[1], "RESTR_STRIDE_ELEM", strides[2]));
320*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetKernelHip(ceed, impl->module, "StridedNoTranspose", &impl->StridedNoTranspose));
321*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetKernelHip(ceed, impl->module, "OffsetNoTranspose", &impl->OffsetNoTranspose));
322*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetKernelHip(ceed, impl->module, "StridedTranspose", &impl->StridedTranspose));
323*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetKernelHip(ceed, impl->module, "OffsetTranspose", &impl->OffsetTranspose));
324*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&restriction_kernel_path));
325*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&restriction_kernel_source));
3260d0321e0SJeremy L Thompson 
3270d0321e0SJeremy L Thompson   // Register backend functions
328*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Hip));
329*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Hip));
330*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Hip));
331*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Hip));
3320d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3330d0321e0SJeremy L Thompson }
3340d0321e0SJeremy L Thompson 
3350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3360d0321e0SJeremy L Thompson // Blocked not supported
3370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
338*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreateBlocked_Hip(const CeedMemType mtype, const CeedCopyMode cmode, const CeedInt *indices, CeedElemRestriction r) {
3390d0321e0SJeremy L Thompson   Ceed ceed;
340*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
341*2b730f8bSJeremy L Thompson   return CeedError(ceed, CEED_ERROR_BACKEND, "Backend does not implement blocked restrictions");
3420d0321e0SJeremy L Thompson }
3430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
344