xref: /libCEED/backends/hip-ref/ceed-hip-ref-basis.c (revision 4bd6ffc97dc9a7688ef3a2d802aad5d41776eea1)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed/jit-tools.h>
11 #include <hip/hip_runtime.h>
12 
13 #include "../hip/ceed-hip-common.h"
14 #include "../hip/ceed-hip-compile.h"
15 #include "ceed-hip-ref.h"
16 
17 //------------------------------------------------------------------------------
18 // Basis apply - tensor
19 //------------------------------------------------------------------------------
20 int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
21   Ceed              ceed;
22   CeedInt           Q_1d, dim;
23   const CeedInt     is_transpose   = t_mode == CEED_TRANSPOSE;
24   const int         max_block_size = 64;
25   const CeedScalar *d_u;
26   CeedScalar       *d_v;
27   CeedBasis_Hip    *data;
28 
29   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
30   CeedCallBackend(CeedBasisGetData(basis, &data));
31 
32   // Get read/write access to u, v
33   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
34   else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
35   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
36 
37   // Clear v for transpose operation
38   if (is_transpose) {
39     CeedSize length;
40 
41     CeedCallBackend(CeedVectorGetLength(v, &length));
42     CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
43   }
44   CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
45   CeedCallBackend(CeedBasisGetDimension(basis, &dim));
46 
47   // Basis action
48   switch (eval_mode) {
49     case CEED_EVAL_INTERP: {
50       void         *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v};
51       const CeedInt block_size    = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
52 
53       CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args));
54     } break;
55     case CEED_EVAL_GRAD: {
56       void         *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v};
57       const CeedInt block_size  = max_block_size;
58 
59       CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args));
60     } break;
61     case CEED_EVAL_WEIGHT: {
62       void     *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v};
63       const int block_size_x  = Q_1d;
64       const int block_size_y  = dim >= 2 ? Q_1d : 1;
65 
66       CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args));
67     } break;
68     case CEED_EVAL_NONE: /* handled separately below */
69       break;
70     // LCOV_EXCL_START
71     case CEED_EVAL_DIV:
72     case CEED_EVAL_CURL:
73       return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]);
74       // LCOV_EXCL_STOP
75   }
76 
77   // Restore vectors, cover CEED_EVAL_NONE
78   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
79   if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
80   if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
81   return CEED_ERROR_SUCCESS;
82 }
83 
84 //------------------------------------------------------------------------------
85 // Basis apply - non-tensor
86 //------------------------------------------------------------------------------
87 int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
88                                 CeedVector v) {
89   Ceed                    ceed;
90   CeedInt                 num_nodes, num_qpts;
91   const CeedInt           is_transpose    = t_mode == CEED_TRANSPOSE;
92   const int               elems_per_block = 1;
93   const int               grid            = CeedDivUpInt(num_elem, elems_per_block);
94   const CeedScalar       *d_u;
95   CeedScalar             *d_v;
96   CeedBasisNonTensor_Hip *data;
97 
98   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
99   CeedCallBackend(CeedBasisGetData(basis, &data));
100   CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
101   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
102 
103   // Get read/write access to u, v
104   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
105   else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
106   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
107 
108   // Clear v for transpose operation
109   if (is_transpose) {
110     CeedSize length;
111 
112     CeedCallBackend(CeedVectorGetLength(v, &length));
113     CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
114   }
115 
116   // Apply basis operation
117   switch (eval_mode) {
118     case CEED_EVAL_INTERP: {
119       void     *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v};
120       const int block_size_x  = is_transpose ? num_nodes : num_qpts;
121 
122       if (is_transpose) {
123         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args));
124       } else {
125         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args));
126       }
127     } break;
128     case CEED_EVAL_GRAD: {
129       void     *grad_args[]  = {(void *)&num_elem, &data->d_grad, &d_u, &d_v};
130       const int block_size_x = is_transpose ? num_nodes : num_qpts;
131 
132       if (is_transpose) {
133         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args));
134       } else {
135         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args));
136       }
137     } break;
138     case CEED_EVAL_DIV: {
139       void     *div_args[]   = {(void *)&num_elem, &data->d_div, &d_u, &d_v};
140       const int block_size_x = is_transpose ? num_nodes : num_qpts;
141 
142       if (is_transpose) {
143         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args));
144       } else {
145         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args));
146       }
147     } break;
148     case CEED_EVAL_CURL: {
149       void     *curl_args[]  = {(void *)&num_elem, &data->d_curl, &d_u, &d_v};
150       const int block_size_x = is_transpose ? num_nodes : num_qpts;
151 
152       if (is_transpose) {
153         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args));
154       } else {
155         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args));
156       }
157     } break;
158     case CEED_EVAL_WEIGHT: {
159       void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v};
160 
161       CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args));
162     } break;
163     case CEED_EVAL_NONE: /* handled separately below */
164       break;
165   }
166 
167   // Restore vectors, cover CEED_EVAL_NONE
168   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
169   if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
170   if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
171   return CEED_ERROR_SUCCESS;
172 }
173 
174 //------------------------------------------------------------------------------
175 // Destroy tensor basis
176 //------------------------------------------------------------------------------
177 static int CeedBasisDestroy_Hip(CeedBasis basis) {
178   Ceed           ceed;
179   CeedBasis_Hip *data;
180 
181   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
182   CeedCallBackend(CeedBasisGetData(basis, &data));
183   CeedCallHip(ceed, hipModuleUnload(data->module));
184   CeedCallHip(ceed, hipFree(data->d_q_weight_1d));
185   CeedCallHip(ceed, hipFree(data->d_interp_1d));
186   CeedCallHip(ceed, hipFree(data->d_grad_1d));
187   CeedCallBackend(CeedFree(&data));
188   return CEED_ERROR_SUCCESS;
189 }
190 
191 //------------------------------------------------------------------------------
192 // Destroy non-tensor basis
193 //------------------------------------------------------------------------------
194 static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) {
195   Ceed                    ceed;
196   CeedBasisNonTensor_Hip *data;
197 
198   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
199   CeedCallBackend(CeedBasisGetData(basis, &data));
200   CeedCallHip(ceed, hipModuleUnload(data->module));
201   CeedCallHip(ceed, hipFree(data->d_q_weight));
202   CeedCallHip(ceed, hipFree(data->d_interp));
203   CeedCallHip(ceed, hipFree(data->d_grad));
204   CeedCallHip(ceed, hipFree(data->d_div));
205   CeedCallHip(ceed, hipFree(data->d_curl));
206   CeedCallBackend(CeedFree(&data));
207   return CEED_ERROR_SUCCESS;
208 }
209 
210 //------------------------------------------------------------------------------
211 // Create tensor
212 //------------------------------------------------------------------------------
213 int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
214                                 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
215   Ceed           ceed;
216   char          *basis_kernel_path, *basis_kernel_source;
217   CeedInt        num_comp;
218   const CeedInt  q_bytes      = Q_1d * sizeof(CeedScalar);
219   const CeedInt  interp_bytes = q_bytes * P_1d;
220   CeedBasis_Hip *data;
221 
222   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
223   CeedCallBackend(CeedCalloc(1, &data));
224 
225   // Copy data to GPU
226   CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes));
227   CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice));
228   CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes));
229   CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice));
230   CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes));
231   CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice));
232 
233   // Compile basis kernels
234   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
235   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path));
236   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
237   CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
238   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
239   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
240                                   num_comp * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
241                                   "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
242   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
243   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad));
244   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
245   CeedCallBackend(CeedFree(&basis_kernel_path));
246   CeedCallBackend(CeedFree(&basis_kernel_source));
247 
248   CeedCallBackend(CeedBasisSetData(basis, data));
249 
250   // Register backend functions
251   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip));
252   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip));
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 //------------------------------------------------------------------------------
257 // Create non-tensor H^1
258 //------------------------------------------------------------------------------
259 int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
260                           const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
261   Ceed                    ceed;
262   char                   *basis_kernel_path, *basis_kernel_source;
263   CeedInt                 num_comp, q_comp_interp, q_comp_grad;
264   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
265   CeedBasisNonTensor_Hip *data;
266 
267   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
268   CeedCallBackend(CeedCalloc(1, &data));
269 
270   // Copy basis data to GPU
271   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
272   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
273   CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
274   CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
275   if (interp) {
276     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
277 
278     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
279     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
280   }
281   if (grad) {
282     const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad;
283 
284     CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes));
285     CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice));
286   }
287 
288   // Compile basis kernels
289   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
290   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path));
291   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
292   CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
293   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
294   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
295                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
296   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
297   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
298   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
299   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
300   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
301   CeedCallBackend(CeedFree(&basis_kernel_path));
302   CeedCallBackend(CeedFree(&basis_kernel_source));
303 
304   CeedCallBackend(CeedBasisSetData(basis, data));
305 
306   // Register backend functions
307   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
308   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
309   return CEED_ERROR_SUCCESS;
310 }
311 
312 //------------------------------------------------------------------------------
313 // Create non-tensor H(div)
314 //------------------------------------------------------------------------------
315 int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div,
316                             const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
317   Ceed                    ceed;
318   char                   *basis_kernel_path, *basis_kernel_source;
319   CeedInt                 num_comp, q_comp_interp, q_comp_div;
320   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
321   CeedBasisNonTensor_Hip *data;
322 
323   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
324   CeedCallBackend(CeedCalloc(1, &data));
325 
326   // Copy basis data to GPU
327   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
328   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
329   CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
330   CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
331   if (interp) {
332     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
333 
334     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
335     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
336   }
337   if (div) {
338     const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div;
339 
340     CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes));
341     CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice));
342   }
343 
344   // Compile basis kernels
345   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
346   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path));
347   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
348   CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
349   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
350   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
351                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
352   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
353   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
354   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
355   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
356   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
357   CeedCallBackend(CeedFree(&basis_kernel_path));
358   CeedCallBackend(CeedFree(&basis_kernel_source));
359 
360   CeedCallBackend(CeedBasisSetData(basis, data));
361 
362   // Register backend functions
363   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
364   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
365   return CEED_ERROR_SUCCESS;
366 }
367 
368 //------------------------------------------------------------------------------
369 // Create non-tensor H(curl)
370 //------------------------------------------------------------------------------
371 int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
372                              const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
373   Ceed                    ceed;
374   char                   *basis_kernel_path, *basis_kernel_source;
375   CeedInt                 num_comp, q_comp_interp, q_comp_curl;
376   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
377   CeedBasisNonTensor_Hip *data;
378 
379   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
380   CeedCallBackend(CeedCalloc(1, &data));
381 
382   // Copy basis data to GPU
383   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
384   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl));
385   CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
386   CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
387   if (interp) {
388     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
389 
390     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
391     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
392   }
393   if (curl) {
394     const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl;
395 
396     CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes));
397     CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice));
398   }
399 
400   // Compile basis kernels
401   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
402   CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path));
403   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
404   CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
405   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
406   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
407                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
408   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
409   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
410   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
411   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
412   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
413   CeedCallBackend(CeedFree(&basis_kernel_path));
414   CeedCallBackend(CeedFree(&basis_kernel_source));
415 
416   CeedCallBackend(CeedBasisSetData(basis, data));
417 
418   // Register backend functions
419   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
420   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
421   return CEED_ERROR_SUCCESS;
422 }
423 
424 //------------------------------------------------------------------------------
425