xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 5aed82e4fa97acf4ba24a7f10a35f5303a6798e0)
1*5aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
37f5b9731SStan Tomov //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
57f5b9731SStan Tomov //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
77f5b9731SStan Tomov 
849aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
10f6af633fSnbeams #include <ceed/jit-tools.h>
11f6af633fSnbeams #include <string.h>
122b730f8bSJeremy L Thompson 
13e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
14f6af633fSnbeams #include "../hip/ceed-hip-common.h"
15f6af633fSnbeams #include "../hip/ceed-hip-compile.h"
16f6af633fSnbeams #else
17f6af633fSnbeams #include "../cuda/ceed-cuda-common.h"
18f6af633fSnbeams #include "../cuda/ceed-cuda-compile.h"
19f6af633fSnbeams #endif
2000fb7a04SSebastian Grimberg #include "ceed-magma-common.h"
2100fb7a04SSebastian Grimberg #include "ceed-magma.h"
227f5b9731SStan Tomov 
23940a72f1SSebastian Grimberg #include "ceed-magma-gemm-nontensor.h"
24940a72f1SSebastian Grimberg #include "ceed-magma-gemm-selector.h"
25940a72f1SSebastian Grimberg 
26940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
27940a72f1SSebastian Grimberg // Basis apply - tensor
28940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
29940a72f1SSebastian Grimberg static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
307f5b9731SStan Tomov   Ceed              ceed;
31e0582403Sabdelfattah83   Ceed_Magma       *data;
32940a72f1SSebastian Grimberg   CeedInt           dim, num_comp, num_nodes, P_1d, Q_1d, P, Q;
33940a72f1SSebastian Grimberg   const CeedScalar *d_u;
34940a72f1SSebastian Grimberg   CeedScalar       *d_v;
3538293ee6SJeremy L Thompson   CeedBasis_Magma  *impl;
3638293ee6SJeremy L Thompson 
3738293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
38940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
39940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
4038293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetDimension(basis, &dim));
4138293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
42940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
4338293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d));
4438293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
45940a72f1SSebastian Grimberg   P = P_1d;
46940a72f1SSebastian Grimberg   Q = Q_1d;
4738293ee6SJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
4838293ee6SJeremy L Thompson     P = Q_1d;
4938293ee6SJeremy L Thompson     Q = P_1d;
507f5b9731SStan Tomov   }
517f5b9731SStan Tomov 
52940a72f1SSebastian Grimberg   // Read vectors
53940a72f1SSebastian Grimberg   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
54940a72f1SSebastian Grimberg   else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
55940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
56940a72f1SSebastian Grimberg 
57940a72f1SSebastian Grimberg   // Apply basis operation
58940a72f1SSebastian Grimberg   switch (e_mode) {
59940a72f1SSebastian Grimberg     case CEED_EVAL_INTERP: {
607f5b9731SStan Tomov       // Define element sizes for dofs/quad
6138293ee6SJeremy L Thompson       CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
6238293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);
637f5b9731SStan Tomov 
647f5b9731SStan Tomov       // E-vector ordering -------------- Q-vector ordering
65868539c2SNatalie Beams       //  component                        component
66868539c2SNatalie Beams       //    elem                             elem
677f5b9731SStan Tomov       //       node                            node
687f5b9731SStan Tomov 
697f5b9731SStan Tomov       // ---  Define strides for NOTRANSPOSE mode: ---
70940a72f1SSebastian Grimberg       // Input (d_u) is E-vector, output (d_v) is Q-vector
717f5b9731SStan Tomov 
727f5b9731SStan Tomov       // Element strides
7338293ee6SJeremy L Thompson       CeedInt u_elem_stride = elem_dofs_size;
7438293ee6SJeremy L Thompson       CeedInt v_elem_stride = elem_qpts_size;
757f5b9731SStan Tomov       // Component strides
7638293ee6SJeremy L Thompson       CeedInt u_comp_stride = num_elem * elem_dofs_size;
7738293ee6SJeremy L Thompson       CeedInt v_comp_stride = num_elem * elem_qpts_size;
7838293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
79940a72f1SSebastian Grimberg         // Input (d_u) is Q-vector, output (d_v) is E-vector
807f5b9731SStan Tomov         // Element strides
8138293ee6SJeremy L Thompson         v_elem_stride = elem_dofs_size;
8238293ee6SJeremy L Thompson         u_elem_stride = elem_qpts_size;
837f5b9731SStan Tomov         // Component strides
8438293ee6SJeremy L Thompson         v_comp_stride = num_elem * elem_dofs_size;
8538293ee6SJeremy L Thompson         u_comp_stride = num_elem * elem_qpts_size;
867f5b9731SStan Tomov       }
8738293ee6SJeremy L Thompson       CeedInt num_threads = 1;
8838293ee6SJeremy L Thompson       CeedInt num_t_col   = 1;
8938293ee6SJeremy L Thompson       CeedInt shared_mem  = 0;
9038293ee6SJeremy L Thompson       CeedInt max_P_Q     = CeedIntMax(P, Q);
91f6af633fSnbeams 
92f6af633fSnbeams       switch (dim) {
93f6af633fSnbeams         case 1:
9438293ee6SJeremy L Thompson           num_threads = max_P_Q;
9538293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
9638293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
9738293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);
98f6af633fSnbeams           break;
99f6af633fSnbeams         case 2:
10038293ee6SJeremy L Thompson           num_threads = max_P_Q;
10138293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
10238293ee6SJeremy L Thompson           shared_mem += P * Q * sizeof(CeedScalar);  // for sT
103940a72f1SSebastian Grimberg           // for reforming rU we need P x P, and for the intermediate output we need P x Q
104940a72f1SSebastian Grimberg           shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar));
105f6af633fSnbeams           break;
106f6af633fSnbeams         case 3:
10738293ee6SJeremy L Thompson           num_threads = max_P_Q * max_P_Q;
10838293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
10938293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);  // for sT
110940a72f1SSebastian Grimberg           // rU needs P^2 x P, the intermediate output needs max(P^2 x Q, P x Q^2)
111940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q));
112940a72f1SSebastian Grimberg           break;
113f6af633fSnbeams       }
114940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
115940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, &v_comp_stride, &num_elem};
116f6af633fSnbeams 
11738293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
118940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->InterpTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
119f6af633fSnbeams       } else {
120940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, grid, num_threads, num_t_col, 1, shared_mem, args));
121f6af633fSnbeams       }
1222b730f8bSJeremy L Thompson     } break;
1233513a710Sjeremylt     case CEED_EVAL_GRAD: {
1247f5b9731SStan Tomov       // Define element sizes for dofs/quad
12538293ee6SJeremy L Thompson       CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
12638293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);
1277f5b9731SStan Tomov 
128940a72f1SSebastian Grimberg       // In CEED_NOTRANSPOSE mode:
129940a72f1SSebastian Grimberg       // d_u is (P^dim x nc), column-major layout (nc = num_comp)
130940a72f1SSebastian Grimberg       // d_v is (Q^dim x nc x dim), column-major layout (nc = num_comp)
131940a72f1SSebastian Grimberg       // In CEED_TRANSPOSE mode, the sizes of d_u and d_v are switched.
132940a72f1SSebastian Grimberg 
1337f5b9731SStan Tomov       // E-vector ordering -------------- Q-vector ordering
1347f5b9731SStan Tomov       //                                  dim
135868539c2SNatalie Beams       //  component                        component
136868539c2SNatalie Beams       //    elem                              elem
1377f5b9731SStan Tomov       //       node                            node
1387f5b9731SStan Tomov 
1397f5b9731SStan Tomov       // ---  Define strides for NOTRANSPOSE mode: ---
140940a72f1SSebastian Grimberg       // Input (d_u) is E-vector, output (d_v) is Q-vector
1417f5b9731SStan Tomov 
1427f5b9731SStan Tomov       // Element strides
14338293ee6SJeremy L Thompson       CeedInt u_elem_stride = elem_dofs_size;
14438293ee6SJeremy L Thompson       CeedInt v_elem_stride = elem_qpts_size;
1457f5b9731SStan Tomov       // Component strides
14638293ee6SJeremy L Thompson       CeedInt u_comp_stride = num_elem * elem_dofs_size;
14738293ee6SJeremy L Thompson       CeedInt v_comp_stride = num_elem * elem_qpts_size;
1487f5b9731SStan Tomov       // Dimension strides
14938293ee6SJeremy L Thompson       CeedInt u_dim_stride = 0;
15038293ee6SJeremy L Thompson       CeedInt v_dim_stride = num_elem * elem_qpts_size * num_comp;
15138293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
152940a72f1SSebastian Grimberg         // Input (d_u) is Q-vector, output (d_v) is E-vector
1537f5b9731SStan Tomov         // Element strides
15438293ee6SJeremy L Thompson         v_elem_stride = elem_dofs_size;
15538293ee6SJeremy L Thompson         u_elem_stride = elem_qpts_size;
1567f5b9731SStan Tomov         // Component strides
15738293ee6SJeremy L Thompson         v_comp_stride = num_elem * elem_dofs_size;
15838293ee6SJeremy L Thompson         u_comp_stride = num_elem * elem_qpts_size;
1597f5b9731SStan Tomov         // Dimension strides
16038293ee6SJeremy L Thompson         v_dim_stride = 0;
16138293ee6SJeremy L Thompson         u_dim_stride = num_elem * elem_qpts_size * num_comp;
1627f5b9731SStan Tomov       }
16338293ee6SJeremy L Thompson       CeedInt num_threads = 1;
16438293ee6SJeremy L Thompson       CeedInt num_t_col   = 1;
16538293ee6SJeremy L Thompson       CeedInt shared_mem  = 0;
16638293ee6SJeremy L Thompson       CeedInt max_P_Q     = CeedIntMax(P, Q);
167f6af633fSnbeams 
168f6af633fSnbeams       switch (dim) {
169f6af633fSnbeams         case 1:
17038293ee6SJeremy L Thompson           num_threads = max_P_Q;
17138293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
17238293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
17338293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * (P * Q);
174f6af633fSnbeams           break;
175f6af633fSnbeams         case 2:
17638293ee6SJeremy L Thompson           num_threads = max_P_Q;
17738293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
17838293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
179940a72f1SSebastian Grimberg           // for reforming rU we need P x P, and for the intermediate output we need P x Q
180940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * (P * max_P_Q);
181f6af633fSnbeams           break;
182f6af633fSnbeams         case 3:
18338293ee6SJeremy L Thompson           num_threads = max_P_Q * max_P_Q;
18438293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
18538293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
186940a72f1SSebastian Grimberg           // rU needs P^2 x P, the intermediate outputs need (P^2 x Q + P x Q^2)
187940a72f1SSebastian Grimberg           shared_mem += sizeof(CeedScalar) * num_t_col * CeedIntMax(P * P * P, (P * P * Q) + (P * Q * Q));
188940a72f1SSebastian Grimberg           break;
189f6af633fSnbeams       }
190940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
191940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_interp_1d, &impl->d_grad_1d, &d_u,          &u_elem_stride, &u_comp_stride, &u_dim_stride, &d_v,
19238293ee6SJeremy L Thompson                         &v_elem_stride,     &v_comp_stride,   &v_dim_stride, &num_elem};
193f6af633fSnbeams 
19438293ee6SJeremy L Thompson       if (t_mode == CEED_TRANSPOSE) {
195940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->GradTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
196f6af633fSnbeams       } else {
197940a72f1SSebastian Grimberg         CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, grid, num_threads, num_t_col, 1, shared_mem, args));
198f6af633fSnbeams       }
1992b730f8bSJeremy L Thompson     } break;
2003513a710Sjeremylt     case CEED_EVAL_WEIGHT: {
201940a72f1SSebastian Grimberg       CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
20238293ee6SJeremy L Thompson       CeedInt elem_dofs_size = CeedIntPow(Q, dim);
20338293ee6SJeremy L Thompson       CeedInt num_threads    = 1;
20438293ee6SJeremy L Thompson       CeedInt num_t_col      = 1;
20538293ee6SJeremy L Thompson       CeedInt shared_mem     = 0;
206f6af633fSnbeams 
207f6af633fSnbeams       switch (dim) {
208f6af633fSnbeams         case 1:
20938293ee6SJeremy L Thompson           num_threads = Q;
21038293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
21138293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;              // for d_q_weight_1d
21238293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * num_t_col * Q;  // for output
213f6af633fSnbeams           break;
214f6af633fSnbeams         case 2:
21538293ee6SJeremy L Thompson           num_threads = Q;
21638293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
21738293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
218f6af633fSnbeams           break;
219f6af633fSnbeams         case 3:
22038293ee6SJeremy L Thompson           num_threads = Q * Q;
22138293ee6SJeremy L Thompson           num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
22238293ee6SJeremy L Thompson           shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
223940a72f1SSebastian Grimberg           break;
224f6af633fSnbeams       }
225940a72f1SSebastian Grimberg       CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
226940a72f1SSebastian Grimberg       void   *args[] = {&impl->d_q_weight_1d, &d_v, &elem_dofs_size, &num_elem};
227f6af633fSnbeams 
228940a72f1SSebastian Grimberg       CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, num_threads, num_t_col, 1, shared_mem, args));
2292b730f8bSJeremy L Thompson     } break;
2303513a710Sjeremylt     // LCOV_EXCL_START
2313513a710Sjeremylt     case CEED_EVAL_DIV:
2323513a710Sjeremylt     case CEED_EVAL_CURL:
233bcbe1c99SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[e_mode]);
2343513a710Sjeremylt     case CEED_EVAL_NONE:
2352b730f8bSJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_NONE does not make sense in this context");
2363513a710Sjeremylt       // LCOV_EXCL_STOP
2373513a710Sjeremylt   }
2387f5b9731SStan Tomov 
239940a72f1SSebastian Grimberg   // Must sync to ensure completeness
240e0582403Sabdelfattah83   ceed_magma_queue_sync(data->queue);
241e0582403Sabdelfattah83 
242940a72f1SSebastian Grimberg   // Restore vectors
24338293ee6SJeremy L Thompson   if (e_mode != CEED_EVAL_WEIGHT) {
244940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
2457f5b9731SStan Tomov   }
246940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
247e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2487f5b9731SStan Tomov }
2497f5b9731SStan Tomov 
250940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
251940a72f1SSebastian Grimberg // Basis apply - non-tensor
252940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
253940a72f1SSebastian Grimberg static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
254940a72f1SSebastian Grimberg                                          CeedVector v) {
255868539c2SNatalie Beams   Ceed                      ceed;
256e0582403Sabdelfattah83   Ceed_Magma               *data;
2577251047cSSebastian Grimberg   CeedInt                   num_comp, num_nodes, num_qpts, P, Q, N;
2587251047cSSebastian Grimberg   const CeedScalar         *d_u;
259940a72f1SSebastian Grimberg   CeedScalar               *d_v;
26038293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
26138293ee6SJeremy L Thompson 
26238293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
26338293ee6SJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
264940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
26538293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
266940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
26738293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
268940a72f1SSebastian Grimberg   P = num_nodes;
269940a72f1SSebastian Grimberg   Q = num_qpts;
270940a72f1SSebastian Grimberg   N = num_elem * num_comp;
27138293ee6SJeremy L Thompson 
272940a72f1SSebastian Grimberg   // Read vectors
273940a72f1SSebastian Grimberg   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
27438293ee6SJeremy L Thompson   else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
275940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
276868539c2SNatalie Beams 
2777251047cSSebastian Grimberg   // Compile kernels for N as needed
2787251047cSSebastian Grimberg   CeedInt iN = 0;
2797251047cSSebastian Grimberg   if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q && (e_mode != CEED_EVAL_WEIGHT || !impl->Weight)) {
280940a72f1SSebastian Grimberg     CeedInt n_array[MAGMA_NONTENSOR_KERNEL_INSTANCES] = {MAGMA_NONTENSOR_KERNEL_N_VALUES};
2817251047cSSebastian Grimberg     CeedInt diff                                      = abs(n_array[iN] - N), idiff;
28238293ee6SJeremy L Thompson 
283023b8a51Sabdelfattah83     for (CeedInt in = iN + 1; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
284940a72f1SSebastian Grimberg       idiff = abs(n_array[in] - N);
285023b8a51Sabdelfattah83       if (idiff < diff) {
286023b8a51Sabdelfattah83         iN   = in;
287023b8a51Sabdelfattah83         diff = idiff;
288868539c2SNatalie Beams       }
28980a9ef05SNatalie Beams     }
29080a9ef05SNatalie Beams 
291940a72f1SSebastian Grimberg     if (!impl->NB_interp[iN]) {
2929d15e85bSSebastian Grimberg       CeedFESpace fe_space;
2939d15e85bSSebastian Grimberg       CeedInt     q_comp_interp, q_comp_deriv;
294940a72f1SSebastian Grimberg       Ceed        ceed_delegate;
29522070f95SJeremy L Thompson       char       *basis_kernel_source;
29622070f95SJeremy L Thompson       const char *basis_kernel_path, *weight_kernel_path;
297940a72f1SSebastian Grimberg       magma_int_t arch = magma_getdevice_arch();
29880a9ef05SNatalie Beams 
299940a72f1SSebastian Grimberg       // Tuning parameters for NB
3009d15e85bSSebastian Grimberg       CeedCallBackend(CeedBasisGetFESpace(basis, &fe_space));
3019d15e85bSSebastian Grimberg       CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
3029d15e85bSSebastian Grimberg       switch (fe_space) {
3039d15e85bSSebastian Grimberg         case CEED_FE_SPACE_H1:
3049d15e85bSSebastian Grimberg           CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_deriv));
3059d15e85bSSebastian Grimberg           break;
3069d15e85bSSebastian Grimberg         case CEED_FE_SPACE_HDIV:
3079d15e85bSSebastian Grimberg           CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_deriv));
3089d15e85bSSebastian Grimberg           break;
3099d15e85bSSebastian Grimberg         case CEED_FE_SPACE_HCURL:
3109d15e85bSSebastian Grimberg           CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_deriv));
3119d15e85bSSebastian Grimberg           break;
3129d15e85bSSebastian Grimberg       }
3139d15e85bSSebastian Grimberg       impl->NB_interp[iN]   = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]);
3149d15e85bSSebastian Grimberg       impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]);
3159d15e85bSSebastian Grimberg       impl->NB_deriv[iN]    = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]);
3169d15e85bSSebastian Grimberg       impl->NB_deriv_t[iN]  = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]);
317023b8a51Sabdelfattah83 
318940a72f1SSebastian Grimberg       // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
319940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
320023b8a51Sabdelfattah83 
321940a72f1SSebastian Grimberg       // Compile kernels
3229d15e85bSSebastian Grimberg       CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h", &basis_kernel_path));
323940a72f1SSebastian Grimberg       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
3249d15e85bSSebastian Grimberg       CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
3257251047cSSebastian Grimberg       if (!impl->Weight) {
3267251047cSSebastian Grimberg         CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
3277251047cSSebastian Grimberg         CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &basis_kernel_source));
3287251047cSSebastian Grimberg       }
329940a72f1SSebastian Grimberg       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
3307251047cSSebastian Grimberg       CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[iN], 8, "BASIS_Q_COMP_INTERP", q_comp_interp,
3319d15e85bSSebastian Grimberg                                        "BASIS_Q_COMP_DERIV", q_comp_deriv, "BASIS_P", P, "BASIS_Q", Q, "BASIS_NB_INTERP_N", impl->NB_interp[iN],
3329d15e85bSSebastian Grimberg                                        "BASIS_NB_INTERP_T", impl->NB_interp_t[iN], "BASIS_NB_DERIV_N", impl->NB_deriv[iN], "BASIS_NB_DERIV_T",
3339d15e85bSSebastian Grimberg                                        impl->NB_deriv_t[iN]));
3347251047cSSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_n", &impl->Interp[iN]));
3357251047cSSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_t", &impl->InterpTranspose[iN]));
3367251047cSSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_n", &impl->Deriv[iN]));
3377251047cSSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_t", &impl->DerivTranspose[iN]));
3387251047cSSebastian Grimberg       if (!impl->Weight) {
3397251047cSSebastian Grimberg         CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_weight_nontensor", &impl->Weight));
3407251047cSSebastian Grimberg         CeedCallBackend(CeedFree(&weight_kernel_path));
3417251047cSSebastian Grimberg       }
3429d15e85bSSebastian Grimberg       CeedCallBackend(CeedFree(&basis_kernel_path));
343940a72f1SSebastian Grimberg       CeedCallBackend(CeedFree(&basis_kernel_source));
344940a72f1SSebastian Grimberg     }
3457251047cSSebastian Grimberg   }
3467251047cSSebastian Grimberg 
3477251047cSSebastian Grimberg   // Apply basis operation
3487251047cSSebastian Grimberg   if (e_mode != CEED_EVAL_WEIGHT) {
3497251047cSSebastian Grimberg     const CeedScalar *d_b = NULL;
3507251047cSSebastian Grimberg     CeedInt           q_comp, NB, M, K;
3517251047cSSebastian Grimberg     CeedMagmaFunction Kernel;
3527251047cSSebastian Grimberg 
3537251047cSSebastian Grimberg     switch (e_mode) {
3547251047cSSebastian Grimberg       case CEED_EVAL_INTERP:
3557251047cSSebastian Grimberg         d_b = impl->d_interp;
3567251047cSSebastian Grimberg         break;
3577251047cSSebastian Grimberg       case CEED_EVAL_GRAD:
3587251047cSSebastian Grimberg         d_b = impl->d_grad;
3597251047cSSebastian Grimberg         break;
3607251047cSSebastian Grimberg       case CEED_EVAL_DIV:
3617251047cSSebastian Grimberg         d_b = impl->d_div;
3627251047cSSebastian Grimberg         break;
3637251047cSSebastian Grimberg       case CEED_EVAL_CURL:
3647251047cSSebastian Grimberg         d_b = impl->d_curl;
3657251047cSSebastian Grimberg         break;
3667251047cSSebastian Grimberg       // LCOV_EXCL_START
3677251047cSSebastian Grimberg       case CEED_EVAL_WEIGHT:
3687251047cSSebastian Grimberg       case CEED_EVAL_NONE:
369bcbe1c99SJeremy L Thompson         return CeedError(ceed, CEED_ERROR_BACKEND, "%s does not make sense in this context", CeedEvalModes[e_mode]);
3707251047cSSebastian Grimberg         // LCOV_EXCL_STOP
3717251047cSSebastian Grimberg     }
3727251047cSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, e_mode, &q_comp));
3737251047cSSebastian Grimberg     M = (t_mode == CEED_TRANSPOSE) ? P : Q, K = (t_mode == CEED_TRANSPOSE) ? Q : P;
3747251047cSSebastian Grimberg 
3757251047cSSebastian Grimberg     if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
3769d15e85bSSebastian Grimberg       if (e_mode == CEED_EVAL_INTERP) {
3779d15e85bSSebastian Grimberg         if (t_mode == CEED_TRANSPOSE) {
3789d15e85bSSebastian Grimberg           Kernel = impl->InterpTranspose[iN];
3799d15e85bSSebastian Grimberg           NB     = impl->NB_interp_t[iN];
3809d15e85bSSebastian Grimberg         } else {
3819d15e85bSSebastian Grimberg           Kernel = impl->Interp[iN];
3829d15e85bSSebastian Grimberg           NB     = impl->NB_interp[iN];
3839d15e85bSSebastian Grimberg         }
3849d15e85bSSebastian Grimberg       } else {
3859d15e85bSSebastian Grimberg         if (t_mode == CEED_TRANSPOSE) {
3869d15e85bSSebastian Grimberg           Kernel = impl->DerivTranspose[iN];
3879d15e85bSSebastian Grimberg           NB     = impl->NB_deriv_t[iN];
3889d15e85bSSebastian Grimberg         } else {
3899d15e85bSSebastian Grimberg           Kernel = impl->Deriv[iN];
3909d15e85bSSebastian Grimberg           NB     = impl->NB_deriv[iN];
3919d15e85bSSebastian Grimberg         }
3929d15e85bSSebastian Grimberg       }
393940a72f1SSebastian Grimberg       CeedInt num_t_col    = MAGMA_BASIS_NTCOL(M, MAGMA_MAXTHREADS_1D);
3949d15e85bSSebastian Grimberg       CeedInt grid         = CeedDivUpInt(N, num_t_col * NB);
395833aa127SSebastian Grimberg       CeedInt shared_mem_A = P * Q * sizeof(CeedScalar);
396940a72f1SSebastian Grimberg       CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar);
397833aa127SSebastian Grimberg       CeedInt shared_mem   = (t_mode != CEED_TRANSPOSE && q_comp > 1) ? (shared_mem_A + shared_mem_B) : CeedIntMax(shared_mem_A, shared_mem_B);
3989d15e85bSSebastian Grimberg       void   *args[]       = {&N, &d_b, &d_u, &d_v};
399940a72f1SSebastian Grimberg 
4009d15e85bSSebastian Grimberg       CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, grid, M, num_t_col, 1, shared_mem, args));
4019d15e85bSSebastian Grimberg     } else {
4029d15e85bSSebastian Grimberg       for (CeedInt d = 0; d < q_comp; d++) {
40338293ee6SJeremy L Thompson         if (t_mode == CEED_TRANSPOSE) {
404940a72f1SSebastian Grimberg           const CeedScalar beta = (d > 0) ? 1.0 : 0.0;
4059d15e85bSSebastian Grimberg           magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q, Q, beta, d_v, P, data->queue);
406940a72f1SSebastian Grimberg         } else {
4079d15e85bSSebastian Grimberg           magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v + d * N * Q, Q, data->queue);
408940a72f1SSebastian Grimberg         }
409940a72f1SSebastian Grimberg       }
410940a72f1SSebastian Grimberg     }
411940a72f1SSebastian Grimberg   } else {
412940a72f1SSebastian Grimberg     CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
413940a72f1SSebastian Grimberg     CeedInt num_t_col  = MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D);
414940a72f1SSebastian Grimberg     CeedInt grid       = CeedDivUpInt(num_elem, num_t_col);
415940a72f1SSebastian Grimberg     CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar);
4169d15e85bSSebastian Grimberg     void   *args[]     = {&num_elem, &impl->d_q_weight, &d_v};
417868539c2SNatalie Beams 
418940a72f1SSebastian Grimberg     CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, Q, num_t_col, 1, shared_mem, args));
419940a72f1SSebastian Grimberg   }
420940a72f1SSebastian Grimberg 
421940a72f1SSebastian Grimberg   // Must sync to ensure completeness
422e0582403Sabdelfattah83   ceed_magma_queue_sync(data->queue);
423e0582403Sabdelfattah83 
424940a72f1SSebastian Grimberg   // Restore vectors
42538293ee6SJeremy L Thompson   if (e_mode != CEED_EVAL_WEIGHT) {
426940a72f1SSebastian Grimberg     CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
427868539c2SNatalie Beams   }
428940a72f1SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
429e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
430868539c2SNatalie Beams }
431868539c2SNatalie Beams 
432940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
433940a72f1SSebastian Grimberg // Destroy tensor basis
434940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
435940a72f1SSebastian Grimberg static int CeedBasisDestroy_Magma(CeedBasis basis) {
436f6af633fSnbeams   Ceed             ceed;
43738293ee6SJeremy L Thompson   CeedBasis_Magma *impl;
43838293ee6SJeremy L Thompson 
4392b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
440940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetData(basis, &impl));
441e5f091ebSnbeams #ifdef CEED_MAGMA_USE_HIP
4422b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipModuleUnload(impl->module));
443f6af633fSnbeams #else
4442b730f8bSJeremy L Thompson   CeedCallCuda(ceed, cuModuleUnload(impl->module));
445f6af633fSnbeams #endif
446940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_interp_1d));
447940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_grad_1d));
448940a72f1SSebastian Grimberg   CeedCallBackend(magma_free(impl->d_q_weight_1d));
4492b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
450e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4517f5b9731SStan Tomov }
4527f5b9731SStan Tomov 
453940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
454940a72f1SSebastian Grimberg // Destroy non-tensor basis
455940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
456940a72f1SSebastian Grimberg static int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
457023b8a51Sabdelfattah83   Ceed                      ceed;
45838293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
45938293ee6SJeremy L Thompson 
460940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
46138293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &impl));
462940a72f1SSebastian Grimberg   for (CeedInt in = 0; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
4637251047cSSebastian Grimberg     if (impl->module[in]) {
464940a72f1SSebastian Grimberg #ifdef CEED_MAGMA_USE_HIP
4657251047cSSebastian Grimberg       CeedCallHip(ceed, hipModuleUnload(impl->module[in]));
466940a72f1SSebastian Grimberg #else
4677251047cSSebastian Grimberg       CeedCallCuda(ceed, cuModuleUnload(impl->module[in]));
468940a72f1SSebastian Grimberg #endif
469940a72f1SSebastian Grimberg     }
470940a72f1SSebastian Grimberg   }
47138293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_interp));
47238293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_grad));
4739d15e85bSSebastian Grimberg   CeedCallBackend(magma_free(impl->d_div));
4749d15e85bSSebastian Grimberg   CeedCallBackend(magma_free(impl->d_curl));
47538293ee6SJeremy L Thompson   CeedCallBackend(magma_free(impl->d_q_weight));
4762b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
477e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
478868539c2SNatalie Beams }
479868539c2SNatalie Beams 
480940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
481940a72f1SSebastian Grimberg // Create tensor
482940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
483940a72f1SSebastian Grimberg int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
48438293ee6SJeremy L Thompson                                   const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
48538293ee6SJeremy L Thompson   Ceed             ceed, ceed_delegate;
48638293ee6SJeremy L Thompson   Ceed_Magma      *data;
48722070f95SJeremy L Thompson   char            *basis_kernel_source;
48822070f95SJeremy L Thompson   const char      *interp_kernel_path, *grad_kernel_path, *weight_kernel_path;
489940a72f1SSebastian Grimberg   CeedInt          num_comp;
4907f5b9731SStan Tomov   CeedBasis_Magma *impl;
49138293ee6SJeremy L Thompson 
4922b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
4932b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
494940a72f1SSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
495e0582403Sabdelfattah83 
496940a72f1SSebastian Grimberg   // Copy basis data to GPU
497940a72f1SSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight_1d, Q_1d * sizeof(q_weight_1d[0])));
498940a72f1SSebastian Grimberg   magma_setvector(Q_1d, sizeof(q_weight_1d[0]), q_weight_1d, 1, impl->d_q_weight_1d, 1, data->queue);
49938293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])));
50038293ee6SJeremy L Thompson   magma_setvector(Q_1d * P_1d, sizeof(interp_1d[0]), interp_1d, 1, impl->d_interp_1d, 1, data->queue);
50138293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])));
50238293ee6SJeremy L Thompson   magma_setvector(Q_1d * P_1d, sizeof(grad_1d[0]), grad_1d, 1, impl->d_grad_1d, 1, data->queue);
5037f5b9731SStan Tomov 
504940a72f1SSebastian Grimberg   // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
505940a72f1SSebastian Grimberg   CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
506940a72f1SSebastian Grimberg 
507940a72f1SSebastian Grimberg   // Compile kernels
508940a72f1SSebastian Grimberg   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
509940a72f1SSebastian Grimberg   {
510940a72f1SSebastian Grimberg     char   *interp_kernel_name_base = "ceed/jit-source/magma/magma-basis-interp";
511940a72f1SSebastian Grimberg     CeedInt interp_kernel_name_len  = strlen(interp_kernel_name_base) + 6;
512940a72f1SSebastian Grimberg     char    interp_kernel_name[interp_kernel_name_len];
513940a72f1SSebastian Grimberg 
514940a72f1SSebastian Grimberg     snprintf(interp_kernel_name, interp_kernel_name_len, "%s-%" CeedInt_FMT "d.h", interp_kernel_name_base, dim);
515940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, interp_kernel_name, &interp_kernel_path));
516940a72f1SSebastian Grimberg   }
517940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
518940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToBuffer(ceed, interp_kernel_path, &basis_kernel_source));
519940a72f1SSebastian Grimberg   {
520940a72f1SSebastian Grimberg     char   *grad_kernel_name_base = "ceed/jit-source/magma/magma-basis-grad";
521940a72f1SSebastian Grimberg     CeedInt grad_kernel_name_len  = strlen(grad_kernel_name_base) + 6;
522940a72f1SSebastian Grimberg     char    grad_kernel_name[grad_kernel_name_len];
523940a72f1SSebastian Grimberg 
524940a72f1SSebastian Grimberg     snprintf(grad_kernel_name, grad_kernel_name_len, "%s-%" CeedInt_FMT "d.h", grad_kernel_name_base, dim);
525940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, grad_kernel_name, &grad_kernel_path));
526940a72f1SSebastian Grimberg   }
527940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, grad_kernel_path, &basis_kernel_source));
528940a72f1SSebastian Grimberg   {
529940a72f1SSebastian Grimberg     char   *weight_kernel_name_base = "ceed/jit-source/magma/magma-basis-weight";
530940a72f1SSebastian Grimberg     CeedInt weight_kernel_name_len  = strlen(weight_kernel_name_base) + 6;
531940a72f1SSebastian Grimberg     char    weight_kernel_name[weight_kernel_name_len];
532940a72f1SSebastian Grimberg 
533940a72f1SSebastian Grimberg     snprintf(weight_kernel_name, weight_kernel_name_len, "%s-%" CeedInt_FMT "d.h", weight_kernel_name_base, dim);
534940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, weight_kernel_name, &weight_kernel_path));
535940a72f1SSebastian Grimberg   }
536940a72f1SSebastian Grimberg   CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &basis_kernel_source));
537940a72f1SSebastian Grimberg   CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
538940a72f1SSebastian Grimberg   CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module, 5, "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_P",
539940a72f1SSebastian Grimberg                                    P_1d, "BASIS_Q", Q_1d, "BASIS_MAX_P_Q", CeedIntMax(P_1d, Q_1d)));
540940a72f1SSebastian Grimberg   switch (dim) {
541940a72f1SSebastian Grimberg     case 1:
542940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", &impl->Interp));
543940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", &impl->InterpTranspose));
544940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", &impl->Grad));
545940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", &impl->GradTranspose));
546940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", &impl->Weight));
547940a72f1SSebastian Grimberg       break;
548940a72f1SSebastian Grimberg     case 2:
549940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", &impl->Interp));
550940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", &impl->InterpTranspose));
551940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", &impl->Grad));
552940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", &impl->GradTranspose));
553940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", &impl->Weight));
554940a72f1SSebastian Grimberg       break;
555940a72f1SSebastian Grimberg     case 3:
556940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", &impl->Interp));
557940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", &impl->InterpTranspose));
558940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", &impl->Grad));
559940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", &impl->GradTranspose));
560940a72f1SSebastian Grimberg       CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", &impl->Weight));
561940a72f1SSebastian Grimberg       break;
562940a72f1SSebastian Grimberg   }
563940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&interp_kernel_path));
564940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&grad_kernel_path));
565940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&weight_kernel_path));
566940a72f1SSebastian Grimberg   CeedCallBackend(CeedFree(&basis_kernel_source));
5677f5b9731SStan Tomov 
5682b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisSetData(basis, impl));
569940a72f1SSebastian Grimberg 
570940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma));
571940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma));
572e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5737f5b9731SStan Tomov }
5747f5b9731SStan Tomov 
575940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
576940a72f1SSebastian Grimberg // Create non-tensor H^1
577940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
578940a72f1SSebastian Grimberg int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
57938293ee6SJeremy L Thompson                             const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
5807251047cSSebastian Grimberg   Ceed                      ceed;
581e0582403Sabdelfattah83   Ceed_Magma               *data;
58238293ee6SJeremy L Thompson   CeedBasisNonTensor_Magma *impl;
58338293ee6SJeremy L Thompson 
58438293ee6SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
5852b730f8bSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &data));
5862b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
587023b8a51Sabdelfattah83 
588940a72f1SSebastian Grimberg   // Copy basis data to GPU
58938293ee6SJeremy L Thompson   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
59038293ee6SJeremy L Thompson   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
5919d15e85bSSebastian Grimberg   if (interp) {
5929d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
5939d15e85bSSebastian Grimberg 
5949d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
5959d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
5969d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
5979d15e85bSSebastian Grimberg   }
5989d15e85bSSebastian Grimberg   if (grad) {
5999d15e85bSSebastian Grimberg     CeedInt q_comp_grad;
6009d15e85bSSebastian Grimberg 
6019d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
6029d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_grad, num_qpts * num_nodes * q_comp_grad * sizeof(grad[0])));
6039d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_grad, sizeof(grad[0]), grad, 1, impl->d_grad, 1, data->queue);
6049d15e85bSSebastian Grimberg   }
6059d15e85bSSebastian Grimberg 
6067251047cSSebastian Grimberg   // Compile the weight kernel if it won't be compiled later on
6077251047cSSebastian Grimberg   if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
6087251047cSSebastian Grimberg     Ceed        ceed_delegate;
60922070f95SJeremy L Thompson     char       *basis_kernel_source;
61022070f95SJeremy L Thompson     const char *weight_kernel_path;
6117251047cSSebastian Grimberg 
6129d15e85bSSebastian Grimberg     // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
6139d15e85bSSebastian Grimberg     CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
6149d15e85bSSebastian Grimberg 
6159d15e85bSSebastian Grimberg     // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
6169d15e85bSSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
6179d15e85bSSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
6189d15e85bSSebastian Grimberg     CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
6199d15e85bSSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
6207251047cSSebastian Grimberg     CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts));
6217251047cSSebastian Grimberg     CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight));
6229d15e85bSSebastian Grimberg     CeedCallBackend(CeedFree(&weight_kernel_path));
6239d15e85bSSebastian Grimberg     CeedCallBackend(CeedFree(&basis_kernel_source));
6247251047cSSebastian Grimberg   }
6259d15e85bSSebastian Grimberg 
6269d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, impl));
6279d15e85bSSebastian Grimberg 
6289d15e85bSSebastian Grimberg   // Register backend functions
6299d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
6309d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
6319d15e85bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
6329d15e85bSSebastian Grimberg }
6339d15e85bSSebastian Grimberg 
6349d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
6359d15e85bSSebastian Grimberg // Create non-tensor H(div)
6369d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
6379d15e85bSSebastian Grimberg int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
6389d15e85bSSebastian Grimberg                               const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
6397251047cSSebastian Grimberg   Ceed                      ceed;
6409d15e85bSSebastian Grimberg   Ceed_Magma               *data;
6419d15e85bSSebastian Grimberg   CeedBasisNonTensor_Magma *impl;
6429d15e85bSSebastian Grimberg 
6439d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
6449d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
6459d15e85bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
6469d15e85bSSebastian Grimberg 
6479d15e85bSSebastian Grimberg   // Copy basis data to GPU
6489d15e85bSSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
6499d15e85bSSebastian Grimberg   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
6509d15e85bSSebastian Grimberg   if (interp) {
6519d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
6529d15e85bSSebastian Grimberg 
6539d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
6549d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
6559d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
6569d15e85bSSebastian Grimberg   }
6579d15e85bSSebastian Grimberg   if (div) {
6589d15e85bSSebastian Grimberg     CeedInt q_comp_div;
6599d15e85bSSebastian Grimberg 
6609d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
6619d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_div, num_qpts * num_nodes * q_comp_div * sizeof(div[0])));
6629d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_div, sizeof(div[0]), div, 1, impl->d_div, 1, data->queue);
6639d15e85bSSebastian Grimberg   }
6649d15e85bSSebastian Grimberg 
6657251047cSSebastian Grimberg   // Compile the weight kernel if it won't be compiled later on
6667251047cSSebastian Grimberg   if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
6677251047cSSebastian Grimberg     Ceed        ceed_delegate;
66822070f95SJeremy L Thompson     char       *basis_kernel_source;
66922070f95SJeremy L Thompson     const char *weight_kernel_path;
6707251047cSSebastian Grimberg 
6719d15e85bSSebastian Grimberg     // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
6729d15e85bSSebastian Grimberg     CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
6739d15e85bSSebastian Grimberg 
6749d15e85bSSebastian Grimberg     // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
6759d15e85bSSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
6769d15e85bSSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
6779d15e85bSSebastian Grimberg     CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
6789d15e85bSSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
6797251047cSSebastian Grimberg     CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts));
6807251047cSSebastian Grimberg     CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight));
6819d15e85bSSebastian Grimberg     CeedCallBackend(CeedFree(&weight_kernel_path));
6829d15e85bSSebastian Grimberg     CeedCallBackend(CeedFree(&basis_kernel_source));
6837251047cSSebastian Grimberg   }
6849d15e85bSSebastian Grimberg 
6859d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, impl));
6869d15e85bSSebastian Grimberg 
6879d15e85bSSebastian Grimberg   // Register backend functions
6889d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
6899d15e85bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
6909d15e85bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
6919d15e85bSSebastian Grimberg }
6929d15e85bSSebastian Grimberg 
6939d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
6949d15e85bSSebastian Grimberg // Create non-tensor H(curl)
6959d15e85bSSebastian Grimberg //------------------------------------------------------------------------------
6969d15e85bSSebastian Grimberg int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
6979d15e85bSSebastian Grimberg                                const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
6987251047cSSebastian Grimberg   Ceed                      ceed;
6999d15e85bSSebastian Grimberg   Ceed_Magma               *data;
7009d15e85bSSebastian Grimberg   CeedBasisNonTensor_Magma *impl;
7019d15e85bSSebastian Grimberg 
7029d15e85bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
7039d15e85bSSebastian Grimberg   CeedCallBackend(CeedGetData(ceed, &data));
7049d15e85bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
7059d15e85bSSebastian Grimberg 
7069d15e85bSSebastian Grimberg   // Copy basis data to GPU
7079d15e85bSSebastian Grimberg   CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
7089d15e85bSSebastian Grimberg   magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
7099d15e85bSSebastian Grimberg   if (interp) {
7109d15e85bSSebastian Grimberg     CeedInt q_comp_interp;
7119d15e85bSSebastian Grimberg 
7129d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
7139d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
7149d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
7159d15e85bSSebastian Grimberg   }
7169d15e85bSSebastian Grimberg   if (curl) {
7179d15e85bSSebastian Grimberg     CeedInt q_comp_curl;
7189d15e85bSSebastian Grimberg 
7199d15e85bSSebastian Grimberg     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl));
7209d15e85bSSebastian Grimberg     CeedCallBackend(magma_malloc((void **)&impl->d_curl, num_qpts * num_nodes * q_comp_curl * sizeof(curl[0])));
7219d15e85bSSebastian Grimberg     magma_setvector(num_qpts * num_nodes * q_comp_curl, sizeof(curl[0]), curl, 1, impl->d_curl, 1, data->queue);
7229d15e85bSSebastian Grimberg   }
723940a72f1SSebastian Grimberg 
7247251047cSSebastian Grimberg   // Compile the weight kernel if it won't be compiled later on
7257251047cSSebastian Grimberg   if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
7267251047cSSebastian Grimberg     Ceed        ceed_delegate;
72722070f95SJeremy L Thompson     char       *basis_kernel_source;
72822070f95SJeremy L Thompson     const char *weight_kernel_path;
7297251047cSSebastian Grimberg 
730940a72f1SSebastian Grimberg     // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
731940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));
732940a72f1SSebastian Grimberg 
733940a72f1SSebastian Grimberg     // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
734940a72f1SSebastian Grimberg     CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
735940a72f1SSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
736940a72f1SSebastian Grimberg     CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
737940a72f1SSebastian Grimberg     CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
7387251047cSSebastian Grimberg     CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts));
7397251047cSSebastian Grimberg     CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight));
740940a72f1SSebastian Grimberg     CeedCallBackend(CeedFree(&weight_kernel_path));
741940a72f1SSebastian Grimberg     CeedCallBackend(CeedFree(&basis_kernel_source));
7427251047cSSebastian Grimberg   }
743868539c2SNatalie Beams 
744023b8a51Sabdelfattah83   CeedCallBackend(CeedBasisSetData(basis, impl));
745940a72f1SSebastian Grimberg 
746940a72f1SSebastian Grimberg   // Register backend functions
747940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
748940a72f1SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
749e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7507f5b9731SStan Tomov }
751940a72f1SSebastian Grimberg 
752940a72f1SSebastian Grimberg //------------------------------------------------------------------------------
753