xref: /libCEED/interface/ceed-basis.c (revision bdee0278611904727ee35fcc2d0d7c3bf83db4c4)
1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_none;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Argument for @ref CeedOperatorSetField() indicating that the field does not require a `CeedBasis`
27 const CeedBasis CEED_BASIS_NONE = &ceed_basis_none;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Chebyshev polynomial values at a point
39 
40   @param[in]  x           Coordinate to evaluate Chebyshev polynomials at
41   @param[in]  n           Number of Chebyshev polynomials to evaluate, `n >= 2`
42   @param[out] chebyshev_x Array of Chebyshev polynomial values
43 
44   @return An error code: 0 - success, otherwise - failure
45 
46   @ref Developer
47 **/
48 static int CeedChebyshevPolynomialsAtPoint(CeedScalar x, CeedInt n, CeedScalar *chebyshev_x) {
49   chebyshev_x[0] = 1.0;
50   chebyshev_x[1] = 2 * x;
51   for (CeedInt i = 2; i < n; i++) chebyshev_x[i] = 2 * x * chebyshev_x[i - 1] - chebyshev_x[i - 2];
52   return CEED_ERROR_SUCCESS;
53 }
54 
55 /**
56   @brief Compute values of the derivative of Chebyshev polynomials at a point
57 
58   @param[in]  x            Coordinate to evaluate derivative of Chebyshev polynomials at
59   @param[in]  n            Number of Chebyshev polynomials to evaluate, `n >= 2`
60   @param[out] chebyshev_dx Array of Chebyshev polynomial derivative values
61 
62   @return An error code: 0 - success, otherwise - failure
63 
64   @ref Developer
65 **/
66 static int CeedChebyshevDerivativeAtPoint(CeedScalar x, CeedInt n, CeedScalar *chebyshev_dx) {
67   CeedScalar chebyshev_x[3];
68 
69   chebyshev_x[1]  = 1.0;
70   chebyshev_x[2]  = 2 * x;
71   chebyshev_dx[0] = 0.0;
72   chebyshev_dx[1] = 2.0;
73   for (CeedInt i = 2; i < n; i++) {
74     chebyshev_x[0]  = chebyshev_x[1];
75     chebyshev_x[1]  = chebyshev_x[2];
76     chebyshev_x[2]  = 2 * x * chebyshev_x[1] - chebyshev_x[0];
77     chebyshev_dx[i] = 2 * x * chebyshev_dx[i - 1] + 2 * chebyshev_x[1] - chebyshev_dx[i - 2];
78   }
79   return CEED_ERROR_SUCCESS;
80 }
81 
82 /**
83   @brief Compute Householder reflection.
84 
85   Computes \f$A = (I - b v v^T) A\f$, where \f$A\f$ is an \f$m \times n\f$ matrix indexed as `A[i*row + j*col]`.
86 
87   @param[in,out] A   Matrix to apply Householder reflection to, in place
88   @param[in]     v   Householder vector
89   @param[in]     b   Scaling factor
90   @param[in]     m   Number of rows in `A`
91   @param[in]     n   Number of columns in `A`
92   @param[in]     row Row stride
93   @param[in]     col Col stride
94 
95   @return An error code: 0 - success, otherwise - failure
96 
97   @ref Developer
98 **/
99 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
100   for (CeedInt j = 0; j < n; j++) {
101     CeedScalar w = A[0 * row + j * col];
102 
103     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
104     A[0 * row + j * col] -= b * w;
105     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
106   }
107   return CEED_ERROR_SUCCESS;
108 }
109 
110 /**
111   @brief Compute Givens rotation
112 
113   Computes \f$A = G A\f$ (or \f$G^T A\f$ in transpose mode), where \f$A\f$ is an \f$m \times n\f$ matrix indexed as `A[i*n + j*m]`.
114 
115   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
116   @param[in]     c      Cosine factor
117   @param[in]     s      Sine factor
118   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of `A` clockwise;
119                           @ref CEED_TRANSPOSE for the opposite rotation
120   @param[in]     i      First row/column to apply rotation
121   @param[in]     k      Second row/column to apply rotation
122   @param[in]     m      Number of rows in `A`
123   @param[in]     n      Number of columns in `A`
124 
125   @return An error code: 0 - success, otherwise - failure
126 
127   @ref Developer
128 **/
129 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
130   CeedInt stride_j = 1, stride_ik = m, num_its = n;
131 
132   if (t_mode == CEED_NOTRANSPOSE) {
133     stride_j  = n;
134     stride_ik = 1;
135     num_its   = m;
136   }
137 
138   // Apply rotation
139   for (CeedInt j = 0; j < num_its; j++) {
140     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
141 
142     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
143     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
144   }
145   return CEED_ERROR_SUCCESS;
146 }
147 
148 /**
149   @brief View an array stored in a `CeedBasis`
150 
151   @param[in] name   Name of array
152   @param[in] fp_fmt Printing format
153   @param[in] m      Number of rows in array
154   @param[in] n      Number of columns in array
155   @param[in] a      Array to be viewed
156   @param[in] tabs   Tabs to append before each new line
157   @param[in] stream Stream to view to, e.g., `stdout`
158 
159   @return An error code: 0 - success, otherwise - failure
160 
161   @ref Developer
162 **/
163 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, const char *tabs, FILE *stream) {
164   if (m > 1) {
165     fprintf(stream, "%s  %s:\n", tabs, name);
166   } else {
167     char padded_name[12];
168 
169     snprintf(padded_name, 11, "%s:", name);
170     fprintf(stream, "%s  %-10s", tabs, padded_name);
171   }
172   for (CeedInt i = 0; i < m; i++) {
173     if (m > 1) fprintf(stream, "%s    [%" CeedInt_FMT "]", tabs, i);
174     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
175     fputs("\n", stream);
176   }
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief View a `CeedBasis` passed as a `CeedObject`
182 
183   @param[in] basis  `CeedBasis` to view
184   @param[in] stream Filestream to write to
185 
186   @return An error code: 0 - success, otherwise - failure
187 
188   @ref Developer
189 **/
190 static int CeedBasisView_Object(CeedObject basis, FILE *stream) {
191   CeedCall(CeedBasisView((CeedBasis)basis, stream));
192   return CEED_ERROR_SUCCESS;
193 }
194 
195 /**
196   @brief Destroy a `CeedBasis` passed as a `CeedObject`
197 
198   @param[in,out] basis Address of `CeedBasis` to destroy
199 
200   @return An error code: 0 - success, otherwise - failure
201 
202   @ref Developer
203 **/
204 static int CeedBasisDestroy_Object(CeedObject *basis) {
205   CeedCall(CeedBasisDestroy((CeedBasis *)basis));
206   return CEED_ERROR_SUCCESS;
207 }
208 
209 /**
210   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
211 
212   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
213   The gradient is given by `grad_project = interp_to^+ * grad_from`, and is only computed for \f$H^1\f$ spaces otherwise it should not be used.
214 
215   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
216 
217   @param[in]  basis_from     `CeedBasis` to project from
218   @param[in]  basis_to       `CeedBasis` to project to
219   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored
220   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored
221 
222   @return An error code: 0 - success, otherwise - failure
223 
224   @ref Developer
225 **/
226 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
227   bool    are_both_tensor;
228   CeedInt Q, Q_to, Q_from, P_to, P_from;
229 
230   // Check for compatible quadrature spaces
231   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
232   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
233   CeedCheck(Q_to == Q_from, CeedBasisReturnCeed(basis_to), CEED_ERROR_DIMENSION,
234             "Bases must have compatible quadrature spaces."
235             " 'basis_from' has %" CeedInt_FMT " points and 'basis_to' has %" CeedInt_FMT,
236             Q_from, Q_to);
237   Q = Q_to;
238 
239   // Check for matching tensor or non-tensor
240   {
241     bool is_tensor_to, is_tensor_from;
242 
243     CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
244     CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
245     are_both_tensor = is_tensor_to && is_tensor_from;
246   }
247   if (are_both_tensor) {
248     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
249     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
250     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
251   } else {
252     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
253     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
254   }
255 
256   // Check for matching FE space
257   CeedFESpace fe_space_to, fe_space_from;
258 
259   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space_to));
260   CeedCall(CeedBasisGetFESpace(basis_from, &fe_space_from));
261   CeedCheck(fe_space_to == fe_space_from, CeedBasisReturnCeed(basis_to), CEED_ERROR_MINOR,
262             "Bases must both be the same FE space type."
263             " 'basis_from' is a %s and 'basis_to' is a %s",
264             CeedFESpaces[fe_space_from], CeedFESpaces[fe_space_to]);
265 
266   // Get source matrices
267   CeedInt           dim, q_comp = 1;
268   CeedScalar       *interp_to_inv, *interp_from;
269   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, *grad_from_source = NULL;
270 
271   CeedCall(CeedBasisGetDimension(basis_from, &dim));
272   if (are_both_tensor) {
273     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
274     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
275   } else {
276     CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_INTERP, &q_comp));
277     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
278     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
279   }
280   CeedCall(CeedMalloc(Q * P_from * q_comp, &interp_from));
281   CeedCall(CeedCalloc(P_to * P_from, interp_project));
282 
283   // `grad_project = interp_to^+ * grad_from` is computed for the H^1 space case so the
284   // projection basis will have a gradient operation (allocated even if not H^1 for the
285   // basis construction later on)
286   if (fe_space_to == CEED_FE_SPACE_H1) {
287     if (are_both_tensor) {
288       CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
289     } else {
290       CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
291     }
292   }
293   CeedCall(CeedCalloc(P_to * P_from * (are_both_tensor ? 1 : dim), grad_project));
294 
295   // Compute interp_to^+, pseudoinverse of interp_to
296   CeedCall(CeedCalloc(Q * q_comp * P_to, &interp_to_inv));
297   CeedCall(CeedMatrixPseudoinverse(CeedBasisReturnCeed(basis_to), interp_to_source, Q * q_comp, P_to, interp_to_inv));
298   // Build matrices
299   CeedInt     num_matrices = 1 + (fe_space_to == CEED_FE_SPACE_H1) * (are_both_tensor ? 1 : dim);
300   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
301 
302   input_from[0]     = (CeedScalar *)interp_from_source;
303   output_project[0] = *interp_project;
304   for (CeedInt m = 1; m < num_matrices; m++) {
305     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
306     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
307   }
308   for (CeedInt m = 0; m < num_matrices; m++) {
309     // output_project = interp_to^+ * interp_from
310     memcpy(interp_from, input_from[m], Q * P_from * q_comp * sizeof(input_from[m][0]));
311     CeedCall(CeedMatrixMatrixMultiply(CeedBasisReturnCeed(basis_to), interp_to_inv, input_from[m], output_project[m], P_to, P_from, Q * q_comp));
312     // Round zero to machine precision
313     for (CeedInt i = 0; i < P_to * P_from; i++) {
314       if (fabs(output_project[m][i]) < 10 * CEED_EPSILON) output_project[m][i] = 0.0;
315     }
316   }
317 
318   // Cleanup
319   CeedCall(CeedFree(&interp_to_inv));
320   CeedCall(CeedFree(&interp_from));
321   return CEED_ERROR_SUCCESS;
322 }
323 
324 /**
325   @brief Check input vector dimensions for CeedBasisApply[Add]
326 
327   @param[in]  basis     `CeedBasis` to evaluate
328   @param[in]  num_elem  The number of elements to apply the basis evaluation to;
329                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
330   @param[in]  t_mode    @ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
331                           @ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
332   @param[in]  eval_mode @ref CEED_EVAL_NONE to use values directly,
333                           @ref CEED_EVAL_INTERP to use interpolated values,
334                           @ref CEED_EVAL_GRAD to use gradients,
335                           @ref CEED_EVAL_DIV to use divergence,
336                           @ref CEED_EVAL_CURL to use curl,
337                           @ref CEED_EVAL_WEIGHT to use quadrature weights
338   @param[in]  u         Input `CeedVector`
339   @param[out] v         Output `CeedVector`
340 
341   @return An error code: 0 - success, otherwise - failure
342 
343   @ref Developer
344 **/
345 static int CeedBasisApplyCheckDims(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
346   CeedInt  dim, num_comp, q_comp, num_nodes, num_qpts;
347   CeedSize u_length = 0, v_length;
348 
349   CeedCall(CeedBasisGetDimension(basis, &dim));
350   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
351   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
352   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
353   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
354   CeedCall(CeedVectorGetLength(v, &v_length));
355   if (u) CeedCall(CeedVectorGetLength(u, &u_length));
356 
357   // Check vector lengths to prevent out of bounds issues
358   bool has_good_dims = true;
359   switch (eval_mode) {
360     case CEED_EVAL_NONE:
361     case CEED_EVAL_INTERP:
362     case CEED_EVAL_GRAD:
363     case CEED_EVAL_DIV:
364     case CEED_EVAL_CURL:
365       has_good_dims = ((t_mode == CEED_TRANSPOSE && u_length >= (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_qpts * (CeedSize)q_comp &&
366                         v_length >= (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes) ||
367                        (t_mode == CEED_NOTRANSPOSE && v_length >= (CeedSize)num_elem * (CeedSize)num_qpts * (CeedSize)num_comp * (CeedSize)q_comp &&
368                         u_length >= (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes));
369       break;
370     case CEED_EVAL_WEIGHT:
371       has_good_dims = v_length >= (CeedSize)num_elem * (CeedSize)num_qpts;
372       break;
373   }
374   CeedCheck(has_good_dims, CeedBasisReturnCeed(basis), CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
375   return CEED_ERROR_SUCCESS;
376 }
377 
378 /**
379   @brief Check input vector dimensions for CeedBasisApply[Add]AtPoints
380 
381   @param[in]  basis      `CeedBasis` to evaluate
382   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
383                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
384   @param[in]  num_points Array of the number of points to apply the basis evaluation to in each element, size `num_elem`
385   @param[in]  t_mode     @ref CEED_NOTRANSPOSE to evaluate from nodes to points;
386                            @ref CEED_TRANSPOSE to apply the transpose, mapping from points to nodes
387   @param[in]  eval_mode  @ref CEED_EVAL_INTERP to use interpolated values,
388                            @ref CEED_EVAL_GRAD to use gradients,
389                            @ref CEED_EVAL_WEIGHT to use quadrature weights
390   @param[in]  x_ref      `CeedVector` holding reference coordinates of each point
391   @param[in]  u          Input `CeedVector`, of length `num_nodes * num_comp` for @ref CEED_NOTRANSPOSE
392   @param[out] v          Output `CeedVector`, of length `num_points * num_q_comp` for @ref CEED_NOTRANSPOSE with @ref CEED_EVAL_INTERP
393 
394   @return An error code: 0 - success, otherwise - failure
395 
396   @ref Developer
397 **/
398 static int CeedBasisApplyAtPointsCheckDims(CeedBasis basis, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
399                                            CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
400   CeedInt  dim, num_comp, num_q_comp, num_nodes, P_1d = 1, Q_1d = 1, total_num_points = 0;
401   CeedSize x_length = 0, u_length = 0, v_length;
402 
403   CeedCall(CeedBasisGetDimension(basis, &dim));
404   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
405   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
406   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
407   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &num_q_comp));
408   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
409   CeedCall(CeedVectorGetLength(v, &v_length));
410   if (x_ref != CEED_VECTOR_NONE) CeedCall(CeedVectorGetLength(x_ref, &x_length));
411   if (u != CEED_VECTOR_NONE) CeedCall(CeedVectorGetLength(u, &u_length));
412 
413   // Check compatibility coordinates vector
414   for (CeedInt i = 0; i < num_elem; i++) total_num_points += num_points[i];
415   CeedCheck((x_length >= (CeedSize)total_num_points * (CeedSize)dim) || (eval_mode == CEED_EVAL_WEIGHT), CeedBasisReturnCeed(basis),
416             CEED_ERROR_DIMENSION,
417             "Length of reference coordinate vector incompatible with basis dimension and number of points."
418             " Found reference coordinate vector of length %" CeedSize_FMT ", not of length %" CeedSize_FMT ".",
419             x_length, (CeedSize)total_num_points * (CeedSize)dim);
420 
421   // Check CEED_EVAL_WEIGHT only on CEED_NOTRANSPOSE
422   CeedCheck(eval_mode != CEED_EVAL_WEIGHT || t_mode == CEED_NOTRANSPOSE, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED,
423             "CEED_EVAL_WEIGHT only supported with CEED_NOTRANSPOSE");
424 
425   // Check vector lengths to prevent out of bounds issues
426   bool has_good_dims = true;
427   switch (eval_mode) {
428     case CEED_EVAL_INTERP:
429       has_good_dims = ((t_mode == CEED_TRANSPOSE && (u_length >= (CeedSize)total_num_points * (CeedSize)num_q_comp ||
430                                                      v_length >= (CeedSize)num_elem * (CeedSize)num_nodes * (CeedSize)num_comp)) ||
431                        (t_mode == CEED_NOTRANSPOSE && (v_length >= (CeedSize)total_num_points * (CeedSize)num_q_comp ||
432                                                        u_length >= (CeedSize)num_elem * (CeedSize)num_nodes * (CeedSize)num_comp)));
433       break;
434     case CEED_EVAL_GRAD:
435       has_good_dims = ((t_mode == CEED_TRANSPOSE && (u_length >= (CeedSize)total_num_points * (CeedSize)num_q_comp * (CeedSize)dim ||
436                                                      v_length >= (CeedSize)num_elem * (CeedSize)num_nodes * (CeedSize)num_comp)) ||
437                        (t_mode == CEED_NOTRANSPOSE && (v_length >= (CeedSize)total_num_points * (CeedSize)num_q_comp * (CeedSize)dim ||
438                                                        u_length >= (CeedSize)num_elem * (CeedSize)num_nodes * (CeedSize)num_comp)));
439       break;
440     case CEED_EVAL_WEIGHT:
441       has_good_dims = t_mode == CEED_NOTRANSPOSE && (v_length >= total_num_points);
442       break;
443       // LCOV_EXCL_START
444     case CEED_EVAL_NONE:
445     case CEED_EVAL_DIV:
446     case CEED_EVAL_CURL:
447       return CeedError(CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Evaluation at arbitrary points not supported for %s",
448                        CeedEvalModes[eval_mode]);
449       // LCOV_EXCL_STOP
450   }
451   CeedCheck(has_good_dims, CeedBasisReturnCeed(basis), CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
452   return CEED_ERROR_SUCCESS;
453 }
454 
455 /**
456   @brief Default implimentation to apply basis evaluation from nodes to arbitrary points
457 
458   @param[in]  basis      `CeedBasis` to evaluate
459   @param[in]  apply_add  Sum result into target vector or overwrite
460   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
461                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
462   @param[in]  num_points Array of the number of points to apply the basis evaluation to in each element, size `num_elem`
463   @param[in]  t_mode     @ref CEED_NOTRANSPOSE to evaluate from nodes to points;
464                            @ref CEED_TRANSPOSE to apply the transpose, mapping from points to nodes
465   @param[in]  eval_mode  @ref CEED_EVAL_INTERP to use interpolated values,
466                            @ref CEED_EVAL_GRAD to use gradients,
467                            @ref CEED_EVAL_WEIGHT to use quadrature weights
468   @param[in]  x_ref      `CeedVector` holding reference coordinates of each point
469   @param[in]  u          Input `CeedVector`, of length `num_nodes * num_comp` for @ref CEED_NOTRANSPOSE
470   @param[out] v          Output `CeedVector`, of length `num_points * num_q_comp` for @ref CEED_NOTRANSPOSE with @ref CEED_EVAL_INTERP
471 
472   @return An error code: 0 - success, otherwise - failure
473 
474   @ref Developer
475 **/
476 static int CeedBasisApplyAtPoints_Core(CeedBasis basis, bool apply_add, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
477                                        CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
478   CeedInt dim, num_comp, P_1d = 1, Q_1d = 1, total_num_points = num_points[0];
479 
480   CeedCall(CeedBasisGetDimension(basis, &dim));
481   // Inserting check because clang-tidy doesn't understand this cannot occur
482   CeedCheck(dim > 0, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Malformed CeedBasis, dim > 0 is required");
483   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
484   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
485   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
486 
487   // Default implementation
488   {
489     bool is_tensor_basis;
490 
491     CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
492     CeedCheck(is_tensor_basis, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED,
493               "Evaluation at arbitrary points only supported for tensor product bases");
494   }
495   CeedCheck(num_elem == 1, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED,
496             "Evaluation at arbitrary  points only supported for a single element at a time");
497   if (eval_mode == CEED_EVAL_WEIGHT) {
498     CeedCall(CeedVectorSetValue(v, 1.0));
499     return CEED_ERROR_SUCCESS;
500   }
501   if (!basis->basis_chebyshev) {
502     // Build basis mapping from nodes to Chebyshev coefficients
503     CeedScalar       *chebyshev_interp_1d, *chebyshev_grad_1d, *chebyshev_q_weight_1d;
504     const CeedScalar *q_ref_1d;
505     Ceed              ceed;
506 
507     CeedCall(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d));
508     CeedCall(CeedCalloc(P_1d * Q_1d, &chebyshev_grad_1d));
509     CeedCall(CeedCalloc(Q_1d, &chebyshev_q_weight_1d));
510     CeedCall(CeedBasisGetQRef(basis, &q_ref_1d));
511     CeedCall(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d));
512 
513     CeedCall(CeedBasisGetCeed(basis, &ceed));
514     CeedCall(CeedVectorCreate(ceed, num_comp * CeedIntPow(Q_1d, dim), &basis->vec_chebyshev));
515     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d, Q_1d, chebyshev_interp_1d, chebyshev_grad_1d, q_ref_1d, chebyshev_q_weight_1d,
516                                      &basis->basis_chebyshev));
517 
518     // Cleanup
519     CeedCall(CeedFree(&chebyshev_interp_1d));
520     CeedCall(CeedFree(&chebyshev_grad_1d));
521     CeedCall(CeedFree(&chebyshev_q_weight_1d));
522     CeedCall(CeedDestroy(&ceed));
523   }
524 
525   // Create TensorContract object if needed, such as a basis from the GPU backends
526   if (!basis->contract) {
527     Ceed      ceed_ref;
528     CeedBasis basis_ref = NULL;
529 
530     CeedCall(CeedInit("/cpu/self", &ceed_ref));
531     // Only need matching tensor contraction dimensions, any type of basis will work
532     CeedCall(CeedBasisCreateTensorH1Lagrange(ceed_ref, dim, num_comp, P_1d, Q_1d, CEED_GAUSS, &basis_ref));
533     // Note - clang-tidy doesn't know basis_ref->contract must be valid here
534     CeedCheck(basis_ref && basis_ref->contract, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED,
535               "Reference CPU ceed failed to create a tensor contraction object");
536     CeedCall(CeedTensorContractReferenceCopy(basis_ref->contract, &basis->contract));
537     CeedCall(CeedBasisDestroy(&basis_ref));
538     CeedCall(CeedDestroy(&ceed_ref));
539   }
540 
541   // Basis evaluation
542   switch (t_mode) {
543     case CEED_NOTRANSPOSE: {
544       // Nodes to arbitrary points
545       CeedScalar       *v_array;
546       const CeedScalar *chebyshev_coeffs, *x_array_read;
547 
548       // -- Interpolate to Chebyshev coefficients
549       CeedCall(CeedBasisApply(basis->basis_chebyshev, 1, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, basis->vec_chebyshev));
550 
551       // -- Evaluate Chebyshev polynomials at arbitrary points
552       CeedCall(CeedVectorGetArrayRead(basis->vec_chebyshev, CEED_MEM_HOST, &chebyshev_coeffs));
553       CeedCall(CeedVectorGetArrayRead(x_ref, CEED_MEM_HOST, &x_array_read));
554       CeedCall(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &v_array));
555       switch (eval_mode) {
556         case CEED_EVAL_INTERP: {
557           CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
558 
559           // ---- Values at point
560           for (CeedInt p = 0; p < total_num_points; p++) {
561             CeedInt pre = num_comp * CeedIntPow(Q_1d, dim - 1), post = 1;
562 
563             for (CeedInt d = 0; d < dim; d++) {
564               // ------ Tensor contract with current Chebyshev polynomial values
565               CeedCall(CeedChebyshevPolynomialsAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
566               CeedCall(CeedTensorContractApply(basis->contract, pre, Q_1d, post, 1, chebyshev_x, t_mode, false,
567                                                d == 0 ? chebyshev_coeffs : tmp[d % 2], tmp[(d + 1) % 2]));
568               pre /= Q_1d;
569               post *= 1;
570             }
571             for (CeedInt c = 0; c < num_comp; c++) v_array[c * total_num_points + p] = tmp[dim % 2][c];
572           }
573           break;
574         }
575         case CEED_EVAL_GRAD: {
576           CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
577 
578           // ---- Values at point
579           for (CeedInt p = 0; p < total_num_points; p++) {
580             // Dim**2 contractions, apply grad when pass == dim
581             for (CeedInt pass = 0; pass < dim; pass++) {
582               CeedInt pre = num_comp * CeedIntPow(Q_1d, dim - 1), post = 1;
583 
584               for (CeedInt d = 0; d < dim; d++) {
585                 // ------ Tensor contract with current Chebyshev polynomial values
586                 if (pass == d) CeedCall(CeedChebyshevDerivativeAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
587                 else CeedCall(CeedChebyshevPolynomialsAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
588                 CeedCall(CeedTensorContractApply(basis->contract, pre, Q_1d, post, 1, chebyshev_x, t_mode, false,
589                                                  d == 0 ? chebyshev_coeffs : tmp[d % 2], tmp[(d + 1) % 2]));
590                 pre /= Q_1d;
591                 post *= 1;
592               }
593               for (CeedInt c = 0; c < num_comp; c++) v_array[(pass * num_comp + c) * total_num_points + p] = tmp[dim % 2][c];
594             }
595           }
596           break;
597         }
598         default:
599           // Nothing to do, excluded above
600           break;
601       }
602       CeedCall(CeedVectorRestoreArrayRead(basis->vec_chebyshev, &chebyshev_coeffs));
603       CeedCall(CeedVectorRestoreArrayRead(x_ref, &x_array_read));
604       CeedCall(CeedVectorRestoreArray(v, &v_array));
605       break;
606     }
607     case CEED_TRANSPOSE: {
608       // Note: No switch on e_mode here because only CEED_EVAL_INTERP is supported at this time
609       // Arbitrary points to nodes
610       CeedScalar       *chebyshev_coeffs;
611       const CeedScalar *u_array, *x_array_read;
612 
613       // -- Transpose of evaluation of Chebyshev polynomials at arbitrary points
614       CeedCall(CeedVectorGetArrayWrite(basis->vec_chebyshev, CEED_MEM_HOST, &chebyshev_coeffs));
615       CeedCall(CeedVectorGetArrayRead(x_ref, CEED_MEM_HOST, &x_array_read));
616       CeedCall(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &u_array));
617 
618       switch (eval_mode) {
619         case CEED_EVAL_INTERP: {
620           CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
621 
622           // ---- Values at point
623           for (CeedInt p = 0; p < total_num_points; p++) {
624             CeedInt pre = num_comp * 1, post = 1;
625 
626             for (CeedInt c = 0; c < num_comp; c++) tmp[0][c] = u_array[c * total_num_points + p];
627             for (CeedInt d = 0; d < dim; d++) {
628               // ------ Tensor contract with current Chebyshev polynomial values
629               CeedCall(CeedChebyshevPolynomialsAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
630               CeedCall(CeedTensorContractApply(basis->contract, pre, 1, post, Q_1d, chebyshev_x, t_mode, p > 0 && d == (dim - 1), tmp[d % 2],
631                                                d == (dim - 1) ? chebyshev_coeffs : tmp[(d + 1) % 2]));
632               pre /= 1;
633               post *= Q_1d;
634             }
635           }
636           break;
637         }
638         case CEED_EVAL_GRAD: {
639           CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
640 
641           // ---- Values at point
642           for (CeedInt p = 0; p < total_num_points; p++) {
643             // Dim**2 contractions, apply grad when pass == dim
644             for (CeedInt pass = 0; pass < dim; pass++) {
645               CeedInt pre = num_comp * 1, post = 1;
646 
647               for (CeedInt c = 0; c < num_comp; c++) tmp[0][c] = u_array[(pass * num_comp + c) * total_num_points + p];
648               for (CeedInt d = 0; d < dim; d++) {
649                 // ------ Tensor contract with current Chebyshev polynomial values
650                 if (pass == d) CeedCall(CeedChebyshevDerivativeAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
651                 else CeedCall(CeedChebyshevPolynomialsAtPoint(x_array_read[d * total_num_points + p], Q_1d, chebyshev_x));
652                 CeedCall(CeedTensorContractApply(basis->contract, pre, 1, post, Q_1d, chebyshev_x, t_mode,
653                                                  (p > 0 || (p == 0 && pass > 0)) && d == (dim - 1), tmp[d % 2],
654                                                  d == (dim - 1) ? chebyshev_coeffs : tmp[(d + 1) % 2]));
655                 pre /= 1;
656                 post *= Q_1d;
657               }
658             }
659           }
660           break;
661         }
662         default:
663           // Nothing to do, excluded above
664           break;
665       }
666       CeedCall(CeedVectorRestoreArray(basis->vec_chebyshev, &chebyshev_coeffs));
667       CeedCall(CeedVectorRestoreArrayRead(x_ref, &x_array_read));
668       CeedCall(CeedVectorRestoreArrayRead(u, &u_array));
669 
670       // -- Interpolate transpose from Chebyshev coefficients
671       if (apply_add) CeedCall(CeedBasisApplyAdd(basis->basis_chebyshev, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP, basis->vec_chebyshev, v));
672       else CeedCall(CeedBasisApply(basis->basis_chebyshev, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP, basis->vec_chebyshev, v));
673       break;
674     }
675   }
676   return CEED_ERROR_SUCCESS;
677 }
678 
679 /// @}
680 
681 /// ----------------------------------------------------------------------------
682 /// Ceed Backend API
683 /// ----------------------------------------------------------------------------
684 /// @addtogroup CeedBasisBackend
685 /// @{
686 
687 /**
688   @brief Fallback to a reference implementation for a non tensor-product basis for \f$H^1\f$ discretizations.
689     This function may only be called inside of a backend `BasisCreateH1` function.
690     This is used by a backend when the specific parameters for a `CeedBasis` exceed the backend's support, such as
691     when a `interp` and `grad` matrices require too many bytes to fit into shared memory on a GPU.
692 
693   @param[in]  ceed      `Ceed` object used to create the `CeedBasis`
694   @param[in]  topo      Topology of element, e.g. hypercube, simplex, etc
695   @param[in]  num_comp  Number of field components (1 for scalar fields)
696   @param[in]  num_nodes Total number of nodes
697   @param[in]  num_qpts  Total number of quadrature points
698   @param[in]  interp    Row-major (`num_qpts * num_nodes`) matrix expressing the values of nodal basis functions at quadrature points
699   @param[in]  grad      Row-major (`dim * num_qpts * num_nodes`) matrix expressing derivatives of nodal basis functions at quadrature points
700   @param[in]  q_ref     Array of length `num_qpts * dim` holding the locations of quadrature points on the reference element
701   @param[in]  q_weight  Array of length `num_qpts` holding the quadrature weights on the reference element
702   @param[out] basis     Newly created `CeedBasis`
703 
704   @return An error code: 0 - success, otherwise - failure
705 
706   @ref User
707 **/
708 int CeedBasisCreateH1Fallback(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
709                               const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
710   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
711   Ceed    delegate;
712 
713   CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
714   CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateH1");
715 
716   CeedCall(CeedReferenceCopy(delegate, &(basis)->obj.ceed));
717   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
718   CeedCall(delegate->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, basis));
719   CeedCall(CeedDestroy(&delegate));
720   return CEED_ERROR_SUCCESS;
721 }
722 
723 /**
724   @brief Return collocated gradient matrix
725 
726   @param[in]  basis         `CeedBasis`
727   @param[out] collo_grad_1d Row-major (`Q_1d * Q_1d`) matrix expressing derivatives of basis functions at quadrature points
728 
729   @return An error code: 0 - success, otherwise - failure
730 
731   @ref Backend
732 **/
733 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
734   Ceed              ceed;
735   CeedInt           P_1d, Q_1d;
736   CeedScalar       *interp_1d_pinv;
737   const CeedScalar *grad_1d, *interp_1d;
738 
739   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
740   CeedCall(CeedBasisGetCeed(basis, &ceed));
741   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
742   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
743 
744   // Compute interp_1d^+, pseudoinverse of interp_1d
745   CeedCall(CeedCalloc(P_1d * Q_1d, &interp_1d_pinv));
746   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
747   CeedCall(CeedMatrixPseudoinverse(ceed, interp_1d, Q_1d, P_1d, interp_1d_pinv));
748   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
749   CeedCall(CeedMatrixMatrixMultiply(ceed, grad_1d, (const CeedScalar *)interp_1d_pinv, collo_grad_1d, Q_1d, Q_1d, P_1d));
750 
751   CeedCall(CeedFree(&interp_1d_pinv));
752   CeedCall(CeedDestroy(&ceed));
753   return CEED_ERROR_SUCCESS;
754 }
755 
756 /**
757   @brief Return 1D interpolation matrix to Chebyshev polynomial coefficients on quadrature space
758 
759   @param[in]  basis               `CeedBasis`
760   @param[out] chebyshev_interp_1d Row-major (`P_1d * Q_1d`) matrix interpolating from basis nodes to Chebyshev polynomial coefficients
761 
762   @return An error code: 0 - success, otherwise - failure
763 
764   @ref Backend
765 **/
766 int CeedBasisGetChebyshevInterp1D(CeedBasis basis, CeedScalar *chebyshev_interp_1d) {
767   CeedInt           P_1d, Q_1d;
768   CeedScalar       *C, *chebyshev_coeffs_1d_inv;
769   const CeedScalar *interp_1d, *q_ref_1d;
770   Ceed              ceed;
771 
772   CeedCall(CeedBasisGetCeed(basis, &ceed));
773   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
774   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
775 
776   // Build coefficient matrix
777   // -- Note: Clang-tidy needs this check
778   CeedCheck((P_1d > 0) && (Q_1d > 0), ceed, CEED_ERROR_INCOMPATIBLE, "CeedBasis dimensions are malformed");
779   CeedCall(CeedCalloc(Q_1d * Q_1d, &C));
780   CeedCall(CeedBasisGetQRef(basis, &q_ref_1d));
781   for (CeedInt i = 0; i < Q_1d; i++) CeedCall(CeedChebyshevPolynomialsAtPoint(q_ref_1d[i], Q_1d, &C[i * Q_1d]));
782 
783   // Compute C^+, pseudoinverse of coefficient matrix
784   CeedCall(CeedCalloc(Q_1d * Q_1d, &chebyshev_coeffs_1d_inv));
785   CeedCall(CeedMatrixPseudoinverse(ceed, C, Q_1d, Q_1d, chebyshev_coeffs_1d_inv));
786 
787   // Build mapping from nodes to Chebyshev coefficients
788   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
789   CeedCall(CeedMatrixMatrixMultiply(ceed, chebyshev_coeffs_1d_inv, interp_1d, chebyshev_interp_1d, Q_1d, P_1d, Q_1d));
790 
791   // Cleanup
792   CeedCall(CeedFree(&C));
793   CeedCall(CeedFree(&chebyshev_coeffs_1d_inv));
794   CeedCall(CeedDestroy(&ceed));
795   return CEED_ERROR_SUCCESS;
796 }
797 
798 /**
799   @brief Get tensor status for given `CeedBasis`
800 
801   @param[in]  basis     `CeedBasis`
802   @param[out] is_tensor Variable to store tensor status
803 
804   @return An error code: 0 - success, otherwise - failure
805 
806   @ref Backend
807 **/
808 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
809   *is_tensor = basis->is_tensor_basis;
810   return CEED_ERROR_SUCCESS;
811 }
812 
813 /**
814   @brief Determine if given `CeedBasis` has nodes collocated with quadrature points
815 
816   @param[in]  basis         `CeedBasis`
817   @param[out] is_collocated Variable to store collocated status
818 
819   @return An error code: 0 - success, otherwise - failure
820 
821   @ref Backend
822 **/
823 int CeedBasisIsCollocated(CeedBasis basis, bool *is_collocated) {
824   if (basis->is_tensor_basis && (basis->Q_1d == basis->P_1d)) {
825     *is_collocated = true;
826 
827     for (CeedInt i = 0; i < basis->P_1d; i++) {
828       *is_collocated = *is_collocated && (fabs(basis->interp_1d[i + basis->P_1d * i] - 1.0) < 10 * CEED_EPSILON);
829       for (CeedInt j = 0; j < basis->Q_1d; j++) {
830         if (j != i) *is_collocated = *is_collocated && (fabs(basis->interp_1d[j + basis->P_1d * i]) < 10 * CEED_EPSILON);
831       }
832     }
833   } else {
834     *is_collocated = false;
835   }
836   return CEED_ERROR_SUCCESS;
837 }
838 
839 /**
840   @brief Get backend data of a `CeedBasis`
841 
842   @param[in]  basis `CeedBasis`
843   @param[out] data  Variable to store data
844 
845   @return An error code: 0 - success, otherwise - failure
846 
847   @ref Backend
848 **/
849 int CeedBasisGetData(CeedBasis basis, void *data) {
850   *(void **)data = basis->data;
851   return CEED_ERROR_SUCCESS;
852 }
853 
854 /**
855   @brief Set backend data of a `CeedBasis`
856 
857   @param[in,out] basis  `CeedBasis`
858   @param[in]     data   Data to set
859 
860   @return An error code: 0 - success, otherwise - failure
861 
862   @ref Backend
863 **/
864 int CeedBasisSetData(CeedBasis basis, void *data) {
865   basis->data = data;
866   return CEED_ERROR_SUCCESS;
867 }
868 
869 /**
870   @brief Increment the reference counter for a `CeedBasis`
871 
872   @param[in,out] basis `CeedBasis` to increment the reference counter
873 
874   @return An error code: 0 - success, otherwise - failure
875 
876   @ref Backend
877 **/
878 int CeedBasisReference(CeedBasis basis) {
879   CeedCall(CeedObjectReference((CeedObject)basis));
880   return CEED_ERROR_SUCCESS;
881 }
882 
883 /**
884   @brief Get number of Q-vector components for given `CeedBasis`
885 
886   @param[in]  basis     `CeedBasis`
887   @param[in]  eval_mode @ref CEED_EVAL_INTERP to use interpolated values,
888                           @ref CEED_EVAL_GRAD to use gradients,
889                           @ref CEED_EVAL_DIV to use divergence,
890                           @ref CEED_EVAL_CURL to use curl
891   @param[out] q_comp    Variable to store number of Q-vector components of basis
892 
893   @return An error code: 0 - success, otherwise - failure
894 
895   @ref Backend
896 **/
897 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedEvalMode eval_mode, CeedInt *q_comp) {
898   CeedInt dim;
899 
900   CeedCall(CeedBasisGetDimension(basis, &dim));
901   switch (eval_mode) {
902     case CEED_EVAL_INTERP: {
903       CeedFESpace fe_space;
904 
905       CeedCall(CeedBasisGetFESpace(basis, &fe_space));
906       *q_comp = (fe_space == CEED_FE_SPACE_H1) ? 1 : dim;
907     } break;
908     case CEED_EVAL_GRAD:
909       *q_comp = dim;
910       break;
911     case CEED_EVAL_DIV:
912       *q_comp = 1;
913       break;
914     case CEED_EVAL_CURL:
915       *q_comp = (dim < 3) ? 1 : dim;
916       break;
917     case CEED_EVAL_NONE:
918     case CEED_EVAL_WEIGHT:
919       *q_comp = 1;
920       break;
921   }
922   return CEED_ERROR_SUCCESS;
923 }
924 
925 /**
926   @brief Estimate number of FLOPs required to apply `CeedBasis` in `t_mode` and `eval_mode`
927 
928   @param[in]  basis        `CeedBasis` to estimate FLOPs for
929   @param[in]  t_mode       Apply basis or transpose
930   @param[in]  eval_mode    @ref CeedEvalMode
931   @param[in]  is_at_points Evaluate the basis at points or quadrature points
932   @param[in]  num_points   Number of points basis is evaluated at
933   @param[out] flops        Address of variable to hold FLOPs estimate
934 
935   @ref Backend
936 **/
937 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, bool is_at_points, CeedInt num_points,
938                               CeedSize *flops) {
939   bool is_tensor;
940 
941   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
942   CeedCheck(!is_at_points || is_tensor, CeedBasisReturnCeed(basis), CEED_ERROR_INCOMPATIBLE, "Can only evaluate tensor-product bases at points");
943   if (is_tensor) {
944     CeedInt dim, num_comp, P_1d, Q_1d;
945 
946     CeedCall(CeedBasisGetDimension(basis, &dim));
947     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
948     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
949     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
950     if (t_mode == CEED_TRANSPOSE) {
951       P_1d = Q_1d;
952       Q_1d = P_1d;
953     }
954     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
955 
956     for (CeedInt d = 0; d < dim; d++) {
957       tensor_flops += 2 * pre * P_1d * post * Q_1d;
958       pre /= P_1d;
959       post *= Q_1d;
960     }
961     if (is_at_points) {
962       bool is_gpu = false;
963 
964       {
965         CeedMemType mem_type;
966 
967         CeedCall(CeedGetPreferredMemType(CeedBasisReturnCeed(basis), &mem_type));
968         is_gpu = mem_type == CEED_MEM_DEVICE;
969       }
970 
971       CeedInt chebyshev_flops = (Q_1d - 2) * 3 + 1, d_chebyshev_flops = (Q_1d - 2) * 8 + 1;
972       CeedInt point_tensor_flops = 0, pre = CeedIntPow(Q_1d, dim - 1), post = 1;
973 
974       for (CeedInt d = 0; d < dim; d++) {
975         point_tensor_flops += 2 * pre * Q_1d * post * 1;
976         pre /= P_1d;
977         post *= Q_1d;
978       }
979 
980       switch (eval_mode) {
981         case CEED_EVAL_NONE:
982           *flops = 0;
983           break;
984         case CEED_EVAL_INTERP: {
985           *flops = tensor_flops + num_points * num_comp * (point_tensor_flops + (t_mode == CEED_TRANSPOSE ? CeedIntPow(Q_1d, dim) : 0));
986           if (dim == 3 && is_gpu) {
987             *flops += num_points * Q_1d *
988                       (chebyshev_flops + num_comp * (2 * chebyshev_flops + 2 * Q_1d * Q_1d + (t_mode == CEED_TRANSPOSE ? 2 * Q_1d + 1 : 3 * Q_1d)));
989           } else {
990             *flops += num_points * (is_gpu ? num_comp : 1) * dim * chebyshev_flops;
991           }
992           break;
993         }
994         case CEED_EVAL_GRAD: {
995           *flops = tensor_flops + num_points * num_comp * (point_tensor_flops + (t_mode == CEED_TRANSPOSE ? CeedIntPow(Q_1d, dim) : 0));
996           if (dim == 3 && is_gpu) {
997             CeedInt inner_flops =
998                 dim * (2 * Q_1d * Q_1d + (t_mode == CEED_TRANSPOSE ? 2 : 3) * Q_1d) + (dim - 1) * (2 * chebyshev_flops + d_chebyshev_flops);
999 
1000             *flops += num_points * Q_1d * (chebyshev_flops + d_chebyshev_flops + num_comp * (inner_flops + (t_mode == CEED_TRANSPOSE ? 1 : 0)));
1001           } else {
1002             *flops += num_points * (is_gpu ? num_comp : 1) * dim * (d_chebyshev_flops + (dim - 1) * chebyshev_flops);
1003           }
1004           break;
1005         }
1006         case CEED_EVAL_DIV:
1007         case CEED_EVAL_CURL: {
1008           // LCOV_EXCL_START
1009           return CeedError(CeedBasisReturnCeed(basis), CEED_ERROR_INCOMPATIBLE, "Tensor basis evaluation for %s not supported at points",
1010                            CeedEvalModes[eval_mode]);
1011           break;
1012           // LCOV_EXCL_STOP
1013         }
1014         case CEED_EVAL_WEIGHT:
1015           *flops = num_points;
1016           break;
1017       }
1018     } else {
1019       switch (eval_mode) {
1020         case CEED_EVAL_NONE:
1021           *flops = 0;
1022           break;
1023         case CEED_EVAL_INTERP:
1024           *flops = tensor_flops;
1025           break;
1026         case CEED_EVAL_GRAD:
1027           *flops = tensor_flops * 2;
1028           break;
1029         case CEED_EVAL_DIV:
1030         case CEED_EVAL_CURL: {
1031           // LCOV_EXCL_START
1032           return CeedError(CeedBasisReturnCeed(basis), CEED_ERROR_INCOMPATIBLE, "Tensor basis evaluation for %s not supported",
1033                            CeedEvalModes[eval_mode]);
1034           break;
1035           // LCOV_EXCL_STOP
1036         }
1037         case CEED_EVAL_WEIGHT:
1038           *flops = dim * CeedIntPow(Q_1d, dim);
1039           break;
1040       }
1041     }
1042   } else {
1043     CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
1044 
1045     CeedCall(CeedBasisGetDimension(basis, &dim));
1046     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1047     CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
1048     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1049     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1050     switch (eval_mode) {
1051       case CEED_EVAL_NONE:
1052         *flops = 0;
1053         break;
1054       case CEED_EVAL_INTERP:
1055       case CEED_EVAL_GRAD:
1056       case CEED_EVAL_DIV:
1057       case CEED_EVAL_CURL:
1058         *flops = num_nodes * num_qpts * num_comp * q_comp;
1059         break;
1060       case CEED_EVAL_WEIGHT:
1061         *flops = 0;
1062         break;
1063     }
1064   }
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Get `CeedFESpace` for a `CeedBasis`
1070 
1071   @param[in]  basis    `CeedBasis`
1072   @param[out] fe_space Variable to store `CeedFESpace`
1073 
1074   @return An error code: 0 - success, otherwise - failure
1075 
1076   @ref Backend
1077 **/
1078 int CeedBasisGetFESpace(CeedBasis basis, CeedFESpace *fe_space) {
1079   *fe_space = basis->fe_space;
1080   return CEED_ERROR_SUCCESS;
1081 }
1082 
1083 /**
1084   @brief Get dimension for given `CeedElemTopology`
1085 
1086   @param[in]  topo `CeedElemTopology`
1087   @param[out] dim  Variable to store dimension of topology
1088 
1089   @return An error code: 0 - success, otherwise - failure
1090 
1091   @ref Backend
1092 **/
1093 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
1094   *dim = (CeedInt)topo >> 16;
1095   return CEED_ERROR_SUCCESS;
1096 }
1097 
1098 /**
1099   @brief Get `CeedTensorContract` of a `CeedBasis`
1100 
1101   @param[in]  basis     `CeedBasis`
1102   @param[out] contract  Variable to store `CeedTensorContract`
1103 
1104   @return An error code: 0 - success, otherwise - failure
1105 
1106   @ref Backend
1107 **/
1108 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
1109   *contract = basis->contract;
1110   return CEED_ERROR_SUCCESS;
1111 }
1112 
1113 /**
1114   @brief Set `CeedTensorContract` of a `CeedBasis`
1115 
1116   @param[in,out] basis    `CeedBasis`
1117   @param[in]     contract `CeedTensorContract` to set
1118 
1119   @return An error code: 0 - success, otherwise - failure
1120 
1121   @ref Backend
1122 **/
1123 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
1124   basis->contract = contract;
1125   CeedCall(CeedTensorContractReference(contract));
1126   return CEED_ERROR_SUCCESS;
1127 }
1128 
1129 /**
1130   @brief Return a reference implementation of matrix multiplication \f$C = A B\f$.
1131 
1132   Note: This is a reference implementation for CPU `CeedScalar` pointers that is not intended for high performance.
1133 
1134   @param[in]  ceed  `Ceed` context for error handling
1135   @param[in]  mat_A Row-major matrix `A`
1136   @param[in]  mat_B Row-major matrix `B`
1137   @param[out] mat_C Row-major output matrix `C`
1138   @param[in]  m     Number of rows of `C`
1139   @param[in]  n     Number of columns of `C`
1140   @param[in]  kk    Number of columns of `A`/rows of `B`
1141 
1142   @return An error code: 0 - success, otherwise - failure
1143 
1144   @ref Utility
1145 **/
1146 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
1147   for (CeedInt i = 0; i < m; i++) {
1148     for (CeedInt j = 0; j < n; j++) {
1149       CeedScalar sum = 0;
1150 
1151       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
1152       mat_C[j + i * n] = sum;
1153     }
1154   }
1155   return CEED_ERROR_SUCCESS;
1156 }
1157 
1158 /**
1159   @brief Return QR Factorization of a matrix
1160 
1161   @param[in]     ceed `Ceed` context for error handling
1162   @param[in,out] mat  Row-major matrix to be factorized in place
1163   @param[in,out] tau  Vector of length `m` of scaling factors
1164   @param[in]     m    Number of rows
1165   @param[in]     n    Number of columns
1166 
1167   @return An error code: 0 - success, otherwise - failure
1168 
1169   @ref Utility
1170 **/
1171 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
1172   CeedScalar v[m];
1173 
1174   // Check matrix shape
1175   CeedCheck(n <= m, ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
1176 
1177   for (CeedInt i = 0; i < n; i++) {
1178     CeedScalar sigma = 0.0;
1179 
1180     if (i >= m - 1) {  // last row of matrix, no reflection needed
1181       tau[i] = 0.;
1182       break;
1183     }
1184     // Calculate Householder vector, magnitude
1185     v[i] = mat[i + n * i];
1186     for (CeedInt j = i + 1; j < m; j++) {
1187       v[j] = mat[i + n * j];
1188       sigma += v[j] * v[j];
1189     }
1190     const CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
1191     const CeedScalar R_ii = -copysign(norm, v[i]);
1192 
1193     v[i] -= R_ii;
1194     // norm of v[i:m] after modification above and scaling below
1195     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1196     //   tau = 2 / (norm*norm)
1197     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1198     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
1199 
1200     // Apply Householder reflector to lower right panel
1201     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
1202     // Save v
1203     mat[i + n * i] = R_ii;
1204     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
1205   }
1206   return CEED_ERROR_SUCCESS;
1207 }
1208 
1209 /**
1210   @brief Apply Householder Q matrix
1211 
1212   Compute `mat_A = mat_Q mat_A`, where `mat_Q` is \f$m \times m\f$ and `mat_A` is \f$m \times n\f$.
1213 
1214   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
1215   @param[in]     mat_Q  Householder Q matrix
1216   @param[in]     tau    Householder scaling factors
1217   @param[in]     t_mode Transpose mode for application
1218   @param[in]     m      Number of rows in `A`
1219   @param[in]     n      Number of columns in `A`
1220   @param[in]     k      Number of elementary reflectors in Q, `k < m`
1221   @param[in]     row    Row stride in `A`
1222   @param[in]     col    Col stride in `A`
1223 
1224   @return An error code: 0 - success, otherwise - failure
1225 
1226   @ref Utility
1227 **/
1228 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
1229                           CeedInt k, CeedInt row, CeedInt col) {
1230   CeedScalar *v;
1231 
1232   CeedCall(CeedMalloc(m, &v));
1233   for (CeedInt ii = 0; ii < k; ii++) {
1234     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
1235     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
1236     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
1237     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
1238   }
1239   CeedCall(CeedFree(&v));
1240   return CEED_ERROR_SUCCESS;
1241 }
1242 
1243 /**
1244   @brief Return pseudoinverse of a matrix
1245 
1246   @param[in]     ceed      Ceed context for error handling
1247   @param[in]     mat       Row-major matrix to compute pseudoinverse of
1248   @param[in]     m         Number of rows
1249   @param[in]     n         Number of columns
1250   @param[out]    mat_pinv  Row-major pseudoinverse matrix
1251 
1252   @return An error code: 0 - success, otherwise - failure
1253 
1254   @ref Utility
1255 **/
1256 int CeedMatrixPseudoinverse(Ceed ceed, const CeedScalar *mat, CeedInt m, CeedInt n, CeedScalar *mat_pinv) {
1257   CeedScalar *tau, *I, *mat_copy;
1258 
1259   CeedCall(CeedCalloc(m, &tau));
1260   CeedCall(CeedCalloc(m * m, &I));
1261   CeedCall(CeedCalloc(m * n, &mat_copy));
1262   memcpy(mat_copy, mat, m * n * sizeof mat[0]);
1263 
1264   // QR Factorization, mat = Q R
1265   CeedCall(CeedQRFactorization(ceed, mat_copy, tau, m, n));
1266 
1267   // -- Apply Q^T, I = Q^T * I
1268   for (CeedInt i = 0; i < m; i++) I[i * m + i] = 1.0;
1269   CeedCall(CeedHouseholderApplyQ(I, mat_copy, tau, CEED_TRANSPOSE, m, m, n, m, 1));
1270   // -- Apply R_inv, mat_pinv = R_inv * Q^T
1271   for (CeedInt j = 0; j < m; j++) {  // Column j
1272     mat_pinv[j + m * (n - 1)] = I[j + m * (n - 1)] / mat_copy[n * n - 1];
1273     for (CeedInt i = n - 2; i >= 0; i--) {  // Row i
1274       mat_pinv[j + m * i] = I[j + m * i];
1275       for (CeedInt k = i + 1; k < n; k++) mat_pinv[j + m * i] -= mat_copy[k + n * i] * mat_pinv[j + m * k];
1276       mat_pinv[j + m * i] /= mat_copy[i + n * i];
1277     }
1278   }
1279 
1280   // Cleanup
1281   CeedCall(CeedFree(&I));
1282   CeedCall(CeedFree(&tau));
1283   CeedCall(CeedFree(&mat_copy));
1284   return CEED_ERROR_SUCCESS;
1285 }
1286 
1287 /**
1288   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
1289 
1290   @param[in]     ceed   `Ceed` context for error handling
1291   @param[in,out] mat    Row-major matrix to be factorized in place
1292   @param[out]    lambda Vector of length n of eigenvalues
1293   @param[in]     n      Number of rows/columns
1294 
1295   @return An error code: 0 - success, otherwise - failure
1296 
1297   @ref Utility
1298 **/
1299 CeedPragmaOptimizeOff
1300 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
1301   // Check bounds for clang-tidy
1302   CeedCheck(n > 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
1303 
1304   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
1305 
1306   // Copy mat to mat_T and set mat to I
1307   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
1308   for (CeedInt i = 0; i < n; i++) {
1309     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
1310   }
1311 
1312   // Reduce to tridiagonal
1313   for (CeedInt i = 0; i < n - 1; i++) {
1314     // Calculate Householder vector, magnitude
1315     CeedScalar sigma = 0.0;
1316 
1317     v[i] = mat_T[i + n * (i + 1)];
1318     for (CeedInt j = i + 1; j < n - 1; j++) {
1319       v[j] = mat_T[i + n * (j + 1)];
1320       sigma += v[j] * v[j];
1321     }
1322     const CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
1323     const CeedScalar R_ii = -copysign(norm, v[i]);
1324 
1325     v[i] -= R_ii;
1326     // norm of v[i:m] after modification above and scaling below
1327     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1328     //   tau = 2 / (norm*norm)
1329     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1330     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
1331 
1332     // Update sub and super diagonal
1333     for (CeedInt j = i + 2; j < n; j++) {
1334       mat_T[i + n * j] = 0;
1335       mat_T[j + n * i] = 0;
1336     }
1337     // Apply symmetric Householder reflector to lower right panel
1338     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1339     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
1340 
1341     // Save v
1342     mat_T[i + n * (i + 1)] = R_ii;
1343     mat_T[(i + 1) + n * i] = R_ii;
1344     for (CeedInt j = i + 1; j < n - 1; j++) {
1345       mat_T[i + n * (j + 1)] = v[j];
1346     }
1347   }
1348   // Backwards accumulation of Q
1349   for (CeedInt i = n - 2; i >= 0; i--) {
1350     if (tau[i] > 0.0) {
1351       v[i] = 1;
1352       for (CeedInt j = i + 1; j < n - 1; j++) {
1353         v[j]                   = mat_T[i + n * (j + 1)];
1354         mat_T[i + n * (j + 1)] = 0;
1355       }
1356       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1357     }
1358   }
1359 
1360   // Reduce sub and super diagonal
1361   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
1362   CeedScalar tol = CEED_EPSILON;
1363 
1364   while (itr < max_itr) {
1365     // Update p, q, size of reduced portions of diagonal
1366     p = 0;
1367     q = 0;
1368     for (CeedInt i = n - 2; i >= 0; i--) {
1369       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
1370       else break;
1371     }
1372     for (CeedInt i = 0; i < n - q - 1; i++) {
1373       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
1374       else break;
1375     }
1376     if (q == n - 1) break;  // Finished reducing
1377 
1378     // Reduce tridiagonal portion
1379     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
1380     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
1381     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
1382     CeedScalar x  = mat_T[p + n * p] - mu;
1383     CeedScalar z  = mat_T[p + n * (p + 1)];
1384 
1385     for (CeedInt k = p; k < n - q - 1; k++) {
1386       // Compute Givens rotation
1387       CeedScalar c = 1, s = 0;
1388 
1389       if (fabs(z) > tol) {
1390         if (fabs(z) > fabs(x)) {
1391           const CeedScalar tau = -x / z;
1392 
1393           s = 1 / sqrt(1 + tau * tau);
1394           c = s * tau;
1395         } else {
1396           const CeedScalar tau = -z / x;
1397 
1398           c = 1 / sqrt(1 + tau * tau);
1399           s = c * tau;
1400         }
1401       }
1402 
1403       // Apply Givens rotation to T
1404       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1405       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
1406 
1407       // Apply Givens rotation to Q
1408       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1409 
1410       // Update x, z
1411       if (k < n - q - 2) {
1412         x = mat_T[k + n * (k + 1)];
1413         z = mat_T[k + n * (k + 2)];
1414       }
1415     }
1416     itr++;
1417   }
1418 
1419   // Save eigenvalues
1420   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
1421 
1422   // Check convergence
1423   CeedCheck(itr < max_itr || q > n, ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
1424   return CEED_ERROR_SUCCESS;
1425 }
1426 CeedPragmaOptimizeOn
1427 
1428 /**
1429   @brief Return Simultaneous Diagonalization of two matrices.
1430 
1431   This solves the generalized eigenvalue problem `A x = lambda B x`, where `A` and `B` are symmetric and `B` is positive definite.
1432   We generate the matrix `X` and vector `Lambda` such that `X^T A X = Lambda` and `X^T B X = I`.
1433   This is equivalent to the LAPACK routine 'sygv' with `TYPE = 1`.
1434 
1435   @param[in]  ceed   `Ceed` context for error handling
1436   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
1437   @param[in]  mat_B  Row-major matrix to be factorized to identity
1438   @param[out] mat_X  Row-major orthogonal matrix
1439   @param[out] lambda Vector of length `n` of generalized eigenvalues
1440   @param[in]  n      Number of rows/columns
1441 
1442   @return An error code: 0 - success, otherwise - failure
1443 
1444   @ref Utility
1445 **/
1446 CeedPragmaOptimizeOff
1447 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
1448   CeedScalar *mat_C, *mat_G, *vec_D;
1449 
1450   CeedCall(CeedCalloc(n * n, &mat_C));
1451   CeedCall(CeedCalloc(n * n, &mat_G));
1452   CeedCall(CeedCalloc(n, &vec_D));
1453 
1454   // Compute B = G D G^T
1455   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
1456   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
1457 
1458   // Sort eigenvalues
1459   for (CeedInt i = n - 1; i >= 0; i--) {
1460     for (CeedInt j = 0; j < i; j++) {
1461       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
1462         CeedScalarSwap(vec_D[j], vec_D[j + 1]);
1463         for (CeedInt k = 0; k < n; k++) CeedScalarSwap(mat_G[k * n + j], mat_G[k * n + j + 1]);
1464       }
1465     }
1466   }
1467 
1468   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1469   //           = D^-1/2 G^T A G D^-1/2
1470   // -- D = D^-1/2
1471   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
1472   // -- G = G D^-1/2
1473   // -- C = D^-1/2 G^T
1474   for (CeedInt i = 0; i < n; i++) {
1475     for (CeedInt j = 0; j < n; j++) {
1476       mat_G[i * n + j] *= vec_D[j];
1477       mat_C[j * n + i] = mat_G[i * n + j];
1478     }
1479   }
1480   // -- X = (D^-1/2 G^T) A
1481   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
1482   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1483   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
1484 
1485   // Compute Q^T C Q = lambda
1486   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
1487 
1488   // Sort eigenvalues
1489   for (CeedInt i = n - 1; i >= 0; i--) {
1490     for (CeedInt j = 0; j < i; j++) {
1491       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
1492         CeedScalarSwap(lambda[j], lambda[j + 1]);
1493         for (CeedInt k = 0; k < n; k++) CeedScalarSwap(mat_C[k * n + j], mat_C[k * n + j + 1]);
1494       }
1495     }
1496   }
1497 
1498   // Set X = (G D^1/2)^-T Q
1499   //       = G D^-1/2 Q
1500   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
1501 
1502   // Cleanup
1503   CeedCall(CeedFree(&mat_C));
1504   CeedCall(CeedFree(&mat_G));
1505   CeedCall(CeedFree(&vec_D));
1506   return CEED_ERROR_SUCCESS;
1507 }
1508 CeedPragmaOptimizeOn
1509 
1510 /// @}
1511 
1512 /// ----------------------------------------------------------------------------
1513 /// CeedBasis Public API
1514 /// ----------------------------------------------------------------------------
1515 /// @addtogroup CeedBasisUser
1516 /// @{
1517 
1518 /**
1519   @brief Create a tensor-product basis for \f$H^1\f$ discretizations
1520 
1521   @param[in]  ceed        `Ceed` object used to create the `CeedBasis`
1522   @param[in]  dim         Topological dimension
1523   @param[in]  num_comp    Number of field components (1 for scalar fields)
1524   @param[in]  P_1d        Number of nodes in one dimension
1525   @param[in]  Q_1d        Number of quadrature points in one dimension
1526   @param[in]  interp_1d   Row-major (`Q_1d * P_1d`) matrix expressing the values of nodal basis functions at quadrature points
1527   @param[in]  grad_1d     Row-major (`Q_1d * P_1d`) matrix expressing derivatives of nodal basis functions at quadrature points
1528   @param[in]  q_ref_1d    Array of length `Q_1d` holding the locations of quadrature points on the 1D reference element `[-1, 1]`
1529   @param[in]  q_weight_1d Array of length `Q_1d` holding the quadrature weights on the reference element
1530   @param[out] basis       Address of the variable where the newly created `CeedBasis` will be stored
1531 
1532   @return An error code: 0 - success, otherwise - failure
1533 
1534   @ref User
1535 **/
1536 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
1537                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
1538   if (!ceed->BasisCreateTensorH1) {
1539     Ceed delegate;
1540 
1541     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1542     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateTensorH1");
1543     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1544     CeedCall(CeedDestroy(&delegate));
1545     return CEED_ERROR_SUCCESS;
1546   }
1547 
1548   CeedCheck(dim > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis dimension must be a positive value");
1549   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 component");
1550   CeedCheck(P_1d > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 node");
1551   CeedCheck(Q_1d > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 quadrature point");
1552 
1553   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
1554 
1555   CeedCall(CeedCalloc(1, basis));
1556   CeedCall(CeedObjectCreate(ceed, CeedBasisView_Object, CeedBasisDestroy_Object, &(*basis)->obj));
1557   (*basis)->is_tensor_basis = true;
1558   (*basis)->dim             = dim;
1559   (*basis)->topo            = topo;
1560   (*basis)->num_comp        = num_comp;
1561   (*basis)->P_1d            = P_1d;
1562   (*basis)->Q_1d            = Q_1d;
1563   (*basis)->P               = CeedIntPow(P_1d, dim);
1564   (*basis)->Q               = CeedIntPow(Q_1d, dim);
1565   (*basis)->fe_space        = CEED_FE_SPACE_H1;
1566   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
1567   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
1568   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
1569   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
1570   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
1571   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
1572   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
1573   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
1574   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
1575   return CEED_ERROR_SUCCESS;
1576 }
1577 
1578 /**
1579   @brief Create a tensor-product \f$H^1\f$ Lagrange basis
1580 
1581   @param[in]  ceed      `Ceed` object used to create the `CeedBasis`
1582   @param[in]  dim       Topological dimension of element
1583   @param[in]  num_comp  Number of field components (1 for scalar fields)
1584   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
1585                           The polynomial degree of the resulting `Q_k` element is `k = P - 1`.
1586   @param[in]  Q         Number of quadrature points in one dimension.
1587   @param[in]  quad_mode Distribution of the `Q` quadrature points (affects order of accuracy for the quadrature)
1588   @param[out] basis     Address of the variable where the newly created `CeedBasis` will be stored
1589 
1590   @return An error code: 0 - success, otherwise - failure
1591 
1592   @ref User
1593 **/
1594 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
1595   // Allocate
1596   int        ierr = CEED_ERROR_SUCCESS;
1597   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
1598 
1599   CeedCheck(dim > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis dimension must be a positive value");
1600   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 component");
1601   CeedCheck(P > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 node");
1602   CeedCheck(Q > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 quadrature point");
1603 
1604   // Get Nodes and Weights
1605   CeedCall(CeedCalloc(P * Q, &interp_1d));
1606   CeedCall(CeedCalloc(P * Q, &grad_1d));
1607   CeedCall(CeedCalloc(P, &nodes));
1608   CeedCall(CeedCalloc(Q, &q_ref_1d));
1609   CeedCall(CeedCalloc(Q, &q_weight_1d));
1610   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
1611   switch (quad_mode) {
1612     case CEED_GAUSS:
1613       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
1614       break;
1615     case CEED_GAUSS_LOBATTO:
1616       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
1617       break;
1618   }
1619   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
1620 
1621   // Build B, D matrix
1622   // Fornberg, 1998
1623   for (CeedInt i = 0; i < Q; i++) {
1624     c1                   = 1.0;
1625     c3                   = nodes[0] - q_ref_1d[i];
1626     interp_1d[i * P + 0] = 1.0;
1627     for (CeedInt j = 1; j < P; j++) {
1628       c2 = 1.0;
1629       c4 = c3;
1630       c3 = nodes[j] - q_ref_1d[i];
1631       for (CeedInt k = 0; k < j; k++) {
1632         dx = nodes[j] - nodes[k];
1633         c2 *= dx;
1634         if (k == j - 1) {
1635           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
1636           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
1637         }
1638         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
1639         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
1640       }
1641       c1 = c2;
1642     }
1643   }
1644   // Pass to CeedBasisCreateTensorH1
1645   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1646 cleanup:
1647   CeedCall(CeedFree(&interp_1d));
1648   CeedCall(CeedFree(&grad_1d));
1649   CeedCall(CeedFree(&nodes));
1650   CeedCall(CeedFree(&q_ref_1d));
1651   CeedCall(CeedFree(&q_weight_1d));
1652   return CEED_ERROR_SUCCESS;
1653 }
1654 
1655 /**
1656   @brief Create a non tensor-product basis for \f$H^1\f$ discretizations
1657 
1658   @param[in]  ceed      `Ceed` object used to create the `CeedBasis`
1659   @param[in]  topo      Topology of element, e.g. hypercube, simplex, etc
1660   @param[in]  num_comp  Number of field components (1 for scalar fields)
1661   @param[in]  num_nodes Total number of nodes
1662   @param[in]  num_qpts  Total number of quadrature points
1663   @param[in]  interp    Row-major (`num_qpts * num_nodes`) matrix expressing the values of nodal basis functions at quadrature points
1664   @param[in]  grad      Row-major (`dim * num_qpts * num_nodes`) matrix expressing derivatives of nodal basis functions at quadrature points
1665   @param[in]  q_ref     Array of length `num_qpts * dim` holding the locations of quadrature points on the reference element
1666   @param[in]  q_weight  Array of length `num_qpts` holding the quadrature weights on the reference element
1667   @param[out] basis     Address of the variable where the newly created `CeedBasis` will be stored
1668 
1669   @return An error code: 0 - success, otherwise - failure
1670 
1671   @ref User
1672 **/
1673 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1674                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1675   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1676 
1677   if (!ceed->BasisCreateH1) {
1678     Ceed delegate;
1679 
1680     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1681     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateH1");
1682     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1683     CeedCall(CeedDestroy(&delegate));
1684     return CEED_ERROR_SUCCESS;
1685   }
1686 
1687   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 component");
1688   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 node");
1689   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 quadrature point");
1690 
1691   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1692 
1693   CeedCall(CeedCalloc(1, basis));
1694   CeedCall(CeedObjectCreate(ceed, CeedBasisView_Object, CeedBasisDestroy_Object, &(*basis)->obj));
1695   (*basis)->is_tensor_basis = false;
1696   (*basis)->dim             = dim;
1697   (*basis)->topo            = topo;
1698   (*basis)->num_comp        = num_comp;
1699   (*basis)->P               = P;
1700   (*basis)->Q               = Q;
1701   (*basis)->fe_space        = CEED_FE_SPACE_H1;
1702   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1703   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1704   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1705   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1706   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1707   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1708   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1709   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1710   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1711   return CEED_ERROR_SUCCESS;
1712 }
1713 
1714 /**
1715   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1716 
1717   @param[in]  ceed      `Ceed` object used to create the `CeedBasis`
1718   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1719   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1720   @param[in]  num_nodes Total number of nodes (DoFs per element)
1721   @param[in]  num_qpts  Total number of quadrature points
1722   @param[in]  interp    Row-major (`dim * num_qpts * num_nodes`) matrix expressing the values of basis functions at quadrature points
1723   @param[in]  div       Row-major (`num_qpts * num_nodes`) matrix expressing divergence of basis functions at quadrature points
1724   @param[in]  q_ref     Array of length `num_qpts` * dim holding the locations of quadrature points on the reference element
1725   @param[in]  q_weight  Array of length `num_qpts` holding the quadrature weights on the reference element
1726   @param[out] basis     Address of the variable where the newly created `CeedBasis` will be stored
1727 
1728   @return An error code: 0 - success, otherwise - failure
1729 
1730   @ref User
1731 **/
1732 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1733                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1734   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1735 
1736   if (!ceed->BasisCreateHdiv) {
1737     Ceed delegate;
1738 
1739     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1740     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1741     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1742     CeedCall(CeedDestroy(&delegate));
1743     return CEED_ERROR_SUCCESS;
1744   }
1745 
1746   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 component");
1747   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 node");
1748   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 quadrature point");
1749 
1750   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1751 
1752   CeedCall(CeedCalloc(1, basis));
1753   CeedCall(CeedObjectCreate(ceed, CeedBasisView_Object, CeedBasisDestroy_Object, &(*basis)->obj));
1754   (*basis)->is_tensor_basis = false;
1755   (*basis)->dim             = dim;
1756   (*basis)->topo            = topo;
1757   (*basis)->num_comp        = num_comp;
1758   (*basis)->P               = P;
1759   (*basis)->Q               = Q;
1760   (*basis)->fe_space        = CEED_FE_SPACE_HDIV;
1761   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1762   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1763   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1764   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1765   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1766   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1767   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1768   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1769   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1770   return CEED_ERROR_SUCCESS;
1771 }
1772 
1773 /**
1774   @brief Create a non tensor-product basis for \f$H(\mathrm{curl})\f$ discretizations
1775 
1776   @param[in]  ceed      `Ceed` object used to create the `CeedBasis`
1777   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1778   @param[in]  num_comp  Number of components (usually 1 for vectors in \f$H(\mathrm{curl})\f$ bases)
1779   @param[in]  num_nodes Total number of nodes (DoFs per element)
1780   @param[in]  num_qpts  Total number of quadrature points
1781   @param[in]  interp    Row-major (`dim * num_qpts * num_nodes`) matrix expressing the values of basis functions at quadrature points
1782   @param[in]  curl      Row-major (`curl_comp * num_qpts * num_nodes`, `curl_comp = 1` if `dim < 3` otherwise `curl_comp = dim`) matrix expressing curl of basis functions at quadrature points
1783   @param[in]  q_ref     Array of length `num_qpts * dim` holding the locations of quadrature points on the reference element
1784   @param[in]  q_weight  Array of length `num_qpts` holding the quadrature weights on the reference element
1785   @param[out] basis     Address of the variable where the newly created `CeedBasis` will be stored
1786 
1787   @return An error code: 0 - success, otherwise - failure
1788 
1789   @ref User
1790 **/
1791 int CeedBasisCreateHcurl(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1792                          const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1793   CeedInt Q = num_qpts, P = num_nodes, dim = 0, curl_comp = 0;
1794 
1795   if (!ceed->BasisCreateHcurl) {
1796     Ceed delegate;
1797 
1798     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1799     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHcurl");
1800     CeedCall(CeedBasisCreateHcurl(delegate, topo, num_comp, num_nodes, num_qpts, interp, curl, q_ref, q_weight, basis));
1801     CeedCall(CeedDestroy(&delegate));
1802     return CEED_ERROR_SUCCESS;
1803   }
1804 
1805   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 component");
1806   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 node");
1807   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "CeedBasis must have at least 1 quadrature point");
1808 
1809   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1810   curl_comp = (dim < 3) ? 1 : dim;
1811 
1812   CeedCall(CeedCalloc(1, basis));
1813   CeedCall(CeedObjectCreate(ceed, CeedBasisView_Object, CeedBasisDestroy_Object, &(*basis)->obj));
1814   (*basis)->is_tensor_basis = false;
1815   (*basis)->dim             = dim;
1816   (*basis)->topo            = topo;
1817   (*basis)->num_comp        = num_comp;
1818   (*basis)->P               = P;
1819   (*basis)->Q               = Q;
1820   (*basis)->fe_space        = CEED_FE_SPACE_HCURL;
1821   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1822   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1823   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1824   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1825   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1826   CeedCall(CeedMalloc(curl_comp * Q * P, &(*basis)->curl));
1827   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1828   if (curl) memcpy((*basis)->curl, curl, curl_comp * Q * P * sizeof(curl[0]));
1829   CeedCall(ceed->BasisCreateHcurl(topo, dim, P, Q, interp, curl, q_ref, q_weight, *basis));
1830   return CEED_ERROR_SUCCESS;
1831 }
1832 
1833 /**
1834   @brief Create a `CeedBasis` for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1835 
1836   Only @ref CEED_EVAL_INTERP will be valid for the new basis, `basis_project`.
1837   For \f$H^1\f$ spaces, @ref CEED_EVAL_GRAD will also be valid.
1838   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
1839   The gradient (for the \f$H^1\f$ case) is given by `grad_project = interp_to^+ * grad_from`.
1840 
1841   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
1842 
1843   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has.
1844         If `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1845 
1846   Note: If either `basis_from` or `basis_to` are non-tensor, then `basis_project` will also be non-tensor
1847 
1848   @param[in]  basis_from    `CeedBasis` to prolong from
1849   @param[in]  basis_to      `CeedBasis` to prolong to
1850   @param[out] basis_project Address of the variable where the newly created `CeedBasis` will be stored
1851 
1852   @return An error code: 0 - success, otherwise - failure
1853 
1854   @ref User
1855 **/
1856 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1857   Ceed        ceed;
1858   bool        create_tensor;
1859   CeedInt     dim, num_comp;
1860   CeedScalar *interp_project, *grad_project;
1861 
1862   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1863 
1864   // Create projection matrix
1865   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1866 
1867   // Build basis
1868   {
1869     bool is_tensor_to, is_tensor_from;
1870 
1871     CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
1872     CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
1873     create_tensor = is_tensor_from && is_tensor_to;
1874   }
1875   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1876   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1877   if (create_tensor) {
1878     CeedInt P_1d_to, P_1d_from;
1879 
1880     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1881     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1882     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, NULL, NULL, basis_project));
1883   } else {
1884     // Even if basis_to and basis_from are not H1, the resulting basis is H1 for interpolation to work
1885     CeedInt          num_nodes_to, num_nodes_from;
1886     CeedElemTopology topo;
1887 
1888     CeedCall(CeedBasisGetTopology(basis_from, &topo));
1889     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1890     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1891     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, NULL, NULL, basis_project));
1892   }
1893 
1894   // Cleanup
1895   CeedCall(CeedFree(&interp_project));
1896   CeedCall(CeedFree(&grad_project));
1897   CeedCall(CeedDestroy(&ceed));
1898   return CEED_ERROR_SUCCESS;
1899 }
1900 
1901 /**
1902   @brief Copy the pointer to a `CeedBasis`.
1903 
1904   Note: If the value of `*basis_copy` passed into this function is non-`NULL`, then it is assumed that `*basis_copy` is a pointer to a `CeedBasis`.
1905         This `CeedBasis` will be destroyed if `*basis_copy` is the only reference to this `CeedBasis`.
1906 
1907   @param[in]     basis      `CeedBasis` to copy reference to
1908   @param[in,out] basis_copy Variable to store copied reference
1909 
1910   @return An error code: 0 - success, otherwise - failure
1911 
1912   @ref User
1913 **/
1914 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1915   if (basis != CEED_BASIS_NONE) CeedCall(CeedBasisReference(basis));
1916   CeedCall(CeedBasisDestroy(basis_copy));
1917   *basis_copy = basis;
1918   return CEED_ERROR_SUCCESS;
1919 }
1920 
1921 /**
1922   @brief Set the number of tabs to indent for @ref CeedBasisView() output
1923 
1924   @param[in] basis    `CeedBasis` to set the number of view tabs
1925   @param[in] num_tabs Number of view tabs to set
1926 
1927   @return Error code: 0 - success, otherwise - failure
1928 
1929   @ref User
1930 **/
1931 int CeedBasisSetNumViewTabs(CeedBasis basis, CeedInt num_tabs) {
1932   CeedCall(CeedObjectSetNumViewTabs((CeedObject)basis, num_tabs));
1933   return CEED_ERROR_SUCCESS;
1934 }
1935 
1936 /**
1937   @brief Get the number of tabs to indent for @ref CeedBasisView() output
1938 
1939   @param[in]  basis    `CeedBasis` to get the number of view tabs
1940   @param[out] num_tabs Number of view tabs
1941 
1942   @return Error code: 0 - success, otherwise - failure
1943 
1944   @ref User
1945 **/
1946 int CeedBasisGetNumViewTabs(CeedBasis basis, CeedInt *num_tabs) {
1947   CeedCall(CeedObjectGetNumViewTabs((CeedObject)basis, num_tabs));
1948   return CEED_ERROR_SUCCESS;
1949 }
1950 
1951 /**
1952   @brief View a `CeedBasis`
1953 
1954   @param[in] basis  `CeedBasis` to view
1955   @param[in] stream Stream to view to, e.g., `stdout`
1956 
1957   @return An error code: 0 - success, otherwise - failure
1958 
1959   @ref User
1960 **/
1961 int CeedBasisView(CeedBasis basis, FILE *stream) {
1962   bool             is_tensor_basis;
1963   char            *tabs = NULL;
1964   CeedElemTopology topo;
1965   CeedFESpace      fe_space;
1966 
1967   // Basis data
1968   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
1969   CeedCall(CeedBasisGetTopology(basis, &topo));
1970   CeedCall(CeedBasisGetFESpace(basis, &fe_space));
1971 
1972   {
1973     CeedInt num_tabs = 0;
1974 
1975     CeedCall(CeedBasisGetNumViewTabs(basis, &num_tabs));
1976     CeedCall(CeedCalloc(CEED_TAB_WIDTH * num_tabs + 1, &tabs));
1977     for (CeedInt i = 0; i < CEED_TAB_WIDTH * num_tabs; i++) tabs[i] = ' ';
1978   }
1979 
1980   // Print FE space and element topology of the basis
1981   fprintf(stream, "%sCeedBasis in a %s on a %s element\n", tabs, CeedFESpaces[fe_space], CeedElemTopologies[topo]);
1982   if (is_tensor_basis) {
1983     fprintf(stream, "%s  P: %" CeedInt_FMT "\n%s  Q: %" CeedInt_FMT "\n", tabs, basis->P_1d, tabs, basis->Q_1d);
1984   } else {
1985     fprintf(stream, "%s  P: %" CeedInt_FMT "\n%s  Q: %" CeedInt_FMT "\n", tabs, basis->P, tabs, basis->Q);
1986   }
1987   fprintf(stream, "%s  dimension: %" CeedInt_FMT "\n%s  field components: %" CeedInt_FMT "\n", tabs, basis->dim, tabs, basis->num_comp);
1988   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1989   if (is_tensor_basis) {  // tensor basis
1990     CeedInt           P_1d, Q_1d;
1991     const CeedScalar *q_ref_1d, *q_weight_1d, *interp_1d, *grad_1d;
1992 
1993     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
1994     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1995     CeedCall(CeedBasisGetQRef(basis, &q_ref_1d));
1996     CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
1997     CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
1998     CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
1999 
2000     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, Q_1d, q_ref_1d, tabs, stream));
2001     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, Q_1d, q_weight_1d, tabs, stream));
2002     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", Q_1d, P_1d, interp_1d, tabs, stream));
2003     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", Q_1d, P_1d, grad_1d, tabs, stream));
2004   } else {  // non-tensor basis
2005     CeedInt           P, Q, dim, q_comp;
2006     const CeedScalar *q_ref, *q_weight, *interp, *grad, *div, *curl;
2007 
2008     CeedCall(CeedBasisGetNumNodes(basis, &P));
2009     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &Q));
2010     CeedCall(CeedBasisGetDimension(basis, &dim));
2011     CeedCall(CeedBasisGetQRef(basis, &q_ref));
2012     CeedCall(CeedBasisGetQWeights(basis, &q_weight));
2013     CeedCall(CeedBasisGetInterp(basis, &interp));
2014     CeedCall(CeedBasisGetGrad(basis, &grad));
2015     CeedCall(CeedBasisGetDiv(basis, &div));
2016     CeedCall(CeedBasisGetCurl(basis, &curl));
2017 
2018     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, Q * dim, q_ref, tabs, stream));
2019     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, Q, q_weight, tabs, stream));
2020     CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp));
2021     CeedCall(CeedScalarView("interp", "\t% 12.8f", q_comp * Q, P, interp, tabs, stream));
2022     if (grad) {
2023       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp));
2024       CeedCall(CeedScalarView("grad", "\t% 12.8f", q_comp * Q, P, grad, tabs, stream));
2025     }
2026     if (div) {
2027       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp));
2028       CeedCall(CeedScalarView("div", "\t% 12.8f", q_comp * Q, P, div, tabs, stream));
2029     }
2030     if (curl) {
2031       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp));
2032       CeedCall(CeedScalarView("curl", "\t% 12.8f", q_comp * Q, P, curl, tabs, stream));
2033     }
2034   }
2035   CeedCall(CeedFree(&tabs));
2036   return CEED_ERROR_SUCCESS;
2037 }
2038 
2039 /**
2040   @brief Apply basis evaluation from nodes to quadrature points or vice versa
2041 
2042   @param[in]  basis     `CeedBasis` to evaluate
2043   @param[in]  num_elem  The number of elements to apply the basis evaluation to;
2044                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
2045   @param[in]  t_mode    @ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
2046                           @ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
2047   @param[in]  eval_mode @ref CEED_EVAL_NONE to use values directly,
2048                           @ref CEED_EVAL_INTERP to use interpolated values,
2049                           @ref CEED_EVAL_GRAD to use gradients,
2050                           @ref CEED_EVAL_DIV to use divergence,
2051                           @ref CEED_EVAL_CURL to use curl,
2052                           @ref CEED_EVAL_WEIGHT to use quadrature weights
2053   @param[in]  u         Input `CeedVector`
2054   @param[out] v         Output `CeedVector`
2055 
2056   @return An error code: 0 - success, otherwise - failure
2057 
2058   @ref User
2059 **/
2060 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
2061   CeedCall(CeedBasisApplyCheckDims(basis, num_elem, t_mode, eval_mode, u, v));
2062   CeedCheck(basis->Apply, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedBasisApply");
2063   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
2064   return CEED_ERROR_SUCCESS;
2065 }
2066 
2067 /**
2068   @brief Apply basis evaluation from quadrature points to nodes and sum into target vector
2069 
2070   @param[in]  basis     `CeedBasis` to evaluate
2071   @param[in]  num_elem  The number of elements to apply the basis evaluation to;
2072                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
2073   @param[in]  t_mode    @ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes;
2074                            @ref CEED_NOTRANSPOSE is not valid for `CeedBasisApplyAdd()`
2075   @param[in]  eval_mode @ref CEED_EVAL_NONE to use values directly,
2076                           @ref CEED_EVAL_INTERP to use interpolated values,
2077                           @ref CEED_EVAL_GRAD to use gradients,
2078                           @ref CEED_EVAL_DIV to use divergence,
2079                           @ref CEED_EVAL_CURL to use curl,
2080                           @ref CEED_EVAL_WEIGHT to use quadrature weights
2081   @param[in]  u         Input `CeedVector`
2082   @param[out] v         Output `CeedVector` to sum into
2083 
2084   @return An error code: 0 - success, otherwise - failure
2085 
2086   @ref User
2087 **/
2088 int CeedBasisApplyAdd(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
2089   CeedCheck(t_mode == CEED_TRANSPOSE, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "CeedBasisApplyAdd only supports CEED_TRANSPOSE");
2090   CeedCall(CeedBasisApplyCheckDims(basis, num_elem, t_mode, eval_mode, u, v));
2091   CeedCheck(basis->ApplyAdd, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "Backend does not implement CeedBasisApplyAdd");
2092   CeedCall(basis->ApplyAdd(basis, num_elem, t_mode, eval_mode, u, v));
2093   return CEED_ERROR_SUCCESS;
2094 }
2095 
2096 /**
2097   @brief Apply basis evaluation from nodes to arbitrary points
2098 
2099   @param[in]  basis      `CeedBasis` to evaluate
2100   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
2101                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
2102   @param[in]  num_points Array of the number of points to apply the basis evaluation to in each element, size `num_elem`
2103   @param[in]  t_mode     @ref CEED_NOTRANSPOSE to evaluate from nodes to points;
2104                            @ref CEED_TRANSPOSE to apply the transpose, mapping from points to nodes
2105   @param[in]  eval_mode  @ref CEED_EVAL_INTERP to use interpolated values,
2106                            @ref CEED_EVAL_GRAD to use gradients,
2107                            @ref CEED_EVAL_WEIGHT to use quadrature weights
2108   @param[in]  x_ref      `CeedVector` holding reference coordinates of each point
2109   @param[in]  u          Input `CeedVector`, of length `num_nodes * num_comp` for @ref CEED_NOTRANSPOSE
2110   @param[out] v          Output `CeedVector`, of length `num_points * num_q_comp` for @ref CEED_NOTRANSPOSE with @ref CEED_EVAL_INTERP
2111 
2112   @return An error code: 0 - success, otherwise - failure
2113 
2114   @ref User
2115 **/
2116 int CeedBasisApplyAtPoints(CeedBasis basis, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
2117                            CeedVector x_ref, CeedVector u, CeedVector v) {
2118   CeedCall(CeedBasisApplyAtPointsCheckDims(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2119   if (basis->ApplyAtPoints) {
2120     CeedCall(basis->ApplyAtPoints(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2121   } else {
2122     CeedCall(CeedBasisApplyAtPoints_Core(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2123   }
2124   return CEED_ERROR_SUCCESS;
2125 }
2126 
2127 /**
2128   @brief Apply basis evaluation from nodes to arbitrary points and sum into target vector
2129 
2130   @param[in]  basis      `CeedBasis` to evaluate
2131   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
2132                           the backend will specify the ordering in @ref CeedElemRestrictionCreate()
2133   @param[in]  num_points Array of the number of points to apply the basis evaluation to in each element, size `num_elem`
2134   @param[in]  t_mode     @ref CEED_NOTRANSPOSE to evaluate from nodes to points;
2135                            @ref CEED_NOTRANSPOSE is not valid for `CeedBasisApplyAddAtPoints()`
2136   @param[in]  eval_mode  @ref CEED_EVAL_INTERP to use interpolated values,
2137                            @ref CEED_EVAL_GRAD to use gradients,
2138                            @ref CEED_EVAL_WEIGHT to use quadrature weights
2139   @param[in]  x_ref      `CeedVector` holding reference coordinates of each point
2140   @param[in]  u          Input `CeedVector`, of length `num_nodes * num_comp` for @ref CEED_NOTRANSPOSE
2141   @param[out] v          Output `CeedVector`, of length `num_points * num_q_comp` for @ref CEED_NOTRANSPOSE with @ref CEED_EVAL_INTERP
2142 
2143   @return An error code: 0 - success, otherwise - failure
2144 
2145   @ref User
2146 **/
2147 int CeedBasisApplyAddAtPoints(CeedBasis basis, CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
2148                               CeedVector x_ref, CeedVector u, CeedVector v) {
2149   CeedCheck(t_mode == CEED_TRANSPOSE, CeedBasisReturnCeed(basis), CEED_ERROR_UNSUPPORTED, "CeedBasisApplyAddAtPoints only supports CEED_TRANSPOSE");
2150   CeedCall(CeedBasisApplyAtPointsCheckDims(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2151   if (basis->ApplyAddAtPoints) {
2152     CeedCall(basis->ApplyAddAtPoints(basis, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2153   } else {
2154     CeedCall(CeedBasisApplyAtPoints_Core(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
2155   }
2156   return CEED_ERROR_SUCCESS;
2157 }
2158 
2159 /**
2160   @brief Get the `Ceed` associated with a `CeedBasis`
2161 
2162   @param[in]  basis `CeedBasis`
2163   @param[out] ceed  Variable to store `Ceed`
2164 
2165   @return An error code: 0 - success, otherwise - failure
2166 
2167   @ref Advanced
2168 **/
2169 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
2170   CeedCall(CeedObjectGetCeed((CeedObject)basis, ceed));
2171   return CEED_ERROR_SUCCESS;
2172 }
2173 
2174 /**
2175   @brief Return the `Ceed` associated with a `CeedBasis`
2176 
2177   @param[in] basis `CeedBasis`
2178 
2179   @return `Ceed` associated with the `basis`
2180 
2181   @ref Advanced
2182 **/
2183 Ceed CeedBasisReturnCeed(CeedBasis basis) { return CeedObjectReturnCeed((CeedObject)basis); }
2184 
2185 /**
2186   @brief Get dimension for given `CeedBasis`
2187 
2188   @param[in]  basis `CeedBasis`
2189   @param[out] dim   Variable to store dimension of basis
2190 
2191   @return An error code: 0 - success, otherwise - failure
2192 
2193   @ref Advanced
2194 **/
2195 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
2196   *dim = basis->dim;
2197   return CEED_ERROR_SUCCESS;
2198 }
2199 
2200 /**
2201   @brief Get topology for given `CeedBasis`
2202 
2203   @param[in]  basis `CeedBasis`
2204   @param[out] topo  Variable to store topology of basis
2205 
2206   @return An error code: 0 - success, otherwise - failure
2207 
2208   @ref Advanced
2209 **/
2210 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
2211   *topo = basis->topo;
2212   return CEED_ERROR_SUCCESS;
2213 }
2214 
2215 /**
2216   @brief Get number of components for given `CeedBasis`
2217 
2218   @param[in]  basis    `CeedBasis`
2219   @param[out] num_comp Variable to store number of components
2220 
2221   @return An error code: 0 - success, otherwise - failure
2222 
2223   @ref Advanced
2224 **/
2225 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
2226   *num_comp = basis->num_comp;
2227   return CEED_ERROR_SUCCESS;
2228 }
2229 
2230 /**
2231   @brief Get total number of nodes (in `dim` dimensions) of a `CeedBasis`
2232 
2233   @param[in]  basis `CeedBasis`
2234   @param[out] P     Variable to store number of nodes
2235 
2236   @return An error code: 0 - success, otherwise - failure
2237 
2238   @ref Utility
2239 **/
2240 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
2241   *P = basis->P;
2242   return CEED_ERROR_SUCCESS;
2243 }
2244 
2245 /**
2246   @brief Get total number of nodes (in 1 dimension) of a `CeedBasis`
2247 
2248   @param[in]  basis `CeedBasis`
2249   @param[out] P_1d  Variable to store number of nodes
2250 
2251   @return An error code: 0 - success, otherwise - failure
2252 
2253   @ref Advanced
2254 **/
2255 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
2256   CeedCheck(basis->is_tensor_basis, CeedBasisReturnCeed(basis), CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor CeedBasis");
2257   *P_1d = basis->P_1d;
2258   return CEED_ERROR_SUCCESS;
2259 }
2260 
2261 /**
2262   @brief Get total number of quadrature points (in `dim` dimensions) of a `CeedBasis`
2263 
2264   @param[in]  basis `CeedBasis`
2265   @param[out] Q     Variable to store number of quadrature points
2266 
2267   @return An error code: 0 - success, otherwise - failure
2268 
2269   @ref Utility
2270 **/
2271 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
2272   *Q = basis->Q;
2273   return CEED_ERROR_SUCCESS;
2274 }
2275 
2276 /**
2277   @brief Get total number of quadrature points (in 1 dimension) of a `CeedBasis`
2278 
2279   @param[in]  basis `CeedBasis`
2280   @param[out] Q_1d  Variable to store number of quadrature points
2281 
2282   @return An error code: 0 - success, otherwise - failure
2283 
2284   @ref Advanced
2285 **/
2286 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
2287   CeedCheck(basis->is_tensor_basis, CeedBasisReturnCeed(basis), CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor CeedBasis");
2288   *Q_1d = basis->Q_1d;
2289   return CEED_ERROR_SUCCESS;
2290 }
2291 
2292 /**
2293   @brief Get reference coordinates of quadrature points (in `dim` dimensions) of a `CeedBasis`
2294 
2295   @param[in]  basis `CeedBasis`
2296   @param[out] q_ref Variable to store reference coordinates of quadrature points
2297 
2298   @return An error code: 0 - success, otherwise - failure
2299 
2300   @ref Advanced
2301 **/
2302 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
2303   *q_ref = basis->q_ref_1d;
2304   return CEED_ERROR_SUCCESS;
2305 }
2306 
2307 /**
2308   @brief Get quadrature weights of quadrature points (in `dim` dimensions) of a `CeedBasis`
2309 
2310   @param[in]  basis    `CeedBasis`
2311   @param[out] q_weight Variable to store quadrature weights
2312 
2313   @return An error code: 0 - success, otherwise - failure
2314 
2315   @ref Advanced
2316 **/
2317 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
2318   *q_weight = basis->q_weight_1d;
2319   return CEED_ERROR_SUCCESS;
2320 }
2321 
2322 /**
2323   @brief Get interpolation matrix of a `CeedBasis`
2324 
2325   @param[in]  basis  `CeedBasis`
2326   @param[out] interp Variable to store interpolation matrix
2327 
2328   @return An error code: 0 - success, otherwise - failure
2329 
2330   @ref Advanced
2331 **/
2332 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
2333   if (!basis->interp && basis->is_tensor_basis) {
2334     // Allocate
2335     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
2336 
2337     // Initialize
2338     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
2339 
2340     // Calculate
2341     for (CeedInt d = 0; d < basis->dim; d++) {
2342       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
2343         for (CeedInt node = 0; node < basis->P; node++) {
2344           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
2345           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
2346 
2347           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
2348         }
2349       }
2350     }
2351   }
2352   *interp = basis->interp;
2353   return CEED_ERROR_SUCCESS;
2354 }
2355 
2356 /**
2357   @brief Get 1D interpolation matrix of a tensor product `CeedBasis`
2358 
2359   @param[in]  basis     `CeedBasis`
2360   @param[out] interp_1d Variable to store interpolation matrix
2361 
2362   @return An error code: 0 - success, otherwise - failure
2363 
2364   @ref Backend
2365 **/
2366 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
2367   bool is_tensor_basis;
2368 
2369   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2370   CeedCheck(is_tensor_basis, CeedBasisReturnCeed(basis), CEED_ERROR_MINOR, "CeedBasis is not a tensor product CeedBasis");
2371   *interp_1d = basis->interp_1d;
2372   return CEED_ERROR_SUCCESS;
2373 }
2374 
2375 /**
2376   @brief Get gradient matrix of a `CeedBasis`
2377 
2378   @param[in]  basis `CeedBasis`
2379   @param[out] grad  Variable to store gradient matrix
2380 
2381   @return An error code: 0 - success, otherwise - failure
2382 
2383   @ref Advanced
2384 **/
2385 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
2386   if (!basis->grad && basis->is_tensor_basis) {
2387     // Allocate
2388     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
2389 
2390     // Initialize
2391     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
2392 
2393     // Calculate
2394     for (CeedInt d = 0; d < basis->dim; d++) {
2395       for (CeedInt i = 0; i < basis->dim; i++) {
2396         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
2397           for (CeedInt node = 0; node < basis->P; node++) {
2398             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
2399             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
2400 
2401             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
2402             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
2403           }
2404         }
2405       }
2406     }
2407   }
2408   *grad = basis->grad;
2409   return CEED_ERROR_SUCCESS;
2410 }
2411 
2412 /**
2413   @brief Get 1D gradient matrix of a tensor product `CeedBasis`
2414 
2415   @param[in]  basis   `CeedBasis`
2416   @param[out] grad_1d Variable to store gradient matrix
2417 
2418   @return An error code: 0 - success, otherwise - failure
2419 
2420   @ref Advanced
2421 **/
2422 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
2423   bool is_tensor_basis;
2424 
2425   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2426   CeedCheck(is_tensor_basis, CeedBasisReturnCeed(basis), CEED_ERROR_MINOR, "CeedBasis is not a tensor product CeedBasis");
2427   *grad_1d = basis->grad_1d;
2428   return CEED_ERROR_SUCCESS;
2429 }
2430 
2431 /**
2432   @brief Get divergence matrix of a `CeedBasis`
2433 
2434   @param[in]  basis `CeedBasis`
2435   @param[out] div   Variable to store divergence matrix
2436 
2437   @return An error code: 0 - success, otherwise - failure
2438 
2439   @ref Advanced
2440 **/
2441 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
2442   *div = basis->div;
2443   return CEED_ERROR_SUCCESS;
2444 }
2445 
2446 /**
2447   @brief Get curl matrix of a `CeedBasis`
2448 
2449   @param[in]  basis `CeedBasis`
2450   @param[out] curl  Variable to store curl matrix
2451 
2452   @return An error code: 0 - success, otherwise - failure
2453 
2454   @ref Advanced
2455 **/
2456 int CeedBasisGetCurl(CeedBasis basis, const CeedScalar **curl) {
2457   *curl = basis->curl;
2458   return CEED_ERROR_SUCCESS;
2459 }
2460 
2461 /**
2462   @brief Destroy a @ref CeedBasis
2463 
2464   @param[in,out] basis `CeedBasis` to destroy
2465 
2466   @return An error code: 0 - success, otherwise - failure
2467 
2468   @ref User
2469 **/
2470 int CeedBasisDestroy(CeedBasis *basis) {
2471   if (!*basis || *basis == CEED_BASIS_NONE || CeedObjectDereference((CeedObject)*basis) > 0) {
2472     *basis = NULL;
2473     return CEED_ERROR_SUCCESS;
2474   }
2475   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
2476   CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
2477   CeedCall(CeedFree(&(*basis)->q_ref_1d));
2478   CeedCall(CeedFree(&(*basis)->q_weight_1d));
2479   CeedCall(CeedFree(&(*basis)->interp));
2480   CeedCall(CeedFree(&(*basis)->interp_1d));
2481   CeedCall(CeedFree(&(*basis)->grad));
2482   CeedCall(CeedFree(&(*basis)->grad_1d));
2483   CeedCall(CeedFree(&(*basis)->div));
2484   CeedCall(CeedFree(&(*basis)->curl));
2485   CeedCall(CeedVectorDestroy(&(*basis)->vec_chebyshev));
2486   CeedCall(CeedBasisDestroy(&(*basis)->basis_chebyshev));
2487   CeedCall(CeedObjectDestroy_Private(&(*basis)->obj));
2488   CeedCall(CeedFree(basis));
2489   return CEED_ERROR_SUCCESS;
2490 }
2491 
2492 /**
2493   @brief Construct a Gauss-Legendre quadrature
2494 
2495   @param[in]  Q           Number of quadrature points (integrates polynomials of degree `2*Q-1` exactly)
2496   @param[out] q_ref_1d    Array of length `Q` to hold the abscissa on `[-1, 1]`
2497   @param[out] q_weight_1d Array of length `Q` to hold the weights
2498 
2499   @return An error code: 0 - success, otherwise - failure
2500 
2501   @ref Utility
2502 **/
2503 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
2504   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
2505 
2506   // Build q_ref_1d, q_weight_1d
2507   for (CeedInt i = 0; i <= Q / 2; i++) {
2508     // Guess
2509     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
2510     // Pn(xi)
2511     P0 = 1.0;
2512     P1 = xi;
2513     P2 = 0.0;
2514     for (CeedInt j = 2; j <= Q; j++) {
2515       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2516       P0 = P1;
2517       P1 = P2;
2518     }
2519     // First Newton Step
2520     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2521     xi  = xi - P2 / dP2;
2522     // Newton to convergence
2523     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
2524       P0 = 1.0;
2525       P1 = xi;
2526       for (CeedInt j = 2; j <= Q; j++) {
2527         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2528         P0 = P1;
2529         P1 = P2;
2530       }
2531       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2532       xi  = xi - P2 / dP2;
2533     }
2534     // Save xi, wi
2535     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
2536     q_weight_1d[i]         = wi;
2537     q_weight_1d[Q - 1 - i] = wi;
2538     q_ref_1d[i]            = -xi;
2539     q_ref_1d[Q - 1 - i]    = xi;
2540   }
2541   return CEED_ERROR_SUCCESS;
2542 }
2543 
2544 /**
2545   @brief Construct a Gauss-Legendre-Lobatto quadrature
2546 
2547   @param[in]  Q           Number of quadrature points (integrates polynomials of degree `2*Q-3` exactly)
2548   @param[out] q_ref_1d    Array of length `Q` to hold the abscissa on `[-1, 1]`
2549   @param[out] q_weight_1d Array of length `Q` to hold the weights
2550 
2551   @return An error code: 0 - success, otherwise - failure
2552 
2553   @ref Utility
2554 **/
2555 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
2556   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
2557 
2558   // Build q_ref_1d, q_weight_1d
2559   // Set endpoints
2560   CeedCheck(Q > 1, NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
2561   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
2562   if (q_weight_1d) {
2563     q_weight_1d[0]     = wi;
2564     q_weight_1d[Q - 1] = wi;
2565   }
2566   q_ref_1d[0]     = -1.0;
2567   q_ref_1d[Q - 1] = 1.0;
2568   // Interior
2569   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
2570     // Guess
2571     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
2572     // Pn(xi)
2573     P0 = 1.0;
2574     P1 = xi;
2575     P2 = 0.0;
2576     for (CeedInt j = 2; j < Q; j++) {
2577       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2578       P0 = P1;
2579       P1 = P2;
2580     }
2581     // First Newton step
2582     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2583     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2584     xi   = xi - dP2 / d2P2;
2585     // Newton to convergence
2586     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
2587       P0 = 1.0;
2588       P1 = xi;
2589       for (CeedInt j = 2; j < Q; j++) {
2590         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2591         P0 = P1;
2592         P1 = P2;
2593       }
2594       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2595       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2596       xi   = xi - dP2 / d2P2;
2597     }
2598     // Save xi, wi
2599     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
2600     if (q_weight_1d) {
2601       q_weight_1d[i]         = wi;
2602       q_weight_1d[Q - 1 - i] = wi;
2603     }
2604     q_ref_1d[i]         = -xi;
2605     q_ref_1d[Q - 1 - i] = xi;
2606   }
2607   return CEED_ERROR_SUCCESS;
2608 }
2609 
2610 /// @}
2611