xref: /libCEED/interface/ceed-basis.c (revision 11b88dda510d0aa70e79dc59ad165e2a5539c3c3)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40   Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Compute Givens rotation
66 
67   Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
68 
69   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
70   @param[in]     c      Cosine factor
71   @param[in]     s      Sine factor
72   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
73                           @ref CEED_TRANSPOSE for the opposite rotation
74   @param[in]     i      First row/column to apply rotation
75   @param[in]     k      Second row/column to apply rotation
76   @param[in]     m      Number of rows in A
77   @param[in]     n      Number of columns in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
84   CeedInt stride_j = 1, stride_ik = m, num_its = n;
85   if (t_mode == CEED_NOTRANSPOSE) {
86     stride_j  = n;
87     stride_ik = 1;
88     num_its   = m;
89   }
90 
91   // Apply rotation
92   for (CeedInt j = 0; j < num_its; j++) {
93     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
94     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
95     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
96   }
97   return CEED_ERROR_SUCCESS;
98 }
99 
100 /**
101   @brief View an array stored in a CeedBasis
102 
103   @param[in] name   Name of array
104   @param[in] fp_fmt Printing format
105   @param[in] m      Number of rows in array
106   @param[in] n      Number of columns in array
107   @param[in] a      Array to be viewed
108   @param[in] stream Stream to view to, e.g., stdout
109 
110   @return An error code: 0 - success, otherwise - failure
111 
112   @ref Developer
113 **/
114 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
115   for (CeedInt i = 0; i < m; i++) {
116     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
117     else fprintf(stream, "%12s:", name);
118     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
119     fputs("\n", stream);
120   }
121   return CEED_ERROR_SUCCESS;
122 }
123 
124 /**
125   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
126 
127   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
128   The gradient is given by `grad_project = interp_to^+ * grad_from`, and is only computed for H^1 spaces otherwise it should not be used.
129 
130   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
131 
132   @param[in]  basis_from     CeedBasis to project from
133   @param[in]  basis_to       CeedBasis to project to
134   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
135   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
136 
137   @return An error code: 0 - success, otherwise - failure
138 
139   @ref Developer
140 **/
141 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
142   Ceed ceed;
143   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
144 
145   // Check for compatible quadrature spaces
146   CeedInt Q_to, Q_from;
147   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
148   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
149   if (Q_to != Q_from) {
150     // LCOV_EXCL_START
151     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
152     // LCOV_EXCL_STOP
153   }
154 
155   // Check for matching tensor or non-tensor
156   CeedInt P_to, P_from, Q = Q_to;
157   bool    is_tensor_to, is_tensor_from;
158   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
159   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
160   if (is_tensor_to && is_tensor_from) {
161     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
162     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
163     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
164   } else if (!is_tensor_to && !is_tensor_from) {
165     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
166     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
167   } else {
168     // LCOV_EXCL_START
169     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
170     // LCOV_EXCL_STOP
171   }
172 
173   // Check for matching FE space
174   CeedFESpace fe_space_to, fe_space_from;
175   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space_to));
176   CeedCall(CeedBasisGetFESpace(basis_from, &fe_space_from));
177   if (fe_space_to != fe_space_from) {
178     // LCOV_EXCL_START
179     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be the same FE space type");
180     // LCOV_EXCL_STOP
181   }
182 
183   // Get source matrices
184   CeedInt           dim, q_comp = 1;
185   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL;
186   CeedScalar       *interp_to, *interp_from, *tau;
187   CeedCall(CeedBasisGetDimension(basis_to, &dim));
188   if (is_tensor_to) {
189     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
190     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
191   } else {
192     CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_INTERP, &q_comp));
193     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
194     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
195   }
196   CeedCall(CeedMalloc(Q * P_from * q_comp, &interp_from));
197   CeedCall(CeedMalloc(Q * P_to * q_comp, &interp_to));
198   CeedCall(CeedCalloc(P_to * P_from, interp_project));
199   CeedCall(CeedMalloc(Q * q_comp, &tau));
200 
201   // `grad_project = interp_to^+ * grad_from` is computed for the H^1 space case so the
202   // projection basis will have a gradient operation
203   const CeedScalar *grad_from_source = NULL;
204   if (fe_space_to == CEED_FE_SPACE_H1) {
205     if (is_tensor_to) {
206       CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
207     } else {
208       CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
209     }
210     CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
211   } else {
212     // Projected derivate operator is only calculated for H^1 but we allocate it for the
213     // basis construction later
214     CeedInt q_comp_deriv = 1;
215     if (fe_space_to == CEED_FE_SPACE_HDIV) {
216       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_DIV, &q_comp_deriv));
217     } else if (fe_space_to == CEED_FE_SPACE_HCURL) {
218       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_CURL, &q_comp_deriv));
219     }
220     CeedCall(CeedCalloc(P_to * P_from * q_comp_deriv, grad_project));
221   }
222 
223   // QR Factorization, interp_to = Q R
224   memcpy(interp_to, interp_to_source, Q * P_to * q_comp * sizeof(interp_to_source[0]));
225   CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q * q_comp, P_to));
226 
227   // Build matrices
228   CeedInt     num_matrices = 1 + (fe_space_to == CEED_FE_SPACE_H1) * (is_tensor_to ? 1 : dim);
229   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
230   input_from[0]     = (CeedScalar *)interp_from_source;
231   output_project[0] = *interp_project;
232   for (CeedInt m = 1; m < num_matrices; m++) {
233     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
234     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
235   }
236   for (CeedInt m = 0; m < num_matrices; m++) {
237     // Apply Q^T, interp_from = Q^T interp_from
238     memcpy(interp_from, input_from[m], Q * P_from * q_comp * sizeof(input_from[m][0]));
239     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q * q_comp, P_from, P_to, P_from, 1));
240 
241     // Apply Rinv, output_project = Rinv interp_from
242     for (CeedInt j = 0; j < P_from; j++) {  // Column j
243       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
244       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
245         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
246         for (CeedInt k = i + 1; k < P_to; k++) {
247           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
248         }
249         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
250       }
251     }
252   }
253 
254   // Cleanup
255   CeedCall(CeedFree(&tau));
256   CeedCall(CeedFree(&interp_to));
257   CeedCall(CeedFree(&interp_from));
258 
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 /// @}
263 
264 /// ----------------------------------------------------------------------------
265 /// Ceed Backend API
266 /// ----------------------------------------------------------------------------
267 /// @addtogroup CeedBasisBackend
268 /// @{
269 
270 /**
271   @brief Return collocated grad matrix
272 
273   @param[in]  basis         CeedBasis
274   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
275 
276   @return An error code: 0 - success, otherwise - failure
277 
278   @ref Backend
279 **/
280 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
281   int         i, j, k;
282   Ceed        ceed;
283   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
284   CeedScalar *interp_1d, *grad_1d, *tau;
285 
286   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
287   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
288   CeedCall(CeedMalloc(Q_1d, &tau));
289   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
290   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
291 
292   // QR Factorization, interp_1d = Q R
293   CeedCall(CeedBasisGetCeed(basis, &ceed));
294   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
295   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
296 
297   // Apply Rinv, collo_grad_1d = grad_1d Rinv
298   for (i = 0; i < Q_1d; i++) {  // Row i
299     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
300     for (j = 1; j < P_1d; j++) {  // Column j
301       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
302       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
303       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
304     }
305     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
306   }
307 
308   // Apply Q^T, collo_grad_1d = collo_grad_1d Q^T
309   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
310 
311   CeedCall(CeedFree(&interp_1d));
312   CeedCall(CeedFree(&grad_1d));
313   CeedCall(CeedFree(&tau));
314   return CEED_ERROR_SUCCESS;
315 }
316 
317 /**
318   @brief Get tensor status for given CeedBasis
319 
320   @param[in]  basis     CeedBasis
321   @param[out] is_tensor Variable to store tensor status
322 
323   @return An error code: 0 - success, otherwise - failure
324 
325   @ref Backend
326 **/
327 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
328   *is_tensor = basis->tensor_basis;
329   return CEED_ERROR_SUCCESS;
330 }
331 
332 /**
333   @brief Get backend data of a CeedBasis
334 
335   @param[in]  basis CeedBasis
336   @param[out] data  Variable to store data
337 
338   @return An error code: 0 - success, otherwise - failure
339 
340   @ref Backend
341 **/
342 int CeedBasisGetData(CeedBasis basis, void *data) {
343   *(void **)data = basis->data;
344   return CEED_ERROR_SUCCESS;
345 }
346 
347 /**
348   @brief Set backend data of a CeedBasis
349 
350   @param[in,out] basis  CeedBasis
351   @param[in]     data   Data to set
352 
353   @return An error code: 0 - success, otherwise - failure
354 
355   @ref Backend
356 **/
357 int CeedBasisSetData(CeedBasis basis, void *data) {
358   basis->data = data;
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Increment the reference counter for a CeedBasis
364 
365   @param[in,out] basis Basis to increment the reference counter
366 
367   @return An error code: 0 - success, otherwise - failure
368 
369   @ref Backend
370 **/
371 int CeedBasisReference(CeedBasis basis) {
372   basis->ref_count++;
373   return CEED_ERROR_SUCCESS;
374 }
375 
376 /**
377   @brief Get number of Q-vector components for given CeedBasis
378 
379   @param[in]  basis  CeedBasis
380   @param[in]  eval_mode \ref CEED_EVAL_INTERP to use interpolated values,
381                           \ref CEED_EVAL_GRAD to use gradients,
382                           \ref CEED_EVAL_DIV to use divergence,
383                           \ref CEED_EVAL_CURL to use curl.
384   @param[out] q_comp Variable to store number of Q-vector components of basis
385 
386   @return An error code: 0 - success, otherwise - failure
387 
388   @ref Backend
389 **/
390 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedEvalMode eval_mode, CeedInt *q_comp) {
391   switch (eval_mode) {
392     case CEED_EVAL_INTERP:
393       *q_comp = (basis->fe_space == CEED_FE_SPACE_H1) ? 1 : basis->dim;
394       break;
395     case CEED_EVAL_GRAD:
396       *q_comp = basis->dim;
397       break;
398     case CEED_EVAL_DIV:
399       *q_comp = 1;
400       break;
401     case CEED_EVAL_CURL:
402       *q_comp = (basis->dim < 3) ? 1 : basis->dim;
403       break;
404     case CEED_EVAL_NONE:
405     case CEED_EVAL_WEIGHT:
406       *q_comp = 1;
407       break;
408   }
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
414 
415   @param[in]  basis     Basis to estimate FLOPs for
416   @param[in]  t_mode    Apply basis or transpose
417   @param[in]  eval_mode Basis evaluation mode
418   @param[out] flops     Address of variable to hold FLOPs estimate
419 
420   @ref Backend
421 **/
422 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
423   bool is_tensor;
424 
425   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
426   if (is_tensor) {
427     CeedInt dim, num_comp, P_1d, Q_1d;
428     CeedCall(CeedBasisGetDimension(basis, &dim));
429     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
430     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
431     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
432     if (t_mode == CEED_TRANSPOSE) {
433       P_1d = Q_1d;
434       Q_1d = P_1d;
435     }
436     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
437     for (CeedInt d = 0; d < dim; d++) {
438       tensor_flops += 2 * pre * P_1d * post * Q_1d;
439       pre /= P_1d;
440       post *= Q_1d;
441     }
442     switch (eval_mode) {
443       case CEED_EVAL_NONE:
444         *flops = 0;
445         break;
446       case CEED_EVAL_INTERP:
447         *flops = tensor_flops;
448         break;
449       case CEED_EVAL_GRAD:
450         *flops = tensor_flops * 2;
451         break;
452       case CEED_EVAL_DIV:
453         // LCOV_EXCL_START
454         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
455         break;
456       case CEED_EVAL_CURL:
457         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
458         break;
459       // LCOV_EXCL_STOP
460       case CEED_EVAL_WEIGHT:
461         *flops = dim * CeedIntPow(Q_1d, dim);
462         break;
463     }
464   } else {
465     CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
466     CeedCall(CeedBasisGetDimension(basis, &dim));
467     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
468     CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
469     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
470     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
471     switch (eval_mode) {
472       case CEED_EVAL_NONE:
473         *flops = 0;
474         break;
475       case CEED_EVAL_INTERP:
476       case CEED_EVAL_GRAD:
477       case CEED_EVAL_DIV:
478       case CEED_EVAL_CURL:
479         *flops = num_nodes * num_qpts * num_comp * q_comp;
480         break;
481       case CEED_EVAL_WEIGHT:
482         *flops = 0;
483         break;
484     }
485   }
486 
487   return CEED_ERROR_SUCCESS;
488 }
489 
490 /**
491   @brief Get CeedFESpace for a CeedBasis
492 
493   @param[in]  basis     CeedBasis
494   @param[out] fe_space  Variable to store CeedFESpace
495 
496   @return An error code: 0 - success, otherwise - failure
497 
498   @ref Backend
499 **/
500 int CeedBasisGetFESpace(CeedBasis basis, CeedFESpace *fe_space) {
501   *fe_space = basis->fe_space;
502   return CEED_ERROR_SUCCESS;
503 }
504 
505 /**
506   @brief Get dimension for given CeedElemTopology
507 
508   @param[in]  topo CeedElemTopology
509   @param[out] dim  Variable to store dimension of topology
510 
511   @return An error code: 0 - success, otherwise - failure
512 
513   @ref Backend
514 **/
515 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
516   *dim = (CeedInt)topo >> 16;
517   return CEED_ERROR_SUCCESS;
518 }
519 
520 /**
521   @brief Get CeedTensorContract of a CeedBasis
522 
523   @param[in]  basis     CeedBasis
524   @param[out] contract  Variable to store CeedTensorContract
525 
526   @return An error code: 0 - success, otherwise - failure
527 
528   @ref Backend
529 **/
530 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
531   *contract = basis->contract;
532   return CEED_ERROR_SUCCESS;
533 }
534 
535 /**
536   @brief Set CeedTensorContract of a CeedBasis
537 
538   @param[in,out] basis    CeedBasis
539   @param[in]     contract CeedTensorContract to set
540 
541   @return An error code: 0 - success, otherwise - failure
542 
543   @ref Backend
544 **/
545 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
546   basis->contract = contract;
547   CeedCall(CeedTensorContractReference(contract));
548   return CEED_ERROR_SUCCESS;
549 }
550 
551 /**
552   @brief Return a reference implementation of matrix multiplication C = A B.
553 
554   Note: This is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
555 
556   @param[in]  ceed  Ceed context for error handling
557   @param[in]  mat_A Row-major matrix A
558   @param[in]  mat_B Row-major matrix B
559   @param[out] mat_C Row-major output matrix C
560   @param[in]  m     Number of rows of C
561   @param[in]  n     Number of columns of C
562   @param[in]  kk    Number of columns of A/rows of B
563 
564   @return An error code: 0 - success, otherwise - failure
565 
566   @ref Utility
567 **/
568 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
569   for (CeedInt i = 0; i < m; i++) {
570     for (CeedInt j = 0; j < n; j++) {
571       CeedScalar sum = 0;
572       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
573       mat_C[j + i * n] = sum;
574     }
575   }
576   return CEED_ERROR_SUCCESS;
577 }
578 
579 /**
580   @brief Return QR Factorization of a matrix
581 
582   @param[in]     ceed Ceed context for error handling
583   @param[in,out] mat  Row-major matrix to be factorized in place
584   @param[in,out] tau  Vector of length m of scaling factors
585   @param[in]     m    Number of rows
586   @param[in]     n    Number of columns
587 
588   @return An error code: 0 - success, otherwise - failure
589 
590   @ref Utility
591 **/
592 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
593   CeedScalar v[m];
594 
595   // Check matrix shape
596   if (n > m) {
597     // LCOV_EXCL_START
598     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
599     // LCOV_EXCL_STOP
600   }
601 
602   for (CeedInt i = 0; i < n; i++) {
603     if (i >= m - 1) {  // last row of matrix, no reflection needed
604       tau[i] = 0.;
605       break;
606     }
607     // Calculate Householder vector, magnitude
608     CeedScalar sigma = 0.0;
609     v[i]             = mat[i + n * i];
610     for (CeedInt j = i + 1; j < m; j++) {
611       v[j] = mat[i + n * j];
612       sigma += v[j] * v[j];
613     }
614     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
615     CeedScalar R_ii = -copysign(norm, v[i]);
616     v[i] -= R_ii;
617     // norm of v[i:m] after modification above and scaling below
618     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
619     //   tau = 2 / (norm*norm)
620     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
621     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
622 
623     // Apply Householder reflector to lower right panel
624     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
625     // Save v
626     mat[i + n * i] = R_ii;
627     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
628   }
629   return CEED_ERROR_SUCCESS;
630 }
631 
632 /**
633   @brief Apply Householder Q matrix
634 
635   Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn.
636 
637   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
638   @param[in]     mat_Q  Householder Q matrix
639   @param[in]     tau    Householder scaling factors
640   @param[in]     t_mode Transpose mode for application
641   @param[in]     m      Number of rows in A
642   @param[in]     n      Number of columns in A
643   @param[in]     k      Number of elementary reflectors in Q, k<m
644   @param[in]     row    Row stride in A
645   @param[in]     col    Col stride in A
646 
647   @return An error code: 0 - success, otherwise - failure
648 
649   @ref Utility
650 **/
651 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
652                           CeedInt k, CeedInt row, CeedInt col) {
653   CeedScalar *v;
654   CeedCall(CeedMalloc(m, &v));
655   for (CeedInt ii = 0; ii < k; ii++) {
656     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
657     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
658     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
659     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
660   }
661   CeedCall(CeedFree(&v));
662   return CEED_ERROR_SUCCESS;
663 }
664 
665 /**
666   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
667 
668   @param[in]     ceed   Ceed context for error handling
669   @param[in,out] mat    Row-major matrix to be factorized in place
670   @param[out]    lambda Vector of length n of eigenvalues
671   @param[in]     n      Number of rows/columns
672 
673   @return An error code: 0 - success, otherwise - failure
674 
675   @ref Utility
676 **/
677 CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
678   // Check bounds for clang-tidy
679   if (n < 2) {
680     // LCOV_EXCL_START
681     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
682     // LCOV_EXCL_STOP
683   }
684 
685   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
686 
687   // Copy mat to mat_T and set mat to I
688   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
689   for (CeedInt i = 0; i < n; i++) {
690     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
691   }
692 
693   // Reduce to tridiagonal
694   for (CeedInt i = 0; i < n - 1; i++) {
695     // Calculate Householder vector, magnitude
696     CeedScalar sigma = 0.0;
697     v[i]             = mat_T[i + n * (i + 1)];
698     for (CeedInt j = i + 1; j < n - 1; j++) {
699       v[j] = mat_T[i + n * (j + 1)];
700       sigma += v[j] * v[j];
701     }
702     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
703     CeedScalar R_ii = -copysign(norm, v[i]);
704     v[i] -= R_ii;
705     // norm of v[i:m] after modification above and scaling below
706     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
707     //   tau = 2 / (norm*norm)
708     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
709     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
710 
711     // Update sub and super diagonal
712     for (CeedInt j = i + 2; j < n; j++) {
713       mat_T[i + n * j] = 0;
714       mat_T[j + n * i] = 0;
715     }
716     // Apply symmetric Householder reflector to lower right panel
717     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
718     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
719 
720     // Save v
721     mat_T[i + n * (i + 1)] = R_ii;
722     mat_T[(i + 1) + n * i] = R_ii;
723     for (CeedInt j = i + 1; j < n - 1; j++) {
724       mat_T[i + n * (j + 1)] = v[j];
725     }
726   }
727   // Backwards accumulation of Q
728   for (CeedInt i = n - 2; i >= 0; i--) {
729     if (tau[i] > 0.0) {
730       v[i] = 1;
731       for (CeedInt j = i + 1; j < n - 1; j++) {
732         v[j]                   = mat_T[i + n * (j + 1)];
733         mat_T[i + n * (j + 1)] = 0;
734       }
735       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
736     }
737   }
738 
739   // Reduce sub and super diagonal
740   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
741   CeedScalar tol = CEED_EPSILON;
742 
743   while (itr < max_itr) {
744     // Update p, q, size of reduced portions of diagonal
745     p = 0;
746     q = 0;
747     for (CeedInt i = n - 2; i >= 0; i--) {
748       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
749       else break;
750     }
751     for (CeedInt i = 0; i < n - q - 1; i++) {
752       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
753       else break;
754     }
755     if (q == n - 1) break;  // Finished reducing
756 
757     // Reduce tridiagonal portion
758     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
759     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
760     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
761     CeedScalar x  = mat_T[p + n * p] - mu;
762     CeedScalar z  = mat_T[p + n * (p + 1)];
763     for (CeedInt k = p; k < n - q - 1; k++) {
764       // Compute Givens rotation
765       CeedScalar c = 1, s = 0;
766       if (fabs(z) > tol) {
767         if (fabs(z) > fabs(x)) {
768           CeedScalar tau = -x / z;
769           s = 1 / sqrt(1 + tau * tau), c = s * tau;
770         } else {
771           CeedScalar tau = -z / x;
772           c = 1 / sqrt(1 + tau * tau), s = c * tau;
773         }
774       }
775 
776       // Apply Givens rotation to T
777       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
778       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
779 
780       // Apply Givens rotation to Q
781       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
782 
783       // Update x, z
784       if (k < n - q - 2) {
785         x = mat_T[k + n * (k + 1)];
786         z = mat_T[k + n * (k + 2)];
787       }
788     }
789     itr++;
790   }
791 
792   // Save eigenvalues
793   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
794 
795   // Check convergence
796   if (itr == max_itr && q < n - 1) {
797     // LCOV_EXCL_START
798     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
799     // LCOV_EXCL_STOP
800   }
801   return CEED_ERROR_SUCCESS;
802 }
803 CeedPragmaOptimizeOn;
804 
805 /**
806   @brief Return Simultaneous Diagonalization of two matrices.
807 
808   This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
809   We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
810   This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
811 
812   @param[in]  ceed   Ceed context for error handling
813   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
814   @param[in]  mat_B  Row-major matrix to be factorized to identity
815   @param[out] mat_X  Row-major orthogonal matrix
816   @param[out] lambda Vector of length n of generalized eigenvalues
817   @param[in]  n      Number of rows/columns
818 
819   @return An error code: 0 - success, otherwise - failure
820 
821   @ref Utility
822 **/
823 CeedPragmaOptimizeOff int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda,
824                                                           CeedInt n) {
825   CeedScalar *mat_C, *mat_G, *vec_D;
826   CeedCall(CeedCalloc(n * n, &mat_C));
827   CeedCall(CeedCalloc(n * n, &mat_G));
828   CeedCall(CeedCalloc(n, &vec_D));
829 
830   // Compute B = G D G^T
831   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
832   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
833 
834   // Sort eigenvalues
835   for (CeedInt i = n - 1; i >= 0; i--) {
836     for (CeedInt j = 0; j < i; j++) {
837       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
838         CeedScalar temp;
839         temp         = vec_D[j];
840         vec_D[j]     = vec_D[j + 1];
841         vec_D[j + 1] = temp;
842         for (CeedInt k = 0; k < n; k++) {
843           temp                 = mat_G[k * n + j];
844           mat_G[k * n + j]     = mat_G[k * n + j + 1];
845           mat_G[k * n + j + 1] = temp;
846         }
847       }
848     }
849   }
850 
851   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
852   //           = D^-1/2 G^T A G D^-1/2
853   // -- D = D^-1/2
854   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
855   // -- G = G D^-1/2
856   // -- C = D^-1/2 G^T
857   for (CeedInt i = 0; i < n; i++) {
858     for (CeedInt j = 0; j < n; j++) {
859       mat_G[i * n + j] *= vec_D[j];
860       mat_C[j * n + i] = mat_G[i * n + j];
861     }
862   }
863   // -- X = (D^-1/2 G^T) A
864   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
865   // -- C = (D^-1/2 G^T A) (G D^-1/2)
866   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
867 
868   // Compute Q^T C Q = lambda
869   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
870 
871   // Sort eigenvalues
872   for (CeedInt i = n - 1; i >= 0; i--) {
873     for (CeedInt j = 0; j < i; j++) {
874       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
875         CeedScalar temp;
876         temp          = lambda[j];
877         lambda[j]     = lambda[j + 1];
878         lambda[j + 1] = temp;
879         for (CeedInt k = 0; k < n; k++) {
880           temp                 = mat_C[k * n + j];
881           mat_C[k * n + j]     = mat_C[k * n + j + 1];
882           mat_C[k * n + j + 1] = temp;
883         }
884       }
885     }
886   }
887 
888   // Set X = (G D^1/2)^-T Q
889   //       = G D^-1/2 Q
890   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
891 
892   // Cleanup
893   CeedCall(CeedFree(&mat_C));
894   CeedCall(CeedFree(&mat_G));
895   CeedCall(CeedFree(&vec_D));
896   return CEED_ERROR_SUCCESS;
897 }
898 CeedPragmaOptimizeOn;
899 
900 /// @}
901 
902 /// ----------------------------------------------------------------------------
903 /// CeedBasis Public API
904 /// ----------------------------------------------------------------------------
905 /// @addtogroup CeedBasisUser
906 /// @{
907 
908 /**
909   @brief Create a tensor-product basis for H^1 discretizations
910 
911   @param[in]  ceed        Ceed object where the CeedBasis will be created
912   @param[in]  dim         Topological dimension
913   @param[in]  num_comp    Number of field components (1 for scalar fields)
914   @param[in]  P_1d        Number of nodes in one dimension
915   @param[in]  Q_1d        Number of quadrature points in one dimension
916   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
917   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
918   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
919   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
920   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
921 
922   @return An error code: 0 - success, otherwise - failure
923 
924   @ref User
925 **/
926 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
927                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
928   if (!ceed->BasisCreateTensorH1) {
929     Ceed delegate;
930     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
931 
932     if (!delegate) {
933       // LCOV_EXCL_START
934       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
935       // LCOV_EXCL_STOP
936     }
937 
938     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
939     return CEED_ERROR_SUCCESS;
940   }
941 
942   if (dim < 1) {
943     // LCOV_EXCL_START
944     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
945     // LCOV_EXCL_STOP
946   }
947 
948   if (num_comp < 1) {
949     // LCOV_EXCL_START
950     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
951     // LCOV_EXCL_STOP
952   }
953 
954   if (P_1d < 1) {
955     // LCOV_EXCL_START
956     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
957     // LCOV_EXCL_STOP
958   }
959 
960   if (Q_1d < 1) {
961     // LCOV_EXCL_START
962     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
963     // LCOV_EXCL_STOP
964   }
965 
966   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
967 
968   CeedCall(CeedCalloc(1, basis));
969   (*basis)->ceed = ceed;
970   CeedCall(CeedReference(ceed));
971   (*basis)->ref_count    = 1;
972   (*basis)->tensor_basis = 1;
973   (*basis)->dim          = dim;
974   (*basis)->topo         = topo;
975   (*basis)->num_comp     = num_comp;
976   (*basis)->P_1d         = P_1d;
977   (*basis)->Q_1d         = Q_1d;
978   (*basis)->P            = CeedIntPow(P_1d, dim);
979   (*basis)->Q            = CeedIntPow(Q_1d, dim);
980   (*basis)->fe_space     = CEED_FE_SPACE_H1;
981   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
982   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
983   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
984   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
985   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
986   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
987   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
988   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
989   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
990   return CEED_ERROR_SUCCESS;
991 }
992 
993 /**
994   @brief Create a tensor-product Lagrange basis
995 
996   @param[in]  ceed      Ceed object where the CeedBasis will be created
997   @param[in]  dim       Topological dimension of element
998   @param[in]  num_comp  Number of field components (1 for scalar fields)
999   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
1000                           The polynomial degree of the resulting Q_k element is k=P-1.
1001   @param[in]  Q         Number of quadrature points in one dimension.
1002   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
1003   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1004 
1005   @return An error code: 0 - success, otherwise - failure
1006 
1007   @ref User
1008 **/
1009 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
1010   // Allocate
1011   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
1012   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
1013 
1014   if (dim < 1) {
1015     // LCOV_EXCL_START
1016     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
1017     // LCOV_EXCL_STOP
1018   }
1019 
1020   if (num_comp < 1) {
1021     // LCOV_EXCL_START
1022     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1023     // LCOV_EXCL_STOP
1024   }
1025 
1026   if (P < 1) {
1027     // LCOV_EXCL_START
1028     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1029     // LCOV_EXCL_STOP
1030   }
1031 
1032   if (Q < 1) {
1033     // LCOV_EXCL_START
1034     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1035     // LCOV_EXCL_STOP
1036   }
1037 
1038   // Get Nodes and Weights
1039   CeedCall(CeedCalloc(P * Q, &interp_1d));
1040   CeedCall(CeedCalloc(P * Q, &grad_1d));
1041   CeedCall(CeedCalloc(P, &nodes));
1042   CeedCall(CeedCalloc(Q, &q_ref_1d));
1043   CeedCall(CeedCalloc(Q, &q_weight_1d));
1044   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
1045   switch (quad_mode) {
1046     case CEED_GAUSS:
1047       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
1048       break;
1049     case CEED_GAUSS_LOBATTO:
1050       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
1051       break;
1052   }
1053   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
1054 
1055   // Build B, D matrix
1056   // Fornberg, 1998
1057   for (i = 0; i < Q; i++) {
1058     c1                   = 1.0;
1059     c3                   = nodes[0] - q_ref_1d[i];
1060     interp_1d[i * P + 0] = 1.0;
1061     for (j = 1; j < P; j++) {
1062       c2 = 1.0;
1063       c4 = c3;
1064       c3 = nodes[j] - q_ref_1d[i];
1065       for (k = 0; k < j; k++) {
1066         dx = nodes[j] - nodes[k];
1067         c2 *= dx;
1068         if (k == j - 1) {
1069           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
1070           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
1071         }
1072         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
1073         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
1074       }
1075       c1 = c2;
1076     }
1077   }
1078   // Pass to CeedBasisCreateTensorH1
1079   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1080 cleanup:
1081   CeedCall(CeedFree(&interp_1d));
1082   CeedCall(CeedFree(&grad_1d));
1083   CeedCall(CeedFree(&nodes));
1084   CeedCall(CeedFree(&q_ref_1d));
1085   CeedCall(CeedFree(&q_weight_1d));
1086   return CEED_ERROR_SUCCESS;
1087 }
1088 
1089 /**
1090   @brief Create a non tensor-product basis for H^1 discretizations
1091 
1092   @param[in]  ceed      Ceed object where the CeedBasis will be created
1093   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
1094   @param[in]  num_comp  Number of field components (1 for scalar fields)
1095   @param[in]  num_nodes Total number of nodes
1096   @param[in]  num_qpts  Total number of quadrature points
1097   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1098   @param[in]  grad      Row-major (dim * num_qpts * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
1099   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1100   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1101   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1102 
1103   @return An error code: 0 - success, otherwise - failure
1104 
1105   @ref User
1106 **/
1107 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1108                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1109   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1110 
1111   if (!ceed->BasisCreateH1) {
1112     Ceed delegate;
1113     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1114 
1115     if (!delegate) {
1116       // LCOV_EXCL_START
1117       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
1118       // LCOV_EXCL_STOP
1119     }
1120 
1121     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1122     return CEED_ERROR_SUCCESS;
1123   }
1124 
1125   if (num_comp < 1) {
1126     // LCOV_EXCL_START
1127     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1128     // LCOV_EXCL_STOP
1129   }
1130 
1131   if (num_nodes < 1) {
1132     // LCOV_EXCL_START
1133     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1134     // LCOV_EXCL_STOP
1135   }
1136 
1137   if (num_qpts < 1) {
1138     // LCOV_EXCL_START
1139     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1140     // LCOV_EXCL_STOP
1141   }
1142 
1143   CeedCall(CeedCalloc(1, basis));
1144 
1145   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1146 
1147   (*basis)->ceed = ceed;
1148   CeedCall(CeedReference(ceed));
1149   (*basis)->ref_count    = 1;
1150   (*basis)->tensor_basis = 0;
1151   (*basis)->dim          = dim;
1152   (*basis)->topo         = topo;
1153   (*basis)->num_comp     = num_comp;
1154   (*basis)->P            = P;
1155   (*basis)->Q            = Q;
1156   (*basis)->fe_space     = CEED_FE_SPACE_H1;
1157   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1158   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1159   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1160   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1161   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1162   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1163   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1164   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1165   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1166   return CEED_ERROR_SUCCESS;
1167 }
1168 
1169 /**
1170   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1171 
1172   @param[in]  ceed      Ceed object where the CeedBasis will be created
1173   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1174   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1175   @param[in]  num_nodes Total number of nodes (dofs per element)
1176   @param[in]  num_qpts  Total number of quadrature points
1177   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1178   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of basis functions at quadrature points
1179   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1180   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1181   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1182 
1183   @return An error code: 0 - success, otherwise - failure
1184 
1185   @ref User
1186 **/
1187 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1188                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1189   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1190 
1191   if (!ceed->BasisCreateHdiv) {
1192     Ceed delegate;
1193     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1194 
1195     if (!delegate) {
1196       // LCOV_EXCL_START
1197       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1198       // LCOV_EXCL_STOP
1199     }
1200 
1201     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1202     return CEED_ERROR_SUCCESS;
1203   }
1204 
1205   if (num_comp < 1) {
1206     // LCOV_EXCL_START
1207     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1208     // LCOV_EXCL_STOP
1209   }
1210 
1211   if (num_nodes < 1) {
1212     // LCOV_EXCL_START
1213     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1214     // LCOV_EXCL_STOP
1215   }
1216 
1217   if (num_qpts < 1) {
1218     // LCOV_EXCL_START
1219     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1220     // LCOV_EXCL_STOP
1221   }
1222 
1223   CeedCall(CeedCalloc(1, basis));
1224 
1225   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1226 
1227   (*basis)->ceed = ceed;
1228   CeedCall(CeedReference(ceed));
1229   (*basis)->ref_count    = 1;
1230   (*basis)->tensor_basis = 0;
1231   (*basis)->dim          = dim;
1232   (*basis)->topo         = topo;
1233   (*basis)->num_comp     = num_comp;
1234   (*basis)->P            = P;
1235   (*basis)->Q            = Q;
1236   (*basis)->fe_space     = CEED_FE_SPACE_HDIV;
1237   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1238   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1239   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1240   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1241   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1242   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1243   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1244   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1245   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1246   return CEED_ERROR_SUCCESS;
1247 }
1248 
1249 /**
1250   @brief Create a non tensor-product basis for H(curl) discretizations
1251 
1252   @param[in]  ceed      Ceed object where the CeedBasis will be created
1253   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1254   @param[in]  num_comp  Number of components (usually 1 for vectors in H(curl) bases)
1255   @param[in]  num_nodes Total number of nodes (dofs per element)
1256   @param[in]  num_qpts  Total number of quadrature points
1257   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1258   @param[in]  curl      Row-major (curl_comp * num_qpts * num_nodes, curl_comp = 1 if dim < 3 else dim) matrix expressing curl of basis functions at
1259 quadrature points
1260   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1261   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1262   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1263 
1264   @return An error code: 0 - success, otherwise - failure
1265 
1266   @ref User
1267 **/
1268 int CeedBasisCreateHcurl(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1269                          const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1270   CeedInt Q = num_qpts, P = num_nodes, dim = 0, curl_comp = 0;
1271 
1272   if (!ceed->BasisCreateHdiv) {
1273     Ceed delegate;
1274     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1275 
1276     if (!delegate) {
1277       // LCOV_EXCL_START
1278       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHcurl");
1279       // LCOV_EXCL_STOP
1280     }
1281 
1282     CeedCall(CeedBasisCreateHcurl(delegate, topo, num_comp, num_nodes, num_qpts, interp, curl, q_ref, q_weight, basis));
1283     return CEED_ERROR_SUCCESS;
1284   }
1285 
1286   if (num_comp < 1) {
1287     // LCOV_EXCL_START
1288     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1289     // LCOV_EXCL_STOP
1290   }
1291 
1292   if (num_nodes < 1) {
1293     // LCOV_EXCL_START
1294     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1295     // LCOV_EXCL_STOP
1296   }
1297 
1298   if (num_qpts < 1) {
1299     // LCOV_EXCL_START
1300     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1301     // LCOV_EXCL_STOP
1302   }
1303 
1304   CeedCall(CeedCalloc(1, basis));
1305 
1306   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1307   curl_comp = (dim < 3) ? 1 : dim;
1308 
1309   (*basis)->ceed = ceed;
1310   CeedCall(CeedReference(ceed));
1311   (*basis)->ref_count    = 1;
1312   (*basis)->tensor_basis = 0;
1313   (*basis)->dim          = dim;
1314   (*basis)->topo         = topo;
1315   (*basis)->num_comp     = num_comp;
1316   (*basis)->P            = P;
1317   (*basis)->Q            = Q;
1318   (*basis)->fe_space     = CEED_FE_SPACE_HCURL;
1319   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1320   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1321   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1322   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1323   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1324   CeedCall(CeedMalloc(curl_comp * Q * P, &(*basis)->curl));
1325   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1326   if (curl) memcpy((*basis)->curl, curl, curl_comp * Q * P * sizeof(curl[0]));
1327   CeedCall(ceed->BasisCreateHcurl(topo, dim, P, Q, interp, curl, q_ref, q_weight, *basis));
1328   return CEED_ERROR_SUCCESS;
1329 }
1330 
1331 /**
1332   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1333 
1334   Only `CEED_EVAL_INTERP` will be valid for the new basis, `basis_project`. For H^1 spaces, `CEED_EVAL_GRAD` will also be valid.
1335   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
1336 factorization.  The gradient (for the H^1 case) is given by `grad_project = interp_to^+ * grad_from`.
1337 
1338   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
1339 
1340   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If
1341 `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1342 
1343   @param[in]  basis_from    CeedBasis to prolong from
1344   @param[in]  basis_to      CeedBasis to prolong to
1345   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
1346 
1347   @return An error code: 0 - success, otherwise - failure
1348 
1349   @ref User
1350 **/
1351 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1352   Ceed ceed;
1353   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1354 
1355   // Create projection matrix
1356   CeedScalar *interp_project, *grad_project;
1357   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1358 
1359   // Build basis
1360   bool        is_tensor;
1361   CeedFESpace fe_space;
1362   CeedInt     dim, num_comp;
1363   CeedScalar *q_ref, *q_weight;
1364   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
1365   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space));
1366   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1367   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1368   if (is_tensor) {
1369     CeedInt P_1d_to, P_1d_from;
1370     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1371     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1372     CeedCall(CeedCalloc(P_1d_to, &q_ref));
1373     CeedCall(CeedCalloc(P_1d_to, &q_weight));
1374     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1375   } else {
1376     CeedElemTopology topo;
1377     CeedCall(CeedBasisGetTopology(basis_to, &topo));
1378     CeedInt num_nodes_to, num_nodes_from;
1379     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1380     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1381     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
1382     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
1383     switch (fe_space) {
1384       case CEED_FE_SPACE_H1:
1385         CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1386         break;
1387       case CEED_FE_SPACE_HDIV:
1388         CeedCall(
1389             CeedBasisCreateHdiv(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1390         break;
1391       case CEED_FE_SPACE_HCURL:
1392         CeedCall(
1393             CeedBasisCreateHcurl(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1394         break;
1395     }
1396   }
1397 
1398   // Cleanup
1399   CeedCall(CeedFree(&interp_project));
1400   CeedCall(CeedFree(&grad_project));
1401   CeedCall(CeedFree(&q_ref));
1402   CeedCall(CeedFree(&q_weight));
1403 
1404   return CEED_ERROR_SUCCESS;
1405 }
1406 
1407 /**
1408   @brief Copy the pointer to a CeedBasis.
1409 
1410   Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis.
1411   This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis.
1412 
1413   @param[in]     basis      CeedBasis to copy reference to
1414   @param[in,out] basis_copy Variable to store copied reference
1415 
1416   @return An error code: 0 - success, otherwise - failure
1417 
1418   @ref User
1419 **/
1420 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1421   if (basis != CEED_BASIS_COLLOCATED) CeedCall(CeedBasisReference(basis));
1422   CeedCall(CeedBasisDestroy(basis_copy));
1423   *basis_copy = basis;
1424   return CEED_ERROR_SUCCESS;
1425 }
1426 
1427 /**
1428   @brief View a CeedBasis
1429 
1430   @param[in] basis  CeedBasis to view
1431   @param[in] stream Stream to view to, e.g., stdout
1432 
1433   @return An error code: 0 - success, otherwise - failure
1434 
1435   @ref User
1436 **/
1437 int CeedBasisView(CeedBasis basis, FILE *stream) {
1438   CeedElemTopology topo     = basis->topo;
1439   CeedFESpace      fe_space = basis->fe_space;
1440   CeedInt          q_comp   = 0;
1441 
1442   // Print FE space and element topology of the basis
1443   if (basis->tensor_basis) {
1444     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1445             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
1446   } else {
1447     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1448             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
1449   }
1450   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1451   if (basis->tensor_basis) {  // tensor basis
1452     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1453     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1454     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1455     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1456   } else {  // non-tensor basis
1457     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1458     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1459     CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp));
1460     CeedCall(CeedScalarView("interp", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->interp, stream));
1461     if (basis->grad) {
1462       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp));
1463       CeedCall(CeedScalarView("grad", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->grad, stream));
1464     }
1465     if (basis->div) {
1466       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp));
1467       CeedCall(CeedScalarView("div", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->div, stream));
1468     }
1469     if (basis->curl) {
1470       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp));
1471       CeedCall(CeedScalarView("curl", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->curl, stream));
1472     }
1473   }
1474   return CEED_ERROR_SUCCESS;
1475 }
1476 
1477 /**
1478   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1479 
1480   @param[in]  basis      CeedBasis to evaluate
1481   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1482                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1483   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1484                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1485   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1486                           \ref CEED_EVAL_INTERP to use interpolated values,
1487                           \ref CEED_EVAL_GRAD to use gradients,
1488                           \ref CEED_EVAL_DIV to use divergence,
1489                           \ref CEED_EVAL_CURL to use curl,
1490                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1491   @param[in]  u        Input CeedVector
1492   @param[out] v        Output CeedVector
1493 
1494   @return An error code: 0 - success, otherwise - failure
1495 
1496   @ref User
1497 **/
1498 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1499   CeedSize u_length = 0, v_length;
1500   CeedInt  dim, num_comp, q_comp, num_nodes, num_qpts;
1501   CeedCall(CeedBasisGetDimension(basis, &dim));
1502   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1503   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
1504   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1505   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1506   CeedCall(CeedVectorGetLength(v, &v_length));
1507   if (u) {
1508     CeedCall(CeedVectorGetLength(u, &u_length));
1509   }
1510 
1511   if (!basis->Apply) {
1512     // LCOV_EXCL_START
1513     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1514     // LCOV_EXCL_STOP
1515   }
1516 
1517   // Check compatibility of topological and geometrical dimensions
1518   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1519       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1520     // LCOV_EXCL_START
1521     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1522     // LCOV_EXCL_STOP
1523   }
1524 
1525   // Check vector lengths to prevent out of bounds issues
1526   bool bad_dims = false;
1527   switch (eval_mode) {
1528     case CEED_EVAL_NONE:
1529     case CEED_EVAL_INTERP:
1530     case CEED_EVAL_GRAD:
1531     case CEED_EVAL_DIV:
1532     case CEED_EVAL_CURL:
1533       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * q_comp || v_length < num_elem * num_comp * num_nodes)) ||
1534                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * q_comp || u_length < num_elem * num_comp * num_nodes)));
1535       break;
1536     case CEED_EVAL_WEIGHT:
1537       bad_dims = v_length < num_elem * num_qpts;
1538       break;
1539   }
1540   if (bad_dims) {
1541     // LCOV_EXCL_START
1542     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1543     // LCOV_EXCL_STOP
1544   }
1545 
1546   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1547   return CEED_ERROR_SUCCESS;
1548 }
1549 
1550 /**
1551   @brief Get Ceed associated with a CeedBasis
1552 
1553   @param[in]  basis CeedBasis
1554   @param[out] ceed  Variable to store Ceed
1555 
1556   @return An error code: 0 - success, otherwise - failure
1557 
1558   @ref Advanced
1559 **/
1560 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1561   *ceed = basis->ceed;
1562   return CEED_ERROR_SUCCESS;
1563 }
1564 
1565 /**
1566   @brief Get dimension for given CeedBasis
1567 
1568   @param[in]  basis CeedBasis
1569   @param[out] dim   Variable to store dimension of basis
1570 
1571   @return An error code: 0 - success, otherwise - failure
1572 
1573   @ref Advanced
1574 **/
1575 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1576   *dim = basis->dim;
1577   return CEED_ERROR_SUCCESS;
1578 }
1579 
1580 /**
1581   @brief Get topology for given CeedBasis
1582 
1583   @param[in]  basis CeedBasis
1584   @param[out] topo  Variable to store topology of basis
1585 
1586   @return An error code: 0 - success, otherwise - failure
1587 
1588   @ref Advanced
1589 **/
1590 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1591   *topo = basis->topo;
1592   return CEED_ERROR_SUCCESS;
1593 }
1594 
1595 /**
1596   @brief Get number of components for given CeedBasis
1597 
1598   @param[in]  basis    CeedBasis
1599   @param[out] num_comp Variable to store number of components of basis
1600 
1601   @return An error code: 0 - success, otherwise - failure
1602 
1603   @ref Advanced
1604 **/
1605 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1606   *num_comp = basis->num_comp;
1607   return CEED_ERROR_SUCCESS;
1608 }
1609 
1610 /**
1611   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1612 
1613   @param[in]  basis CeedBasis
1614   @param[out] P     Variable to store number of nodes
1615 
1616   @return An error code: 0 - success, otherwise - failure
1617 
1618   @ref Utility
1619 **/
1620 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1621   *P = basis->P;
1622   return CEED_ERROR_SUCCESS;
1623 }
1624 
1625 /**
1626   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1627 
1628   @param[in]  basis CeedBasis
1629   @param[out] P_1d  Variable to store number of nodes
1630 
1631   @return An error code: 0 - success, otherwise - failure
1632 
1633   @ref Advanced
1634 **/
1635 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1636   if (!basis->tensor_basis) {
1637     // LCOV_EXCL_START
1638     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1639     // LCOV_EXCL_STOP
1640   }
1641 
1642   *P_1d = basis->P_1d;
1643   return CEED_ERROR_SUCCESS;
1644 }
1645 
1646 /**
1647   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1648 
1649   @param[in]  basis CeedBasis
1650   @param[out] Q     Variable to store number of quadrature points
1651 
1652   @return An error code: 0 - success, otherwise - failure
1653 
1654   @ref Utility
1655 **/
1656 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1657   *Q = basis->Q;
1658   return CEED_ERROR_SUCCESS;
1659 }
1660 
1661 /**
1662   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1663 
1664   @param[in]  basis CeedBasis
1665   @param[out] Q_1d  Variable to store number of quadrature points
1666 
1667   @return An error code: 0 - success, otherwise - failure
1668 
1669   @ref Advanced
1670 **/
1671 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1672   if (!basis->tensor_basis) {
1673     // LCOV_EXCL_START
1674     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1675     // LCOV_EXCL_STOP
1676   }
1677 
1678   *Q_1d = basis->Q_1d;
1679   return CEED_ERROR_SUCCESS;
1680 }
1681 
1682 /**
1683   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1684 
1685   @param[in]  basis CeedBasis
1686   @param[out] q_ref Variable to store reference coordinates of quadrature points
1687 
1688   @return An error code: 0 - success, otherwise - failure
1689 
1690   @ref Advanced
1691 **/
1692 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1693   *q_ref = basis->q_ref_1d;
1694   return CEED_ERROR_SUCCESS;
1695 }
1696 
1697 /**
1698   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1699 
1700   @param[in]  basis    CeedBasis
1701   @param[out] q_weight Variable to store quadrature weights
1702 
1703   @return An error code: 0 - success, otherwise - failure
1704 
1705   @ref Advanced
1706 **/
1707 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1708   *q_weight = basis->q_weight_1d;
1709   return CEED_ERROR_SUCCESS;
1710 }
1711 
1712 /**
1713   @brief Get interpolation matrix of a CeedBasis
1714 
1715   @param[in]  basis  CeedBasis
1716   @param[out] interp Variable to store interpolation matrix
1717 
1718   @return An error code: 0 - success, otherwise - failure
1719 
1720   @ref Advanced
1721 **/
1722 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1723   if (!basis->interp && basis->tensor_basis) {
1724     // Allocate
1725     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1726 
1727     // Initialize
1728     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1729 
1730     // Calculate
1731     for (CeedInt d = 0; d < basis->dim; d++) {
1732       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1733         for (CeedInt node = 0; node < basis->P; node++) {
1734           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1735           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1736           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1737         }
1738       }
1739     }
1740   }
1741   *interp = basis->interp;
1742   return CEED_ERROR_SUCCESS;
1743 }
1744 
1745 /**
1746   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1747 
1748   @param[in]  basis     CeedBasis
1749   @param[out] interp_1d Variable to store interpolation matrix
1750 
1751   @return An error code: 0 - success, otherwise - failure
1752 
1753   @ref Backend
1754 **/
1755 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1756   if (!basis->tensor_basis) {
1757     // LCOV_EXCL_START
1758     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1759     // LCOV_EXCL_STOP
1760   }
1761 
1762   *interp_1d = basis->interp_1d;
1763   return CEED_ERROR_SUCCESS;
1764 }
1765 
1766 /**
1767   @brief Get gradient matrix of a CeedBasis
1768 
1769   @param[in]  basis CeedBasis
1770   @param[out] grad  Variable to store gradient matrix
1771 
1772   @return An error code: 0 - success, otherwise - failure
1773 
1774   @ref Advanced
1775 **/
1776 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1777   if (!basis->grad && basis->tensor_basis) {
1778     // Allocate
1779     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1780 
1781     // Initialize
1782     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1783 
1784     // Calculate
1785     for (CeedInt d = 0; d < basis->dim; d++) {
1786       for (CeedInt i = 0; i < basis->dim; i++) {
1787         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1788           for (CeedInt node = 0; node < basis->P; node++) {
1789             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1790             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1791             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1792             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1793           }
1794         }
1795       }
1796     }
1797   }
1798   *grad = basis->grad;
1799   return CEED_ERROR_SUCCESS;
1800 }
1801 
1802 /**
1803   @brief Get 1D gradient matrix of a tensor product CeedBasis
1804 
1805   @param[in]  basis   CeedBasis
1806   @param[out] grad_1d Variable to store gradient matrix
1807 
1808   @return An error code: 0 - success, otherwise - failure
1809 
1810   @ref Advanced
1811 **/
1812 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1813   if (!basis->tensor_basis) {
1814     // LCOV_EXCL_START
1815     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1816     // LCOV_EXCL_STOP
1817   }
1818 
1819   *grad_1d = basis->grad_1d;
1820   return CEED_ERROR_SUCCESS;
1821 }
1822 
1823 /**
1824   @brief Get divergence matrix of a CeedBasis
1825 
1826   @param[in]  basis CeedBasis
1827   @param[out] div   Variable to store divergence matrix
1828 
1829   @return An error code: 0 - success, otherwise - failure
1830 
1831   @ref Advanced
1832 **/
1833 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1834   if (!basis->div) {
1835     // LCOV_EXCL_START
1836     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1837     // LCOV_EXCL_STOP
1838   }
1839 
1840   *div = basis->div;
1841   return CEED_ERROR_SUCCESS;
1842 }
1843 
1844 /**
1845   @brief Get curl matrix of a CeedBasis
1846 
1847   @param[in]  basis CeedBasis
1848   @param[out] curl  Variable to store curl matrix
1849 
1850   @return An error code: 0 - success, otherwise - failure
1851 
1852   @ref Advanced
1853 **/
1854 int CeedBasisGetCurl(CeedBasis basis, const CeedScalar **curl) {
1855   if (!basis->curl) {
1856     // LCOV_EXCL_START
1857     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have curl matrix.");
1858     // LCOV_EXCL_STOP
1859   }
1860 
1861   *curl = basis->curl;
1862   return CEED_ERROR_SUCCESS;
1863 }
1864 
1865 /**
1866   @brief Destroy a CeedBasis
1867 
1868   @param[in,out] basis CeedBasis to destroy
1869 
1870   @return An error code: 0 - success, otherwise - failure
1871 
1872   @ref User
1873 **/
1874 int CeedBasisDestroy(CeedBasis *basis) {
1875   if (!*basis || *basis == CEED_BASIS_COLLOCATED || --(*basis)->ref_count > 0) {
1876     *basis = NULL;
1877     return CEED_ERROR_SUCCESS;
1878   }
1879   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1880   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1881   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1882   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1883   CeedCall(CeedFree(&(*basis)->interp));
1884   CeedCall(CeedFree(&(*basis)->interp_1d));
1885   CeedCall(CeedFree(&(*basis)->grad));
1886   CeedCall(CeedFree(&(*basis)->grad_1d));
1887   CeedCall(CeedFree(&(*basis)->div));
1888   CeedCall(CeedFree(&(*basis)->curl));
1889   CeedCall(CeedDestroy(&(*basis)->ceed));
1890   CeedCall(CeedFree(basis));
1891   return CEED_ERROR_SUCCESS;
1892 }
1893 
1894 /**
1895   @brief Construct a Gauss-Legendre quadrature
1896 
1897   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1898   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1899   @param[out] q_weight_1d Array of length Q to hold the weights
1900 
1901   @return An error code: 0 - success, otherwise - failure
1902 
1903   @ref Utility
1904 **/
1905 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1906   // Allocate
1907   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1908   // Build q_ref_1d, q_weight_1d
1909   for (CeedInt i = 0; i <= Q / 2; i++) {
1910     // Guess
1911     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1912     // Pn(xi)
1913     P0 = 1.0;
1914     P1 = xi;
1915     P2 = 0.0;
1916     for (CeedInt j = 2; j <= Q; j++) {
1917       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1918       P0 = P1;
1919       P1 = P2;
1920     }
1921     // First Newton Step
1922     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1923     xi  = xi - P2 / dP2;
1924     // Newton to convergence
1925     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1926       P0 = 1.0;
1927       P1 = xi;
1928       for (CeedInt j = 2; j <= Q; j++) {
1929         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1930         P0 = P1;
1931         P1 = P2;
1932       }
1933       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1934       xi  = xi - P2 / dP2;
1935     }
1936     // Save xi, wi
1937     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1938     q_weight_1d[i]         = wi;
1939     q_weight_1d[Q - 1 - i] = wi;
1940     q_ref_1d[i]            = -xi;
1941     q_ref_1d[Q - 1 - i]    = xi;
1942   }
1943   return CEED_ERROR_SUCCESS;
1944 }
1945 
1946 /**
1947   @brief Construct a Gauss-Legendre-Lobatto quadrature
1948 
1949   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1950   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1951   @param[out] q_weight_1d Array of length Q to hold the weights
1952 
1953   @return An error code: 0 - success, otherwise - failure
1954 
1955   @ref Utility
1956 **/
1957 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1958   // Allocate
1959   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1960   // Build q_ref_1d, q_weight_1d
1961   // Set endpoints
1962   if (Q < 2) {
1963     // LCOV_EXCL_START
1964     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1965     // LCOV_EXCL_STOP
1966   }
1967   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1968   if (q_weight_1d) {
1969     q_weight_1d[0]     = wi;
1970     q_weight_1d[Q - 1] = wi;
1971   }
1972   q_ref_1d[0]     = -1.0;
1973   q_ref_1d[Q - 1] = 1.0;
1974   // Interior
1975   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1976     // Guess
1977     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1978     // Pn(xi)
1979     P0 = 1.0;
1980     P1 = xi;
1981     P2 = 0.0;
1982     for (CeedInt j = 2; j < Q; j++) {
1983       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1984       P0 = P1;
1985       P1 = P2;
1986     }
1987     // First Newton step
1988     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1989     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1990     xi   = xi - dP2 / d2P2;
1991     // Newton to convergence
1992     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1993       P0 = 1.0;
1994       P1 = xi;
1995       for (CeedInt j = 2; j < Q; j++) {
1996         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1997         P0 = P1;
1998         P1 = P2;
1999       }
2000       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2001       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2002       xi   = xi - dP2 / d2P2;
2003     }
2004     // Save xi, wi
2005     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
2006     if (q_weight_1d) {
2007       q_weight_1d[i]         = wi;
2008       q_weight_1d[Q - 1 - i] = wi;
2009     }
2010     q_ref_1d[i]         = -xi;
2011     q_ref_1d[Q - 1 - i] = xi;
2012   }
2013   return CEED_ERROR_SUCCESS;
2014 }
2015 
2016 /// @}
2017