xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-basis.c (revision 9fd66db6d1a7a1c9032418479851d404799ab3ba)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40   Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Compute Givens rotation
66 
67   Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
68 
69   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
70   @param[in]     c      Cosine factor
71   @param[in]     s      Sine factor
72   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
73                           @ref CEED_TRANSPOSE for the opposite rotation
74   @param[in]     i      First row/column to apply rotation
75   @param[in]     k      Second row/column to apply rotation
76   @param[in]     m      Number of rows in A
77   @param[in]     n      Number of columns in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
84   CeedInt stride_j = 1, stride_ik = m, num_its = n;
85   if (t_mode == CEED_NOTRANSPOSE) {
86     stride_j  = n;
87     stride_ik = 1;
88     num_its   = m;
89   }
90 
91   // Apply rotation
92   for (CeedInt j = 0; j < num_its; j++) {
93     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
94     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
95     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
96   }
97   return CEED_ERROR_SUCCESS;
98 }
99 
100 /**
101   @brief View an array stored in a CeedBasis
102 
103   @param[in] name   Name of array
104   @param[in] fp_fmt Printing format
105   @param[in] m      Number of rows in array
106   @param[in] n      Number of columns in array
107   @param[in] a      Array to be viewed
108   @param[in] stream Stream to view to, e.g., stdout
109 
110   @return An error code: 0 - success, otherwise - failure
111 
112   @ref Developer
113 **/
114 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
115   for (CeedInt i = 0; i < m; i++) {
116     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
117     else fprintf(stream, "%12s:", name);
118     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
119     fputs("\n", stream);
120   }
121   return CEED_ERROR_SUCCESS;
122 }
123 
124 /**
125   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
126 
127   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
128   The gradient is given by `grad_project = interp_to^+ * grad_from`, and is only computed for H^1 spaces otherwise it should not be used.
129 
130   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
131 
132   @param[in]  basis_from     CeedBasis to project from
133   @param[in]  basis_to       CeedBasis to project to
134   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
135   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
136 
137   @return An error code: 0 - success, otherwise - failure
138 
139   @ref Developer
140 **/
141 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
142   Ceed ceed;
143   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
144 
145   // Check for compatible quadrature spaces
146   CeedInt Q_to, Q_from;
147   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
148   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
149   if (Q_to != Q_from) {
150     // LCOV_EXCL_START
151     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
152     // LCOV_EXCL_STOP
153   }
154 
155   // Check for matching tensor or non-tensor
156   CeedInt P_to, P_from, Q = Q_to;
157   bool    is_tensor_to, is_tensor_from;
158   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
159   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
160   if (is_tensor_to && is_tensor_from) {
161     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
162     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
163     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
164   } else if (!is_tensor_to && !is_tensor_from) {
165     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
166     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
167   } else {
168     // LCOV_EXCL_START
169     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
170     // LCOV_EXCL_STOP
171   }
172 
173   // Check for matching FE space
174   CeedFESpace fe_space_to, fe_space_from;
175   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space_to));
176   CeedCall(CeedBasisGetFESpace(basis_from, &fe_space_from));
177   if (fe_space_to != fe_space_from) {
178     // LCOV_EXCL_START
179     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be the same FE space type");
180     // LCOV_EXCL_STOP
181   }
182 
183   // Get source matrices
184   CeedInt           dim, q_comp = 1;
185   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL;
186   CeedScalar       *interp_to, *interp_from, *tau;
187   CeedCall(CeedBasisGetDimension(basis_to, &dim));
188   if (is_tensor_to) {
189     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
190     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
191   } else {
192     CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_INTERP, &q_comp));
193     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
194     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
195   }
196   CeedCall(CeedMalloc(Q * P_from * q_comp, &interp_from));
197   CeedCall(CeedMalloc(Q * P_to * q_comp, &interp_to));
198   CeedCall(CeedCalloc(P_to * P_from, interp_project));
199   CeedCall(CeedMalloc(Q * q_comp, &tau));
200 
201   // `grad_project = interp_to^+ * grad_from` is computed for the H^1 space case so the
202   // projection basis will have a gradient operation
203   const CeedScalar *grad_from_source = NULL;
204   if (fe_space_to == CEED_FE_SPACE_H1) {
205     if (is_tensor_to) {
206       CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
207     } else {
208       CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
209     }
210     CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
211   } else {
212     // Projected derivate operator is only calculated for H^1 but we allocate it for the
213     // basis construction later
214     CeedInt q_comp_deriv = 1;
215     if (fe_space_to == CEED_FE_SPACE_HDIV) {
216       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_DIV, &q_comp_deriv));
217     } else if (fe_space_to == CEED_FE_SPACE_HCURL) {
218       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_CURL, &q_comp_deriv));
219     }
220     CeedCall(CeedCalloc(P_to * P_from * q_comp_deriv, grad_project));
221   }
222 
223   // QR Factorization, interp_to = Q R
224   memcpy(interp_to, interp_to_source, Q * P_to * q_comp * sizeof(interp_to_source[0]));
225   CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q * q_comp, P_to));
226 
227   // Build matrices
228   CeedInt     num_matrices = 1 + (fe_space_to == CEED_FE_SPACE_H1) * (is_tensor_to ? 1 : dim);
229   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
230   input_from[0]     = (CeedScalar *)interp_from_source;
231   output_project[0] = *interp_project;
232   for (CeedInt m = 1; m < num_matrices; m++) {
233     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
234     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
235   }
236   for (CeedInt m = 0; m < num_matrices; m++) {
237     // Apply Q^T, interp_from = Q^T interp_from
238     memcpy(interp_from, input_from[m], Q * P_from * q_comp * sizeof(input_from[m][0]));
239     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q * q_comp, P_from, P_to, P_from, 1));
240 
241     // Apply Rinv, output_project = Rinv interp_from
242     for (CeedInt j = 0; j < P_from; j++) {  // Column j
243       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
244       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
245         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
246         for (CeedInt k = i + 1; k < P_to; k++) {
247           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
248         }
249         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
250       }
251     }
252   }
253 
254   // Cleanup
255   CeedCall(CeedFree(&tau));
256   CeedCall(CeedFree(&interp_to));
257   CeedCall(CeedFree(&interp_from));
258 
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 /// @}
263 
264 /// ----------------------------------------------------------------------------
265 /// Ceed Backend API
266 /// ----------------------------------------------------------------------------
267 /// @addtogroup CeedBasisBackend
268 /// @{
269 
270 /**
271   @brief Return collocated grad matrix
272 
273   @param[in]  basis         CeedBasis
274   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
275 
276   @return An error code: 0 - success, otherwise - failure
277 
278   @ref Backend
279 **/
280 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
281   int         i, j, k;
282   Ceed        ceed;
283   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
284   CeedScalar *interp_1d, *grad_1d, *tau;
285 
286   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
287   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
288   CeedCall(CeedMalloc(Q_1d, &tau));
289   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
290   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
291 
292   // QR Factorization, interp_1d = Q R
293   CeedCall(CeedBasisGetCeed(basis, &ceed));
294   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
295   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
296 
297   // Apply Rinv, collo_grad_1d = grad_1d Rinv
298   for (i = 0; i < Q_1d; i++) {  // Row i
299     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
300     for (j = 1; j < P_1d; j++) {  // Column j
301       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
302       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
303       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
304     }
305     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
306   }
307 
308   // Apply Q^T, collo_grad_1d = collo_grad_1d Q^T
309   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
310 
311   CeedCall(CeedFree(&interp_1d));
312   CeedCall(CeedFree(&grad_1d));
313   CeedCall(CeedFree(&tau));
314   return CEED_ERROR_SUCCESS;
315 }
316 
317 /**
318   @brief Get tensor status for given CeedBasis
319 
320   @param[in]  basis     CeedBasis
321   @param[out] is_tensor Variable to store tensor status
322 
323   @return An error code: 0 - success, otherwise - failure
324 
325   @ref Backend
326 **/
327 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
328   *is_tensor = basis->tensor_basis;
329   return CEED_ERROR_SUCCESS;
330 }
331 
332 /**
333   @brief Get backend data of a CeedBasis
334 
335   @param[in]  basis CeedBasis
336   @param[out] data  Variable to store data
337 
338   @return An error code: 0 - success, otherwise - failure
339 
340   @ref Backend
341 **/
342 int CeedBasisGetData(CeedBasis basis, void *data) {
343   *(void **)data = basis->data;
344   return CEED_ERROR_SUCCESS;
345 }
346 
347 /**
348   @brief Set backend data of a CeedBasis
349 
350   @param[in,out] basis  CeedBasis
351   @param[in]     data   Data to set
352 
353   @return An error code: 0 - success, otherwise - failure
354 
355   @ref Backend
356 **/
357 int CeedBasisSetData(CeedBasis basis, void *data) {
358   basis->data = data;
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Increment the reference counter for a CeedBasis
364 
365   @param[in,out] basis Basis to increment the reference counter
366 
367   @return An error code: 0 - success, otherwise - failure
368 
369   @ref Backend
370 **/
371 int CeedBasisReference(CeedBasis basis) {
372   basis->ref_count++;
373   return CEED_ERROR_SUCCESS;
374 }
375 
376 /**
377   @brief Get number of Q-vector components for given CeedBasis
378 
379   @param[in]  basis  CeedBasis
380   @param[in]  eval_mode \ref CEED_EVAL_INTERP to use interpolated values,
381                           \ref CEED_EVAL_GRAD to use gradients,
382                           \ref CEED_EVAL_DIV to use divergence,
383                           \ref CEED_EVAL_CURL to use curl.
384   @param[out] q_comp Variable to store number of Q-vector components of basis
385 
386   @return An error code: 0 - success, otherwise - failure
387 
388   @ref Backend
389 **/
390 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedEvalMode eval_mode, CeedInt *q_comp) {
391   switch (eval_mode) {
392     case CEED_EVAL_INTERP:
393       *q_comp = (basis->fe_space == CEED_FE_SPACE_H1) ? 1 : basis->dim;
394       break;
395     case CEED_EVAL_GRAD:
396       *q_comp = basis->dim;
397       break;
398     case CEED_EVAL_DIV:
399       *q_comp = 1;
400       break;
401     case CEED_EVAL_CURL:
402       *q_comp = (basis->dim < 3) ? 1 : basis->dim;
403       break;
404     case CEED_EVAL_NONE:
405     case CEED_EVAL_WEIGHT:
406       *q_comp = 1;
407       break;
408   }
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
414 
415   @param[in]  basis     Basis to estimate FLOPs for
416   @param[in]  t_mode    Apply basis or transpose
417   @param[in]  eval_mode Basis evaluation mode
418   @param[out] flops     Address of variable to hold FLOPs estimate
419 
420   @ref Backend
421 **/
422 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
423   bool is_tensor;
424 
425   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
426   if (is_tensor) {
427     CeedInt dim, num_comp, P_1d, Q_1d;
428     CeedCall(CeedBasisGetDimension(basis, &dim));
429     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
430     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
431     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
432     if (t_mode == CEED_TRANSPOSE) {
433       P_1d = Q_1d;
434       Q_1d = P_1d;
435     }
436     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
437     for (CeedInt d = 0; d < dim; d++) {
438       tensor_flops += 2 * pre * P_1d * post * Q_1d;
439       pre /= P_1d;
440       post *= Q_1d;
441     }
442     switch (eval_mode) {
443       case CEED_EVAL_NONE:
444         *flops = 0;
445         break;
446       case CEED_EVAL_INTERP:
447         *flops = tensor_flops;
448         break;
449       case CEED_EVAL_GRAD:
450         *flops = tensor_flops * 2;
451         break;
452       case CEED_EVAL_DIV:
453         // LCOV_EXCL_START
454         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
455         break;
456       case CEED_EVAL_CURL:
457         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
458         break;
459       // LCOV_EXCL_STOP
460       case CEED_EVAL_WEIGHT:
461         *flops = dim * CeedIntPow(Q_1d, dim);
462         break;
463     }
464   } else {
465     CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
466     CeedCall(CeedBasisGetDimension(basis, &dim));
467     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
468     CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
469     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
470     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
471     switch (eval_mode) {
472       case CEED_EVAL_NONE:
473         *flops = 0;
474         break;
475       case CEED_EVAL_INTERP:
476       case CEED_EVAL_GRAD:
477       case CEED_EVAL_DIV:
478       case CEED_EVAL_CURL:
479         *flops = num_nodes * num_qpts * num_comp * q_comp;
480         break;
481       case CEED_EVAL_WEIGHT:
482         *flops = 0;
483         break;
484     }
485   }
486 
487   return CEED_ERROR_SUCCESS;
488 }
489 
490 /**
491   @brief Get CeedFESpace for a CeedBasis
492 
493   @param[in]  basis     CeedBasis
494   @param[out] fe_space  Variable to store CeedFESpace
495 
496   @return An error code: 0 - success, otherwise - failure
497 
498   @ref Backend
499 **/
500 int CeedBasisGetFESpace(CeedBasis basis, CeedFESpace *fe_space) {
501   *fe_space = basis->fe_space;
502   return CEED_ERROR_SUCCESS;
503 }
504 
505 /**
506   @brief Get dimension for given CeedElemTopology
507 
508   @param[in]  topo CeedElemTopology
509   @param[out] dim  Variable to store dimension of topology
510 
511   @return An error code: 0 - success, otherwise - failure
512 
513   @ref Backend
514 **/
515 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
516   *dim = (CeedInt)topo >> 16;
517   return CEED_ERROR_SUCCESS;
518 }
519 
520 /**
521   @brief Get CeedTensorContract of a CeedBasis
522 
523   @param[in]  basis     CeedBasis
524   @param[out] contract  Variable to store CeedTensorContract
525 
526   @return An error code: 0 - success, otherwise - failure
527 
528   @ref Backend
529 **/
530 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
531   *contract = basis->contract;
532   return CEED_ERROR_SUCCESS;
533 }
534 
535 /**
536   @brief Set CeedTensorContract of a CeedBasis
537 
538   @param[in,out] basis    CeedBasis
539   @param[in]     contract CeedTensorContract to set
540 
541   @return An error code: 0 - success, otherwise - failure
542 
543   @ref Backend
544 **/
545 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
546   basis->contract = contract;
547   CeedCall(CeedTensorContractReference(contract));
548   return CEED_ERROR_SUCCESS;
549 }
550 
551 /**
552   @brief Return a reference implementation of matrix multiplication C = A B.
553 
554   Note: This is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
555 
556   @param[in]  ceed  Ceed context for error handling
557   @param[in]  mat_A Row-major matrix A
558   @param[in]  mat_B Row-major matrix B
559   @param[out] mat_C Row-major output matrix C
560   @param[in]  m     Number of rows of C
561   @param[in]  n     Number of columns of C
562   @param[in]  kk    Number of columns of A/rows of B
563 
564   @return An error code: 0 - success, otherwise - failure
565 
566   @ref Utility
567 **/
568 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
569   for (CeedInt i = 0; i < m; i++) {
570     for (CeedInt j = 0; j < n; j++) {
571       CeedScalar sum = 0;
572       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
573       mat_C[j + i * n] = sum;
574     }
575   }
576   return CEED_ERROR_SUCCESS;
577 }
578 
579 /**
580   @brief Return QR Factorization of a matrix
581 
582   @param[in]     ceed Ceed context for error handling
583   @param[in,out] mat  Row-major matrix to be factorized in place
584   @param[in,out] tau  Vector of length m of scaling factors
585   @param[in]     m    Number of rows
586   @param[in]     n    Number of columns
587 
588   @return An error code: 0 - success, otherwise - failure
589 
590   @ref Utility
591 **/
592 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
593   CeedScalar v[m];
594 
595   // Check matrix shape
596   if (n > m) {
597     // LCOV_EXCL_START
598     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
599     // LCOV_EXCL_STOP
600   }
601 
602   for (CeedInt i = 0; i < n; i++) {
603     if (i >= m - 1) {  // last row of matrix, no reflection needed
604       tau[i] = 0.;
605       break;
606     }
607     // Calculate Householder vector, magnitude
608     CeedScalar sigma = 0.0;
609     v[i]             = mat[i + n * i];
610     for (CeedInt j = i + 1; j < m; j++) {
611       v[j] = mat[i + n * j];
612       sigma += v[j] * v[j];
613     }
614     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
615     CeedScalar R_ii = -copysign(norm, v[i]);
616     v[i] -= R_ii;
617     // norm of v[i:m] after modification above and scaling below
618     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
619     //   tau = 2 / (norm*norm)
620     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
621     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
622 
623     // Apply Householder reflector to lower right panel
624     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
625     // Save v
626     mat[i + n * i] = R_ii;
627     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
628   }
629   return CEED_ERROR_SUCCESS;
630 }
631 
632 /**
633   @brief Apply Householder Q matrix
634 
635   Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn.
636 
637   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
638   @param[in]     mat_Q  Householder Q matrix
639   @param[in]     tau    Householder scaling factors
640   @param[in]     t_mode Transpose mode for application
641   @param[in]     m      Number of rows in A
642   @param[in]     n      Number of columns in A
643   @param[in]     k      Number of elementary reflectors in Q, k<m
644   @param[in]     row    Row stride in A
645   @param[in]     col    Col stride in A
646 
647   @return An error code: 0 - success, otherwise - failure
648 
649   @ref Utility
650 **/
651 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
652                           CeedInt k, CeedInt row, CeedInt col) {
653   CeedScalar *v;
654   CeedCall(CeedMalloc(m, &v));
655   for (CeedInt ii = 0; ii < k; ii++) {
656     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
657     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
658     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
659     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
660   }
661   CeedCall(CeedFree(&v));
662   return CEED_ERROR_SUCCESS;
663 }
664 
665 /**
666   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
667 
668   @param[in]     ceed   Ceed context for error handling
669   @param[in,out] mat    Row-major matrix to be factorized in place
670   @param[out]    lambda Vector of length n of eigenvalues
671   @param[in]     n      Number of rows/columns
672 
673   @return An error code: 0 - success, otherwise - failure
674 
675   @ref Utility
676 **/
677 CeedPragmaOptimizeOff
678 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
679   // Check bounds for clang-tidy
680   if (n < 2) {
681     // LCOV_EXCL_START
682     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
683     // LCOV_EXCL_STOP
684   }
685 
686   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
687 
688   // Copy mat to mat_T and set mat to I
689   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
690   for (CeedInt i = 0; i < n; i++) {
691     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
692   }
693 
694   // Reduce to tridiagonal
695   for (CeedInt i = 0; i < n - 1; i++) {
696     // Calculate Householder vector, magnitude
697     CeedScalar sigma = 0.0;
698     v[i]             = mat_T[i + n * (i + 1)];
699     for (CeedInt j = i + 1; j < n - 1; j++) {
700       v[j] = mat_T[i + n * (j + 1)];
701       sigma += v[j] * v[j];
702     }
703     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
704     CeedScalar R_ii = -copysign(norm, v[i]);
705     v[i] -= R_ii;
706     // norm of v[i:m] after modification above and scaling below
707     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
708     //   tau = 2 / (norm*norm)
709     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
710     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
711 
712     // Update sub and super diagonal
713     for (CeedInt j = i + 2; j < n; j++) {
714       mat_T[i + n * j] = 0;
715       mat_T[j + n * i] = 0;
716     }
717     // Apply symmetric Householder reflector to lower right panel
718     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
719     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
720 
721     // Save v
722     mat_T[i + n * (i + 1)] = R_ii;
723     mat_T[(i + 1) + n * i] = R_ii;
724     for (CeedInt j = i + 1; j < n - 1; j++) {
725       mat_T[i + n * (j + 1)] = v[j];
726     }
727   }
728   // Backwards accumulation of Q
729   for (CeedInt i = n - 2; i >= 0; i--) {
730     if (tau[i] > 0.0) {
731       v[i] = 1;
732       for (CeedInt j = i + 1; j < n - 1; j++) {
733         v[j]                   = mat_T[i + n * (j + 1)];
734         mat_T[i + n * (j + 1)] = 0;
735       }
736       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
737     }
738   }
739 
740   // Reduce sub and super diagonal
741   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
742   CeedScalar tol = CEED_EPSILON;
743 
744   while (itr < max_itr) {
745     // Update p, q, size of reduced portions of diagonal
746     p = 0;
747     q = 0;
748     for (CeedInt i = n - 2; i >= 0; i--) {
749       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
750       else break;
751     }
752     for (CeedInt i = 0; i < n - q - 1; i++) {
753       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
754       else break;
755     }
756     if (q == n - 1) break;  // Finished reducing
757 
758     // Reduce tridiagonal portion
759     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
760     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
761     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
762     CeedScalar x  = mat_T[p + n * p] - mu;
763     CeedScalar z  = mat_T[p + n * (p + 1)];
764     for (CeedInt k = p; k < n - q - 1; k++) {
765       // Compute Givens rotation
766       CeedScalar c = 1, s = 0;
767       if (fabs(z) > tol) {
768         if (fabs(z) > fabs(x)) {
769           CeedScalar tau = -x / z;
770           s = 1 / sqrt(1 + tau * tau), c = s * tau;
771         } else {
772           CeedScalar tau = -z / x;
773           c = 1 / sqrt(1 + tau * tau), s = c * tau;
774         }
775       }
776 
777       // Apply Givens rotation to T
778       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
779       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
780 
781       // Apply Givens rotation to Q
782       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
783 
784       // Update x, z
785       if (k < n - q - 2) {
786         x = mat_T[k + n * (k + 1)];
787         z = mat_T[k + n * (k + 2)];
788       }
789     }
790     itr++;
791   }
792 
793   // Save eigenvalues
794   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
795 
796   // Check convergence
797   if (itr == max_itr && q < n - 1) {
798     // LCOV_EXCL_START
799     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
800     // LCOV_EXCL_STOP
801   }
802   return CEED_ERROR_SUCCESS;
803 }
804 CeedPragmaOptimizeOn
805 
806 /**
807   @brief Return Simultaneous Diagonalization of two matrices.
808 
809   This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
810   We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
811   This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
812 
813   @param[in]  ceed   Ceed context for error handling
814   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
815   @param[in]  mat_B  Row-major matrix to be factorized to identity
816   @param[out] mat_X  Row-major orthogonal matrix
817   @param[out] lambda Vector of length n of generalized eigenvalues
818   @param[in]  n      Number of rows/columns
819 
820   @return An error code: 0 - success, otherwise - failure
821 
822   @ref Utility
823 **/
824 CeedPragmaOptimizeOff
825 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
826   CeedScalar *mat_C, *mat_G, *vec_D;
827   CeedCall(CeedCalloc(n * n, &mat_C));
828   CeedCall(CeedCalloc(n * n, &mat_G));
829   CeedCall(CeedCalloc(n, &vec_D));
830 
831   // Compute B = G D G^T
832   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
833   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
834 
835   // Sort eigenvalues
836   for (CeedInt i = n - 1; i >= 0; i--) {
837     for (CeedInt j = 0; j < i; j++) {
838       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
839         CeedScalar temp;
840         temp         = vec_D[j];
841         vec_D[j]     = vec_D[j + 1];
842         vec_D[j + 1] = temp;
843         for (CeedInt k = 0; k < n; k++) {
844           temp                 = mat_G[k * n + j];
845           mat_G[k * n + j]     = mat_G[k * n + j + 1];
846           mat_G[k * n + j + 1] = temp;
847         }
848       }
849     }
850   }
851 
852   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
853   //           = D^-1/2 G^T A G D^-1/2
854   // -- D = D^-1/2
855   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
856   // -- G = G D^-1/2
857   // -- C = D^-1/2 G^T
858   for (CeedInt i = 0; i < n; i++) {
859     for (CeedInt j = 0; j < n; j++) {
860       mat_G[i * n + j] *= vec_D[j];
861       mat_C[j * n + i] = mat_G[i * n + j];
862     }
863   }
864   // -- X = (D^-1/2 G^T) A
865   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
866   // -- C = (D^-1/2 G^T A) (G D^-1/2)
867   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
868 
869   // Compute Q^T C Q = lambda
870   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
871 
872   // Sort eigenvalues
873   for (CeedInt i = n - 1; i >= 0; i--) {
874     for (CeedInt j = 0; j < i; j++) {
875       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
876         CeedScalar temp;
877         temp          = lambda[j];
878         lambda[j]     = lambda[j + 1];
879         lambda[j + 1] = temp;
880         for (CeedInt k = 0; k < n; k++) {
881           temp                 = mat_C[k * n + j];
882           mat_C[k * n + j]     = mat_C[k * n + j + 1];
883           mat_C[k * n + j + 1] = temp;
884         }
885       }
886     }
887   }
888 
889   // Set X = (G D^1/2)^-T Q
890   //       = G D^-1/2 Q
891   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
892 
893   // Cleanup
894   CeedCall(CeedFree(&mat_C));
895   CeedCall(CeedFree(&mat_G));
896   CeedCall(CeedFree(&vec_D));
897   return CEED_ERROR_SUCCESS;
898 }
899 CeedPragmaOptimizeOn
900 
901 /// @}
902 
903 /// ----------------------------------------------------------------------------
904 /// CeedBasis Public API
905 /// ----------------------------------------------------------------------------
906 /// @addtogroup CeedBasisUser
907 /// @{
908 
909 /**
910   @brief Create a tensor-product basis for H^1 discretizations
911 
912   @param[in]  ceed        Ceed object where the CeedBasis will be created
913   @param[in]  dim         Topological dimension
914   @param[in]  num_comp    Number of field components (1 for scalar fields)
915   @param[in]  P_1d        Number of nodes in one dimension
916   @param[in]  Q_1d        Number of quadrature points in one dimension
917   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
918   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
919   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
920   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
921   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref User
926 **/
927 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
928                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
929   if (!ceed->BasisCreateTensorH1) {
930     Ceed delegate;
931     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
932 
933     if (!delegate) {
934       // LCOV_EXCL_START
935       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
936       // LCOV_EXCL_STOP
937     }
938 
939     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
940     return CEED_ERROR_SUCCESS;
941   }
942 
943   if (dim < 1) {
944     // LCOV_EXCL_START
945     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
946     // LCOV_EXCL_STOP
947   }
948 
949   if (num_comp < 1) {
950     // LCOV_EXCL_START
951     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
952     // LCOV_EXCL_STOP
953   }
954 
955   if (P_1d < 1) {
956     // LCOV_EXCL_START
957     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
958     // LCOV_EXCL_STOP
959   }
960 
961   if (Q_1d < 1) {
962     // LCOV_EXCL_START
963     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
964     // LCOV_EXCL_STOP
965   }
966 
967   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
968 
969   CeedCall(CeedCalloc(1, basis));
970   (*basis)->ceed = ceed;
971   CeedCall(CeedReference(ceed));
972   (*basis)->ref_count    = 1;
973   (*basis)->tensor_basis = 1;
974   (*basis)->dim          = dim;
975   (*basis)->topo         = topo;
976   (*basis)->num_comp     = num_comp;
977   (*basis)->P_1d         = P_1d;
978   (*basis)->Q_1d         = Q_1d;
979   (*basis)->P            = CeedIntPow(P_1d, dim);
980   (*basis)->Q            = CeedIntPow(Q_1d, dim);
981   (*basis)->fe_space     = CEED_FE_SPACE_H1;
982   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
983   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
984   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
985   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
986   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
987   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
988   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
989   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
990   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
991   return CEED_ERROR_SUCCESS;
992 }
993 
994 /**
995   @brief Create a tensor-product Lagrange basis
996 
997   @param[in]  ceed      Ceed object where the CeedBasis will be created
998   @param[in]  dim       Topological dimension of element
999   @param[in]  num_comp  Number of field components (1 for scalar fields)
1000   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
1001                           The polynomial degree of the resulting Q_k element is k=P-1.
1002   @param[in]  Q         Number of quadrature points in one dimension.
1003   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
1004   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 
1008   @ref User
1009 **/
1010 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
1011   // Allocate
1012   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
1013   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
1014 
1015   if (dim < 1) {
1016     // LCOV_EXCL_START
1017     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
1018     // LCOV_EXCL_STOP
1019   }
1020 
1021   if (num_comp < 1) {
1022     // LCOV_EXCL_START
1023     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1024     // LCOV_EXCL_STOP
1025   }
1026 
1027   if (P < 1) {
1028     // LCOV_EXCL_START
1029     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1030     // LCOV_EXCL_STOP
1031   }
1032 
1033   if (Q < 1) {
1034     // LCOV_EXCL_START
1035     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1036     // LCOV_EXCL_STOP
1037   }
1038 
1039   // Get Nodes and Weights
1040   CeedCall(CeedCalloc(P * Q, &interp_1d));
1041   CeedCall(CeedCalloc(P * Q, &grad_1d));
1042   CeedCall(CeedCalloc(P, &nodes));
1043   CeedCall(CeedCalloc(Q, &q_ref_1d));
1044   CeedCall(CeedCalloc(Q, &q_weight_1d));
1045   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
1046   switch (quad_mode) {
1047     case CEED_GAUSS:
1048       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
1049       break;
1050     case CEED_GAUSS_LOBATTO:
1051       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
1052       break;
1053   }
1054   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
1055 
1056   // Build B, D matrix
1057   // Fornberg, 1998
1058   for (i = 0; i < Q; i++) {
1059     c1                   = 1.0;
1060     c3                   = nodes[0] - q_ref_1d[i];
1061     interp_1d[i * P + 0] = 1.0;
1062     for (j = 1; j < P; j++) {
1063       c2 = 1.0;
1064       c4 = c3;
1065       c3 = nodes[j] - q_ref_1d[i];
1066       for (k = 0; k < j; k++) {
1067         dx = nodes[j] - nodes[k];
1068         c2 *= dx;
1069         if (k == j - 1) {
1070           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
1071           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
1072         }
1073         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
1074         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
1075       }
1076       c1 = c2;
1077     }
1078   }
1079   // Pass to CeedBasisCreateTensorH1
1080   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1081 cleanup:
1082   CeedCall(CeedFree(&interp_1d));
1083   CeedCall(CeedFree(&grad_1d));
1084   CeedCall(CeedFree(&nodes));
1085   CeedCall(CeedFree(&q_ref_1d));
1086   CeedCall(CeedFree(&q_weight_1d));
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Create a non tensor-product basis for H^1 discretizations
1092 
1093   @param[in]  ceed      Ceed object where the CeedBasis will be created
1094   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
1095   @param[in]  num_comp  Number of field components (1 for scalar fields)
1096   @param[in]  num_nodes Total number of nodes
1097   @param[in]  num_qpts  Total number of quadrature points
1098   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1099   @param[in]  grad      Row-major (dim * num_qpts * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
1100   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1101   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1102   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1103 
1104   @return An error code: 0 - success, otherwise - failure
1105 
1106   @ref User
1107 **/
1108 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1109                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1110   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1111 
1112   if (!ceed->BasisCreateH1) {
1113     Ceed delegate;
1114     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1115 
1116     if (!delegate) {
1117       // LCOV_EXCL_START
1118       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
1119       // LCOV_EXCL_STOP
1120     }
1121 
1122     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1123     return CEED_ERROR_SUCCESS;
1124   }
1125 
1126   if (num_comp < 1) {
1127     // LCOV_EXCL_START
1128     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1129     // LCOV_EXCL_STOP
1130   }
1131 
1132   if (num_nodes < 1) {
1133     // LCOV_EXCL_START
1134     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1135     // LCOV_EXCL_STOP
1136   }
1137 
1138   if (num_qpts < 1) {
1139     // LCOV_EXCL_START
1140     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1141     // LCOV_EXCL_STOP
1142   }
1143 
1144   CeedCall(CeedCalloc(1, basis));
1145 
1146   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1147 
1148   (*basis)->ceed = ceed;
1149   CeedCall(CeedReference(ceed));
1150   (*basis)->ref_count    = 1;
1151   (*basis)->tensor_basis = 0;
1152   (*basis)->dim          = dim;
1153   (*basis)->topo         = topo;
1154   (*basis)->num_comp     = num_comp;
1155   (*basis)->P            = P;
1156   (*basis)->Q            = Q;
1157   (*basis)->fe_space     = CEED_FE_SPACE_H1;
1158   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1159   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1160   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1161   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1162   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1163   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1164   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1165   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1166   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1167   return CEED_ERROR_SUCCESS;
1168 }
1169 
1170 /**
1171   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1172 
1173   @param[in]  ceed      Ceed object where the CeedBasis will be created
1174   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1175   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1176   @param[in]  num_nodes Total number of nodes (dofs per element)
1177   @param[in]  num_qpts  Total number of quadrature points
1178   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1179   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of basis functions at quadrature points
1180   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1181   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1182   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1183 
1184   @return An error code: 0 - success, otherwise - failure
1185 
1186   @ref User
1187 **/
1188 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1189                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1190   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1191 
1192   if (!ceed->BasisCreateHdiv) {
1193     Ceed delegate;
1194     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1195 
1196     if (!delegate) {
1197       // LCOV_EXCL_START
1198       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1199       // LCOV_EXCL_STOP
1200     }
1201 
1202     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1203     return CEED_ERROR_SUCCESS;
1204   }
1205 
1206   if (num_comp < 1) {
1207     // LCOV_EXCL_START
1208     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1209     // LCOV_EXCL_STOP
1210   }
1211 
1212   if (num_nodes < 1) {
1213     // LCOV_EXCL_START
1214     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1215     // LCOV_EXCL_STOP
1216   }
1217 
1218   if (num_qpts < 1) {
1219     // LCOV_EXCL_START
1220     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1221     // LCOV_EXCL_STOP
1222   }
1223 
1224   CeedCall(CeedCalloc(1, basis));
1225 
1226   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1227 
1228   (*basis)->ceed = ceed;
1229   CeedCall(CeedReference(ceed));
1230   (*basis)->ref_count    = 1;
1231   (*basis)->tensor_basis = 0;
1232   (*basis)->dim          = dim;
1233   (*basis)->topo         = topo;
1234   (*basis)->num_comp     = num_comp;
1235   (*basis)->P            = P;
1236   (*basis)->Q            = Q;
1237   (*basis)->fe_space     = CEED_FE_SPACE_HDIV;
1238   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1239   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1240   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1241   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1242   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1243   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1244   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1245   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1246   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1247   return CEED_ERROR_SUCCESS;
1248 }
1249 
1250 /**
1251   @brief Create a non tensor-product basis for \f$H(\mathrm{curl})\f$ discretizations
1252 
1253   @param[in]  ceed      Ceed object where the CeedBasis will be created
1254   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1255   @param[in]  num_comp  Number of components (usually 1 for vectors in H(curl) bases)
1256   @param[in]  num_nodes Total number of nodes (dofs per element)
1257   @param[in]  num_qpts  Total number of quadrature points
1258   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1259   @param[in]  curl      Row-major (curl_comp * num_qpts * num_nodes, curl_comp = 1 if dim < 3 else dim) matrix expressing curl of basis functions at
1260 quadrature points
1261   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1262   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1263   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1264 
1265   @return An error code: 0 - success, otherwise - failure
1266 
1267   @ref User
1268 **/
1269 int CeedBasisCreateHcurl(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1270                          const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1271   CeedInt Q = num_qpts, P = num_nodes, dim = 0, curl_comp = 0;
1272 
1273   if (!ceed->BasisCreateHdiv) {
1274     Ceed delegate;
1275     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1276 
1277     if (!delegate) {
1278       // LCOV_EXCL_START
1279       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHcurl");
1280       // LCOV_EXCL_STOP
1281     }
1282 
1283     CeedCall(CeedBasisCreateHcurl(delegate, topo, num_comp, num_nodes, num_qpts, interp, curl, q_ref, q_weight, basis));
1284     return CEED_ERROR_SUCCESS;
1285   }
1286 
1287   if (num_comp < 1) {
1288     // LCOV_EXCL_START
1289     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1290     // LCOV_EXCL_STOP
1291   }
1292 
1293   if (num_nodes < 1) {
1294     // LCOV_EXCL_START
1295     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1296     // LCOV_EXCL_STOP
1297   }
1298 
1299   if (num_qpts < 1) {
1300     // LCOV_EXCL_START
1301     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1302     // LCOV_EXCL_STOP
1303   }
1304 
1305   CeedCall(CeedCalloc(1, basis));
1306 
1307   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1308   curl_comp = (dim < 3) ? 1 : dim;
1309 
1310   (*basis)->ceed = ceed;
1311   CeedCall(CeedReference(ceed));
1312   (*basis)->ref_count    = 1;
1313   (*basis)->tensor_basis = 0;
1314   (*basis)->dim          = dim;
1315   (*basis)->topo         = topo;
1316   (*basis)->num_comp     = num_comp;
1317   (*basis)->P            = P;
1318   (*basis)->Q            = Q;
1319   (*basis)->fe_space     = CEED_FE_SPACE_HCURL;
1320   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1321   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1322   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1323   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1324   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1325   CeedCall(CeedMalloc(curl_comp * Q * P, &(*basis)->curl));
1326   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1327   if (curl) memcpy((*basis)->curl, curl, curl_comp * Q * P * sizeof(curl[0]));
1328   CeedCall(ceed->BasisCreateHcurl(topo, dim, P, Q, interp, curl, q_ref, q_weight, *basis));
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 /**
1333   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1334 
1335   Only `CEED_EVAL_INTERP` will be valid for the new basis, `basis_project`.
1336   For H^1 spaces, `CEED_EVAL_GRAD` will also be valid.
1337   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
1338 factorization.
1339   The gradient (for the H^1 case) is given by `grad_project = interp_to^+ * grad_from`.
1340 
1341   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
1342 
1343   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has.
1344         If `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1345 
1346   @param[in]  basis_from    CeedBasis to prolong from
1347   @param[in]  basis_to      CeedBasis to prolong to
1348   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
1349 
1350   @return An error code: 0 - success, otherwise - failure
1351 
1352   @ref User
1353 **/
1354 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1355   Ceed ceed;
1356   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1357 
1358   // Create projection matrix
1359   CeedScalar *interp_project, *grad_project;
1360   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1361 
1362   // Build basis
1363   bool        is_tensor;
1364   CeedFESpace fe_space;
1365   CeedInt     dim, num_comp;
1366   CeedScalar *q_ref, *q_weight;
1367   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
1368   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space));
1369   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1370   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1371   if (is_tensor) {
1372     CeedInt P_1d_to, P_1d_from;
1373     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1374     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1375     CeedCall(CeedCalloc(P_1d_to, &q_ref));
1376     CeedCall(CeedCalloc(P_1d_to, &q_weight));
1377     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1378   } else {
1379     CeedElemTopology topo;
1380     CeedCall(CeedBasisGetTopology(basis_to, &topo));
1381     CeedInt num_nodes_to, num_nodes_from;
1382     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1383     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1384     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
1385     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
1386     switch (fe_space) {
1387       case CEED_FE_SPACE_H1:
1388         CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1389         break;
1390       case CEED_FE_SPACE_HDIV:
1391         CeedCall(
1392             CeedBasisCreateHdiv(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1393         break;
1394       case CEED_FE_SPACE_HCURL:
1395         CeedCall(
1396             CeedBasisCreateHcurl(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1397         break;
1398     }
1399   }
1400 
1401   // Cleanup
1402   CeedCall(CeedFree(&interp_project));
1403   CeedCall(CeedFree(&grad_project));
1404   CeedCall(CeedFree(&q_ref));
1405   CeedCall(CeedFree(&q_weight));
1406 
1407   return CEED_ERROR_SUCCESS;
1408 }
1409 
1410 /**
1411   @brief Copy the pointer to a CeedBasis.
1412 
1413   Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis.
1414         This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis.
1415 
1416   @param[in]     basis      CeedBasis to copy reference to
1417   @param[in,out] basis_copy Variable to store copied reference
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref User
1422 **/
1423 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1424   if (basis != CEED_BASIS_COLLOCATED) CeedCall(CeedBasisReference(basis));
1425   CeedCall(CeedBasisDestroy(basis_copy));
1426   *basis_copy = basis;
1427   return CEED_ERROR_SUCCESS;
1428 }
1429 
1430 /**
1431   @brief View a CeedBasis
1432 
1433   @param[in] basis  CeedBasis to view
1434   @param[in] stream Stream to view to, e.g., stdout
1435 
1436   @return An error code: 0 - success, otherwise - failure
1437 
1438   @ref User
1439 **/
1440 int CeedBasisView(CeedBasis basis, FILE *stream) {
1441   CeedElemTopology topo     = basis->topo;
1442   CeedFESpace      fe_space = basis->fe_space;
1443   CeedInt          q_comp   = 0;
1444 
1445   // Print FE space and element topology of the basis
1446   if (basis->tensor_basis) {
1447     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1448             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
1449   } else {
1450     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1451             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
1452   }
1453   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1454   if (basis->tensor_basis) {  // tensor basis
1455     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1456     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1457     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1458     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1459   } else {  // non-tensor basis
1460     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1461     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1462     CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp));
1463     CeedCall(CeedScalarView("interp", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->interp, stream));
1464     if (basis->grad) {
1465       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp));
1466       CeedCall(CeedScalarView("grad", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->grad, stream));
1467     }
1468     if (basis->div) {
1469       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp));
1470       CeedCall(CeedScalarView("div", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->div, stream));
1471     }
1472     if (basis->curl) {
1473       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp));
1474       CeedCall(CeedScalarView("curl", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->curl, stream));
1475     }
1476   }
1477   return CEED_ERROR_SUCCESS;
1478 }
1479 
1480 /**
1481   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1482 
1483   @param[in]  basis      CeedBasis to evaluate
1484   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1485                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1486   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1487                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1488   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1489                           \ref CEED_EVAL_INTERP to use interpolated values,
1490                           \ref CEED_EVAL_GRAD to use gradients,
1491                           \ref CEED_EVAL_DIV to use divergence,
1492                           \ref CEED_EVAL_CURL to use curl,
1493                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1494   @param[in]  u        Input CeedVector
1495   @param[out] v        Output CeedVector
1496 
1497   @return An error code: 0 - success, otherwise - failure
1498 
1499   @ref User
1500 **/
1501 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1502   CeedSize u_length = 0, v_length;
1503   CeedInt  dim, num_comp, q_comp, num_nodes, num_qpts;
1504   CeedCall(CeedBasisGetDimension(basis, &dim));
1505   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1506   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
1507   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1508   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1509   CeedCall(CeedVectorGetLength(v, &v_length));
1510   if (u) {
1511     CeedCall(CeedVectorGetLength(u, &u_length));
1512   }
1513 
1514   if (!basis->Apply) {
1515     // LCOV_EXCL_START
1516     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1517     // LCOV_EXCL_STOP
1518   }
1519 
1520   // Check compatibility of topological and geometrical dimensions
1521   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1522       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1523     // LCOV_EXCL_START
1524     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1525     // LCOV_EXCL_STOP
1526   }
1527 
1528   // Check vector lengths to prevent out of bounds issues
1529   bool bad_dims = false;
1530   switch (eval_mode) {
1531     case CEED_EVAL_NONE:
1532     case CEED_EVAL_INTERP:
1533     case CEED_EVAL_GRAD:
1534     case CEED_EVAL_DIV:
1535     case CEED_EVAL_CURL:
1536       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * q_comp || v_length < num_elem * num_comp * num_nodes)) ||
1537                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * q_comp || u_length < num_elem * num_comp * num_nodes)));
1538       break;
1539     case CEED_EVAL_WEIGHT:
1540       bad_dims = v_length < num_elem * num_qpts;
1541       break;
1542   }
1543   if (bad_dims) {
1544     // LCOV_EXCL_START
1545     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1546     // LCOV_EXCL_STOP
1547   }
1548 
1549   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1550   return CEED_ERROR_SUCCESS;
1551 }
1552 
1553 /**
1554   @brief Get Ceed associated with a CeedBasis
1555 
1556   @param[in]  basis CeedBasis
1557   @param[out] ceed  Variable to store Ceed
1558 
1559   @return An error code: 0 - success, otherwise - failure
1560 
1561   @ref Advanced
1562 **/
1563 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1564   *ceed = basis->ceed;
1565   return CEED_ERROR_SUCCESS;
1566 }
1567 
1568 /**
1569   @brief Get dimension for given CeedBasis
1570 
1571   @param[in]  basis CeedBasis
1572   @param[out] dim   Variable to store dimension of basis
1573 
1574   @return An error code: 0 - success, otherwise - failure
1575 
1576   @ref Advanced
1577 **/
1578 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1579   *dim = basis->dim;
1580   return CEED_ERROR_SUCCESS;
1581 }
1582 
1583 /**
1584   @brief Get topology for given CeedBasis
1585 
1586   @param[in]  basis CeedBasis
1587   @param[out] topo  Variable to store topology of basis
1588 
1589   @return An error code: 0 - success, otherwise - failure
1590 
1591   @ref Advanced
1592 **/
1593 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1594   *topo = basis->topo;
1595   return CEED_ERROR_SUCCESS;
1596 }
1597 
1598 /**
1599   @brief Get number of components for given CeedBasis
1600 
1601   @param[in]  basis    CeedBasis
1602   @param[out] num_comp Variable to store number of components of basis
1603 
1604   @return An error code: 0 - success, otherwise - failure
1605 
1606   @ref Advanced
1607 **/
1608 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1609   *num_comp = basis->num_comp;
1610   return CEED_ERROR_SUCCESS;
1611 }
1612 
1613 /**
1614   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1615 
1616   @param[in]  basis CeedBasis
1617   @param[out] P     Variable to store number of nodes
1618 
1619   @return An error code: 0 - success, otherwise - failure
1620 
1621   @ref Utility
1622 **/
1623 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1624   *P = basis->P;
1625   return CEED_ERROR_SUCCESS;
1626 }
1627 
1628 /**
1629   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1630 
1631   @param[in]  basis CeedBasis
1632   @param[out] P_1d  Variable to store number of nodes
1633 
1634   @return An error code: 0 - success, otherwise - failure
1635 
1636   @ref Advanced
1637 **/
1638 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1639   if (!basis->tensor_basis) {
1640     // LCOV_EXCL_START
1641     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1642     // LCOV_EXCL_STOP
1643   }
1644 
1645   *P_1d = basis->P_1d;
1646   return CEED_ERROR_SUCCESS;
1647 }
1648 
1649 /**
1650   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1651 
1652   @param[in]  basis CeedBasis
1653   @param[out] Q     Variable to store number of quadrature points
1654 
1655   @return An error code: 0 - success, otherwise - failure
1656 
1657   @ref Utility
1658 **/
1659 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1660   *Q = basis->Q;
1661   return CEED_ERROR_SUCCESS;
1662 }
1663 
1664 /**
1665   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1666 
1667   @param[in]  basis CeedBasis
1668   @param[out] Q_1d  Variable to store number of quadrature points
1669 
1670   @return An error code: 0 - success, otherwise - failure
1671 
1672   @ref Advanced
1673 **/
1674 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1675   if (!basis->tensor_basis) {
1676     // LCOV_EXCL_START
1677     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1678     // LCOV_EXCL_STOP
1679   }
1680 
1681   *Q_1d = basis->Q_1d;
1682   return CEED_ERROR_SUCCESS;
1683 }
1684 
1685 /**
1686   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1687 
1688   @param[in]  basis CeedBasis
1689   @param[out] q_ref Variable to store reference coordinates of quadrature points
1690 
1691   @return An error code: 0 - success, otherwise - failure
1692 
1693   @ref Advanced
1694 **/
1695 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1696   *q_ref = basis->q_ref_1d;
1697   return CEED_ERROR_SUCCESS;
1698 }
1699 
1700 /**
1701   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1702 
1703   @param[in]  basis    CeedBasis
1704   @param[out] q_weight Variable to store quadrature weights
1705 
1706   @return An error code: 0 - success, otherwise - failure
1707 
1708   @ref Advanced
1709 **/
1710 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1711   *q_weight = basis->q_weight_1d;
1712   return CEED_ERROR_SUCCESS;
1713 }
1714 
1715 /**
1716   @brief Get interpolation matrix of a CeedBasis
1717 
1718   @param[in]  basis  CeedBasis
1719   @param[out] interp Variable to store interpolation matrix
1720 
1721   @return An error code: 0 - success, otherwise - failure
1722 
1723   @ref Advanced
1724 **/
1725 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1726   if (!basis->interp && basis->tensor_basis) {
1727     // Allocate
1728     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1729 
1730     // Initialize
1731     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1732 
1733     // Calculate
1734     for (CeedInt d = 0; d < basis->dim; d++) {
1735       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1736         for (CeedInt node = 0; node < basis->P; node++) {
1737           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1738           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1739           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1740         }
1741       }
1742     }
1743   }
1744   *interp = basis->interp;
1745   return CEED_ERROR_SUCCESS;
1746 }
1747 
1748 /**
1749   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1750 
1751   @param[in]  basis     CeedBasis
1752   @param[out] interp_1d Variable to store interpolation matrix
1753 
1754   @return An error code: 0 - success, otherwise - failure
1755 
1756   @ref Backend
1757 **/
1758 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1759   if (!basis->tensor_basis) {
1760     // LCOV_EXCL_START
1761     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1762     // LCOV_EXCL_STOP
1763   }
1764 
1765   *interp_1d = basis->interp_1d;
1766   return CEED_ERROR_SUCCESS;
1767 }
1768 
1769 /**
1770   @brief Get gradient matrix of a CeedBasis
1771 
1772   @param[in]  basis CeedBasis
1773   @param[out] grad  Variable to store gradient matrix
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref Advanced
1778 **/
1779 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1780   if (!basis->grad && basis->tensor_basis) {
1781     // Allocate
1782     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1783 
1784     // Initialize
1785     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1786 
1787     // Calculate
1788     for (CeedInt d = 0; d < basis->dim; d++) {
1789       for (CeedInt i = 0; i < basis->dim; i++) {
1790         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1791           for (CeedInt node = 0; node < basis->P; node++) {
1792             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1793             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1794             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1795             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1796           }
1797         }
1798       }
1799     }
1800   }
1801   *grad = basis->grad;
1802   return CEED_ERROR_SUCCESS;
1803 }
1804 
1805 /**
1806   @brief Get 1D gradient matrix of a tensor product CeedBasis
1807 
1808   @param[in]  basis   CeedBasis
1809   @param[out] grad_1d Variable to store gradient matrix
1810 
1811   @return An error code: 0 - success, otherwise - failure
1812 
1813   @ref Advanced
1814 **/
1815 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1816   if (!basis->tensor_basis) {
1817     // LCOV_EXCL_START
1818     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1819     // LCOV_EXCL_STOP
1820   }
1821 
1822   *grad_1d = basis->grad_1d;
1823   return CEED_ERROR_SUCCESS;
1824 }
1825 
1826 /**
1827   @brief Get divergence matrix of a CeedBasis
1828 
1829   @param[in]  basis CeedBasis
1830   @param[out] div   Variable to store divergence matrix
1831 
1832   @return An error code: 0 - success, otherwise - failure
1833 
1834   @ref Advanced
1835 **/
1836 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1837   if (!basis->div) {
1838     // LCOV_EXCL_START
1839     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1840     // LCOV_EXCL_STOP
1841   }
1842 
1843   *div = basis->div;
1844   return CEED_ERROR_SUCCESS;
1845 }
1846 
1847 /**
1848   @brief Get curl matrix of a CeedBasis
1849 
1850   @param[in]  basis CeedBasis
1851   @param[out] curl  Variable to store curl matrix
1852 
1853   @return An error code: 0 - success, otherwise - failure
1854 
1855   @ref Advanced
1856 **/
1857 int CeedBasisGetCurl(CeedBasis basis, const CeedScalar **curl) {
1858   if (!basis->curl) {
1859     // LCOV_EXCL_START
1860     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have curl matrix.");
1861     // LCOV_EXCL_STOP
1862   }
1863 
1864   *curl = basis->curl;
1865   return CEED_ERROR_SUCCESS;
1866 }
1867 
1868 /**
1869   @brief Destroy a CeedBasis
1870 
1871   @param[in,out] basis CeedBasis to destroy
1872 
1873   @return An error code: 0 - success, otherwise - failure
1874 
1875   @ref User
1876 **/
1877 int CeedBasisDestroy(CeedBasis *basis) {
1878   if (!*basis || *basis == CEED_BASIS_COLLOCATED || --(*basis)->ref_count > 0) {
1879     *basis = NULL;
1880     return CEED_ERROR_SUCCESS;
1881   }
1882   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1883   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1884   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1885   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1886   CeedCall(CeedFree(&(*basis)->interp));
1887   CeedCall(CeedFree(&(*basis)->interp_1d));
1888   CeedCall(CeedFree(&(*basis)->grad));
1889   CeedCall(CeedFree(&(*basis)->grad_1d));
1890   CeedCall(CeedFree(&(*basis)->div));
1891   CeedCall(CeedFree(&(*basis)->curl));
1892   CeedCall(CeedDestroy(&(*basis)->ceed));
1893   CeedCall(CeedFree(basis));
1894   return CEED_ERROR_SUCCESS;
1895 }
1896 
1897 /**
1898   @brief Construct a Gauss-Legendre quadrature
1899 
1900   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1901   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1902   @param[out] q_weight_1d Array of length Q to hold the weights
1903 
1904   @return An error code: 0 - success, otherwise - failure
1905 
1906   @ref Utility
1907 **/
1908 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1909   // Allocate
1910   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1911   // Build q_ref_1d, q_weight_1d
1912   for (CeedInt i = 0; i <= Q / 2; i++) {
1913     // Guess
1914     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1915     // Pn(xi)
1916     P0 = 1.0;
1917     P1 = xi;
1918     P2 = 0.0;
1919     for (CeedInt j = 2; j <= Q; j++) {
1920       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1921       P0 = P1;
1922       P1 = P2;
1923     }
1924     // First Newton Step
1925     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1926     xi  = xi - P2 / dP2;
1927     // Newton to convergence
1928     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1929       P0 = 1.0;
1930       P1 = xi;
1931       for (CeedInt j = 2; j <= Q; j++) {
1932         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1933         P0 = P1;
1934         P1 = P2;
1935       }
1936       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1937       xi  = xi - P2 / dP2;
1938     }
1939     // Save xi, wi
1940     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1941     q_weight_1d[i]         = wi;
1942     q_weight_1d[Q - 1 - i] = wi;
1943     q_ref_1d[i]            = -xi;
1944     q_ref_1d[Q - 1 - i]    = xi;
1945   }
1946   return CEED_ERROR_SUCCESS;
1947 }
1948 
1949 /**
1950   @brief Construct a Gauss-Legendre-Lobatto quadrature
1951 
1952   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1953   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1954   @param[out] q_weight_1d Array of length Q to hold the weights
1955 
1956   @return An error code: 0 - success, otherwise - failure
1957 
1958   @ref Utility
1959 **/
1960 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1961   // Allocate
1962   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1963   // Build q_ref_1d, q_weight_1d
1964   // Set endpoints
1965   if (Q < 2) {
1966     // LCOV_EXCL_START
1967     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1968     // LCOV_EXCL_STOP
1969   }
1970   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1971   if (q_weight_1d) {
1972     q_weight_1d[0]     = wi;
1973     q_weight_1d[Q - 1] = wi;
1974   }
1975   q_ref_1d[0]     = -1.0;
1976   q_ref_1d[Q - 1] = 1.0;
1977   // Interior
1978   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1979     // Guess
1980     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1981     // Pn(xi)
1982     P0 = 1.0;
1983     P1 = xi;
1984     P2 = 0.0;
1985     for (CeedInt j = 2; j < Q; j++) {
1986       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1987       P0 = P1;
1988       P1 = P2;
1989     }
1990     // First Newton step
1991     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1992     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1993     xi   = xi - dP2 / d2P2;
1994     // Newton to convergence
1995     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1996       P0 = 1.0;
1997       P1 = xi;
1998       for (CeedInt j = 2; j < Q; j++) {
1999         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2000         P0 = P1;
2001         P1 = P2;
2002       }
2003       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2004       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2005       xi   = xi - dP2 / d2P2;
2006     }
2007     // Save xi, wi
2008     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
2009     if (q_weight_1d) {
2010       q_weight_1d[i]         = wi;
2011       q_weight_1d[Q - 1 - i] = wi;
2012     }
2013     q_ref_1d[i]         = -xi;
2014     q_ref_1d[Q - 1 - i] = xi;
2015   }
2016   return CEED_ERROR_SUCCESS;
2017 }
2018 
2019 /// @}
2020