xref: /libCEED/interface/ceed-basis.c (revision c85e864000754611328ed43fe87e29f99787d593)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40   Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Compute Givens rotation
66 
67   Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
68 
69   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
70   @param[in]     c      Cosine factor
71   @param[in]     s      Sine factor
72   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
73                           @ref CEED_TRANSPOSE for the opposite rotation
74   @param[in]     i      First row/column to apply rotation
75   @param[in]     k      Second row/column to apply rotation
76   @param[in]     m      Number of rows in A
77   @param[in]     n      Number of columns in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
84   CeedInt stride_j = 1, stride_ik = m, num_its = n;
85   if (t_mode == CEED_NOTRANSPOSE) {
86     stride_j  = n;
87     stride_ik = 1;
88     num_its   = m;
89   }
90 
91   // Apply rotation
92   for (CeedInt j = 0; j < num_its; j++) {
93     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
94     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
95     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
96   }
97   return CEED_ERROR_SUCCESS;
98 }
99 
100 /**
101   @brief View an array stored in a CeedBasis
102 
103   @param[in] name   Name of array
104   @param[in] fp_fmt Printing format
105   @param[in] m      Number of rows in array
106   @param[in] n      Number of columns in array
107   @param[in] a      Array to be viewed
108   @param[in] stream Stream to view to, e.g., stdout
109 
110   @return An error code: 0 - success, otherwise - failure
111 
112   @ref Developer
113 **/
114 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
115   for (CeedInt i = 0; i < m; i++) {
116     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
117     else fprintf(stream, "%12s:", name);
118     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
119     fputs("\n", stream);
120   }
121   return CEED_ERROR_SUCCESS;
122 }
123 
124 /**
125   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
126 
127   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
128   The gradient is given by `grad_project = interp_to^+ * grad_from`, and is only computed for H^1 spaces otherwise it should not be used.
129 
130   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
131 
132   @param[in]  basis_from     CeedBasis to project from
133   @param[in]  basis_to       CeedBasis to project to
134   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
135   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
136 
137   @return An error code: 0 - success, otherwise - failure
138 
139   @ref Developer
140 **/
141 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
142   Ceed ceed;
143   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
144 
145   // Check for compatible quadrature spaces
146   CeedInt Q_to, Q_from;
147   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
148   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
149   if (Q_to != Q_from) {
150     // LCOV_EXCL_START
151     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
152     // LCOV_EXCL_STOP
153   }
154 
155   // Check for matching tensor or non-tensor
156   CeedInt P_to, P_from, Q = Q_to;
157   bool    is_tensor_to, is_tensor_from;
158   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
159   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
160   if (is_tensor_to && is_tensor_from) {
161     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
162     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
163     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
164   } else if (!is_tensor_to && !is_tensor_from) {
165     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
166     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
167   } else {
168     // LCOV_EXCL_START
169     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
170     // LCOV_EXCL_STOP
171   }
172 
173   // Check for matching FE space
174   CeedFESpace fe_space_to, fe_space_from;
175   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space_to));
176   CeedCall(CeedBasisGetFESpace(basis_from, &fe_space_from));
177   if (fe_space_to != fe_space_from) {
178     // LCOV_EXCL_START
179     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be the same FE space type");
180     // LCOV_EXCL_STOP
181   }
182 
183   // Get source matrices
184   CeedInt           dim, q_comp = 1;
185   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL;
186   CeedScalar       *interp_to, *interp_from, *tau;
187   CeedCall(CeedBasisGetDimension(basis_to, &dim));
188   if (is_tensor_to) {
189     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
190     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
191   } else {
192     CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_INTERP, &q_comp));
193     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
194     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
195   }
196   CeedCall(CeedMalloc(Q * P_from * q_comp, &interp_from));
197   CeedCall(CeedMalloc(Q * P_to * q_comp, &interp_to));
198   CeedCall(CeedCalloc(P_to * P_from, interp_project));
199   CeedCall(CeedMalloc(Q * q_comp, &tau));
200 
201   // `grad_project = interp_to^+ * grad_from` is computed for the H^1 space case so the
202   // projection basis will have a gradient operation
203   const CeedScalar *grad_from_source = NULL;
204   if (fe_space_to == CEED_FE_SPACE_H1) {
205     if (is_tensor_to) {
206       CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
207     } else {
208       CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
209     }
210     CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
211   } else {
212     // Projected derivate operator is only calculated for H^1 but we allocate it for the
213     // basis construction later
214     CeedInt q_comp_deriv = 1;
215     if (fe_space_to == CEED_FE_SPACE_HDIV) {
216       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_DIV, &q_comp_deriv));
217     } else if (fe_space_to == CEED_FE_SPACE_HCURL) {
218       CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_CURL, &q_comp_deriv));
219     }
220     CeedCall(CeedCalloc(P_to * P_from * q_comp_deriv, grad_project));
221   }
222 
223   // QR Factorization, interp_to = Q R
224   memcpy(interp_to, interp_to_source, Q * P_to * q_comp * sizeof(interp_to_source[0]));
225   CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q * q_comp, P_to));
226 
227   // Build matrices
228   CeedInt     num_matrices = 1 + (fe_space_to == CEED_FE_SPACE_H1) * (is_tensor_to ? 1 : dim);
229   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
230   input_from[0]     = (CeedScalar *)interp_from_source;
231   output_project[0] = *interp_project;
232   for (CeedInt m = 1; m < num_matrices; m++) {
233     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
234     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
235   }
236   for (CeedInt m = 0; m < num_matrices; m++) {
237     // Apply Q^T, interp_from = Q^T interp_from
238     memcpy(interp_from, input_from[m], Q * P_from * q_comp * sizeof(input_from[m][0]));
239     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q * q_comp, P_from, P_to, P_from, 1));
240 
241     // Apply Rinv, output_project = Rinv interp_from
242     for (CeedInt j = 0; j < P_from; j++) {  // Column j
243       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
244       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
245         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
246         for (CeedInt k = i + 1; k < P_to; k++) {
247           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
248         }
249         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
250       }
251     }
252   }
253 
254   // Cleanup
255   CeedCall(CeedFree(&tau));
256   CeedCall(CeedFree(&interp_to));
257   CeedCall(CeedFree(&interp_from));
258 
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 /// @}
263 
264 /// ----------------------------------------------------------------------------
265 /// Ceed Backend API
266 /// ----------------------------------------------------------------------------
267 /// @addtogroup CeedBasisBackend
268 /// @{
269 
270 /**
271   @brief Return collocated grad matrix
272 
273   @param[in]  basis         CeedBasis
274   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
275 
276   @return An error code: 0 - success, otherwise - failure
277 
278   @ref Backend
279 **/
280 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
281   int         i, j, k;
282   Ceed        ceed;
283   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
284   CeedScalar *interp_1d, *grad_1d, *tau;
285 
286   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
287   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
288   CeedCall(CeedMalloc(Q_1d, &tau));
289   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
290   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
291 
292   // QR Factorization, interp_1d = Q R
293   CeedCall(CeedBasisGetCeed(basis, &ceed));
294   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
295   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
296 
297   // Apply Rinv, collo_grad_1d = grad_1d Rinv
298   for (i = 0; i < Q_1d; i++) {  // Row i
299     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
300     for (j = 1; j < P_1d; j++) {  // Column j
301       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
302       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
303       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
304     }
305     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
306   }
307 
308   // Apply Q^T, collo_grad_1d = collo_grad_1d Q^T
309   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
310 
311   CeedCall(CeedFree(&interp_1d));
312   CeedCall(CeedFree(&grad_1d));
313   CeedCall(CeedFree(&tau));
314   return CEED_ERROR_SUCCESS;
315 }
316 
317 /**
318   @brief Get tensor status for given CeedBasis
319 
320   @param[in]  basis     CeedBasis
321   @param[out] is_tensor Variable to store tensor status
322 
323   @return An error code: 0 - success, otherwise - failure
324 
325   @ref Backend
326 **/
327 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
328   *is_tensor = basis->tensor_basis;
329   return CEED_ERROR_SUCCESS;
330 }
331 
332 /**
333   @brief Get backend data of a CeedBasis
334 
335   @param[in]  basis CeedBasis
336   @param[out] data  Variable to store data
337 
338   @return An error code: 0 - success, otherwise - failure
339 
340   @ref Backend
341 **/
342 int CeedBasisGetData(CeedBasis basis, void *data) {
343   *(void **)data = basis->data;
344   return CEED_ERROR_SUCCESS;
345 }
346 
347 /**
348   @brief Set backend data of a CeedBasis
349 
350   @param[in,out] basis  CeedBasis
351   @param[in]     data   Data to set
352 
353   @return An error code: 0 - success, otherwise - failure
354 
355   @ref Backend
356 **/
357 int CeedBasisSetData(CeedBasis basis, void *data) {
358   basis->data = data;
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Increment the reference counter for a CeedBasis
364 
365   @param[in,out] basis Basis to increment the reference counter
366 
367   @return An error code: 0 - success, otherwise - failure
368 
369   @ref Backend
370 **/
371 int CeedBasisReference(CeedBasis basis) {
372   basis->ref_count++;
373   return CEED_ERROR_SUCCESS;
374 }
375 
376 /**
377   @brief Get number of Q-vector components for given CeedBasis
378 
379   @param[in]  basis  CeedBasis
380   @param[in]  eval_mode \ref CEED_EVAL_INTERP to use interpolated values,
381                           \ref CEED_EVAL_GRAD to use gradients,
382                           \ref CEED_EVAL_DIV to use divergence,
383                           \ref CEED_EVAL_CURL to use curl.
384   @param[out] q_comp Variable to store number of Q-vector components of basis
385 
386   @return An error code: 0 - success, otherwise - failure
387 
388   @ref Backend
389 **/
390 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedEvalMode eval_mode, CeedInt *q_comp) {
391   switch (eval_mode) {
392     case CEED_EVAL_INTERP:
393       *q_comp = (basis->fe_space == CEED_FE_SPACE_H1) ? 1 : basis->dim;
394       break;
395     case CEED_EVAL_GRAD:
396       *q_comp = basis->dim;
397       break;
398     case CEED_EVAL_DIV:
399       *q_comp = 1;
400       break;
401     case CEED_EVAL_CURL:
402       *q_comp = (basis->dim < 3) ? 1 : basis->dim;
403       break;
404     case CEED_EVAL_NONE:
405     case CEED_EVAL_WEIGHT:
406       *q_comp = 1;
407       break;
408   }
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
414 
415   @param[in]  basis     Basis to estimate FLOPs for
416   @param[in]  t_mode    Apply basis or transpose
417   @param[in]  eval_mode Basis evaluation mode
418   @param[out] flops     Address of variable to hold FLOPs estimate
419 
420   @ref Backend
421 **/
422 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
423   bool is_tensor;
424 
425   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
426   if (is_tensor) {
427     CeedInt dim, num_comp, P_1d, Q_1d;
428     CeedCall(CeedBasisGetDimension(basis, &dim));
429     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
430     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
431     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
432     if (t_mode == CEED_TRANSPOSE) {
433       P_1d = Q_1d;
434       Q_1d = P_1d;
435     }
436     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
437     for (CeedInt d = 0; d < dim; d++) {
438       tensor_flops += 2 * pre * P_1d * post * Q_1d;
439       pre /= P_1d;
440       post *= Q_1d;
441     }
442     switch (eval_mode) {
443       case CEED_EVAL_NONE:
444         *flops = 0;
445         break;
446       case CEED_EVAL_INTERP:
447         *flops = tensor_flops;
448         break;
449       case CEED_EVAL_GRAD:
450         *flops = tensor_flops * 2;
451         break;
452       case CEED_EVAL_DIV:
453         // LCOV_EXCL_START
454         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
455         break;
456       case CEED_EVAL_CURL:
457         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
458         break;
459       // LCOV_EXCL_STOP
460       case CEED_EVAL_WEIGHT:
461         *flops = dim * CeedIntPow(Q_1d, dim);
462         break;
463     }
464   } else {
465     CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
466     CeedCall(CeedBasisGetDimension(basis, &dim));
467     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
468     CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
469     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
470     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
471     switch (eval_mode) {
472       case CEED_EVAL_NONE:
473         *flops = 0;
474         break;
475       case CEED_EVAL_INTERP:
476       case CEED_EVAL_GRAD:
477       case CEED_EVAL_DIV:
478       case CEED_EVAL_CURL:
479         *flops = num_nodes * num_qpts * num_comp * q_comp;
480         break;
481       case CEED_EVAL_WEIGHT:
482         *flops = 0;
483         break;
484     }
485   }
486 
487   return CEED_ERROR_SUCCESS;
488 }
489 
490 /**
491   @brief Get CeedFESpace for a CeedBasis
492 
493   @param[in]  basis     CeedBasis
494   @param[out] fe_space  Variable to store CeedFESpace
495 
496   @return An error code: 0 - success, otherwise - failure
497 
498   @ref Backend
499 **/
500 int CeedBasisGetFESpace(CeedBasis basis, CeedFESpace *fe_space) {
501   *fe_space = basis->fe_space;
502   return CEED_ERROR_SUCCESS;
503 }
504 
505 /**
506   @brief Get dimension for given CeedElemTopology
507 
508   @param[in]  topo CeedElemTopology
509   @param[out] dim  Variable to store dimension of topology
510 
511   @return An error code: 0 - success, otherwise - failure
512 
513   @ref Backend
514 **/
515 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
516   *dim = (CeedInt)topo >> 16;
517   return CEED_ERROR_SUCCESS;
518 }
519 
520 /**
521   @brief Get CeedTensorContract of a CeedBasis
522 
523   @param[in]  basis     CeedBasis
524   @param[out] contract  Variable to store CeedTensorContract
525 
526   @return An error code: 0 - success, otherwise - failure
527 
528   @ref Backend
529 **/
530 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
531   *contract = basis->contract;
532   return CEED_ERROR_SUCCESS;
533 }
534 
535 /**
536   @brief Set CeedTensorContract of a CeedBasis
537 
538   @param[in,out] basis    CeedBasis
539   @param[in]     contract CeedTensorContract to set
540 
541   @return An error code: 0 - success, otherwise - failure
542 
543   @ref Backend
544 **/
545 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
546   basis->contract = contract;
547   CeedCall(CeedTensorContractReference(contract));
548   return CEED_ERROR_SUCCESS;
549 }
550 
551 /**
552   @brief Return a reference implementation of matrix multiplication C = A B.
553 
554   Note: This is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
555 
556   @param[in]  ceed  Ceed context for error handling
557   @param[in]  mat_A Row-major matrix A
558   @param[in]  mat_B Row-major matrix B
559   @param[out] mat_C Row-major output matrix C
560   @param[in]  m     Number of rows of C
561   @param[in]  n     Number of columns of C
562   @param[in]  kk    Number of columns of A/rows of B
563 
564   @return An error code: 0 - success, otherwise - failure
565 
566   @ref Utility
567 **/
568 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
569   for (CeedInt i = 0; i < m; i++) {
570     for (CeedInt j = 0; j < n; j++) {
571       CeedScalar sum = 0;
572       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
573       mat_C[j + i * n] = sum;
574     }
575   }
576   return CEED_ERROR_SUCCESS;
577 }
578 
579 /**
580   @brief Return QR Factorization of a matrix
581 
582   @param[in]     ceed Ceed context for error handling
583   @param[in,out] mat  Row-major matrix to be factorized in place
584   @param[in,out] tau  Vector of length m of scaling factors
585   @param[in]     m    Number of rows
586   @param[in]     n    Number of columns
587 
588   @return An error code: 0 - success, otherwise - failure
589 
590   @ref Utility
591 **/
592 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
593   CeedScalar v[m];
594 
595   // Check matrix shape
596   if (n > m) {
597     // LCOV_EXCL_START
598     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
599     // LCOV_EXCL_STOP
600   }
601 
602   for (CeedInt i = 0; i < n; i++) {
603     if (i >= m - 1) {  // last row of matrix, no reflection needed
604       tau[i] = 0.;
605       break;
606     }
607     // Calculate Householder vector, magnitude
608     CeedScalar sigma = 0.0;
609     v[i]             = mat[i + n * i];
610     for (CeedInt j = i + 1; j < m; j++) {
611       v[j] = mat[i + n * j];
612       sigma += v[j] * v[j];
613     }
614     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
615     CeedScalar R_ii = -copysign(norm, v[i]);
616     v[i] -= R_ii;
617     // norm of v[i:m] after modification above and scaling below
618     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
619     //   tau = 2 / (norm*norm)
620     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
621     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
622 
623     // Apply Householder reflector to lower right panel
624     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
625     // Save v
626     mat[i + n * i] = R_ii;
627     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
628   }
629   return CEED_ERROR_SUCCESS;
630 }
631 
632 /**
633   @brief Apply Householder Q matrix
634 
635   Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn.
636 
637   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
638   @param[in]     mat_Q  Householder Q matrix
639   @param[in]     tau    Householder scaling factors
640   @param[in]     t_mode Transpose mode for application
641   @param[in]     m      Number of rows in A
642   @param[in]     n      Number of columns in A
643   @param[in]     k      Number of elementary reflectors in Q, k<m
644   @param[in]     row    Row stride in A
645   @param[in]     col    Col stride in A
646 
647   @return An error code: 0 - success, otherwise - failure
648 
649   @ref Utility
650 **/
651 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
652                           CeedInt k, CeedInt row, CeedInt col) {
653   CeedScalar *v;
654   CeedCall(CeedMalloc(m, &v));
655   for (CeedInt ii = 0; ii < k; ii++) {
656     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
657     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
658     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
659     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
660   }
661   CeedCall(CeedFree(&v));
662   return CEED_ERROR_SUCCESS;
663 }
664 
665 /**
666   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
667 
668   @param[in]     ceed   Ceed context for error handling
669   @param[in,out] mat    Row-major matrix to be factorized in place
670   @param[out]    lambda Vector of length n of eigenvalues
671   @param[in]     n      Number of rows/columns
672 
673   @return An error code: 0 - success, otherwise - failure
674 
675   @ref Utility
676 **/
677 CeedPragmaOptimizeOff
678 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
679   // Check bounds for clang-tidy
680   if (n < 2) {
681     // LCOV_EXCL_START
682     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
683     // LCOV_EXCL_STOP
684   }
685 
686   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
687 
688   // Copy mat to mat_T and set mat to I
689   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
690   for (CeedInt i = 0; i < n; i++) {
691     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
692   }
693 
694   // Reduce to tridiagonal
695   for (CeedInt i = 0; i < n - 1; i++) {
696     // Calculate Householder vector, magnitude
697     CeedScalar sigma = 0.0;
698     v[i]             = mat_T[i + n * (i + 1)];
699     for (CeedInt j = i + 1; j < n - 1; j++) {
700       v[j] = mat_T[i + n * (j + 1)];
701       sigma += v[j] * v[j];
702     }
703     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
704     CeedScalar R_ii = -copysign(norm, v[i]);
705     v[i] -= R_ii;
706     // norm of v[i:m] after modification above and scaling below
707     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
708     //   tau = 2 / (norm*norm)
709     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
710     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
711 
712     // Update sub and super diagonal
713     for (CeedInt j = i + 2; j < n; j++) {
714       mat_T[i + n * j] = 0;
715       mat_T[j + n * i] = 0;
716     }
717     // Apply symmetric Householder reflector to lower right panel
718     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
719     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
720 
721     // Save v
722     mat_T[i + n * (i + 1)] = R_ii;
723     mat_T[(i + 1) + n * i] = R_ii;
724     for (CeedInt j = i + 1; j < n - 1; j++) {
725       mat_T[i + n * (j + 1)] = v[j];
726     }
727   }
728   // Backwards accumulation of Q
729   for (CeedInt i = n - 2; i >= 0; i--) {
730     if (tau[i] > 0.0) {
731       v[i] = 1;
732       for (CeedInt j = i + 1; j < n - 1; j++) {
733         v[j]                   = mat_T[i + n * (j + 1)];
734         mat_T[i + n * (j + 1)] = 0;
735       }
736       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
737     }
738   }
739 
740   // Reduce sub and super diagonal
741   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
742   CeedScalar tol = CEED_EPSILON;
743 
744   while (itr < max_itr) {
745     // Update p, q, size of reduced portions of diagonal
746     p = 0;
747     q = 0;
748     for (CeedInt i = n - 2; i >= 0; i--) {
749       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
750       else break;
751     }
752     for (CeedInt i = 0; i < n - q - 1; i++) {
753       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
754       else break;
755     }
756     if (q == n - 1) break;  // Finished reducing
757 
758     // Reduce tridiagonal portion
759     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
760     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
761     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
762     CeedScalar x  = mat_T[p + n * p] - mu;
763     CeedScalar z  = mat_T[p + n * (p + 1)];
764     for (CeedInt k = p; k < n - q - 1; k++) {
765       // Compute Givens rotation
766       CeedScalar c = 1, s = 0;
767       if (fabs(z) > tol) {
768         if (fabs(z) > fabs(x)) {
769           CeedScalar tau = -x / z;
770           s = 1 / sqrt(1 + tau * tau), c = s * tau;
771         } else {
772           CeedScalar tau = -z / x;
773           c = 1 / sqrt(1 + tau * tau), s = c * tau;
774         }
775       }
776 
777       // Apply Givens rotation to T
778       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
779       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
780 
781       // Apply Givens rotation to Q
782       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
783 
784       // Update x, z
785       if (k < n - q - 2) {
786         x = mat_T[k + n * (k + 1)];
787         z = mat_T[k + n * (k + 2)];
788       }
789     }
790     itr++;
791   }
792 
793   // Save eigenvalues
794   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
795 
796   // Check convergence
797   if (itr == max_itr && q < n - 1) {
798     // LCOV_EXCL_START
799     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
800     // LCOV_EXCL_STOP
801   }
802   return CEED_ERROR_SUCCESS;
803 }
804 CeedPragmaOptimizeOn
805 
806 /**
807   @brief Return Simultaneous Diagonalization of two matrices.
808 
809   This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
810   We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
811   This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
812 
813   @param[in]  ceed   Ceed context for error handling
814   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
815   @param[in]  mat_B  Row-major matrix to be factorized to identity
816   @param[out] mat_X  Row-major orthogonal matrix
817   @param[out] lambda Vector of length n of generalized eigenvalues
818   @param[in]  n      Number of rows/columns
819 
820   @return An error code: 0 - success, otherwise - failure
821 
822   @ref Utility
823 **/
824 CeedPragmaOptimizeOff
825 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
826   CeedScalar *mat_C, *mat_G, *vec_D;
827   CeedCall(CeedCalloc(n * n, &mat_C));
828   CeedCall(CeedCalloc(n * n, &mat_G));
829   CeedCall(CeedCalloc(n, &vec_D));
830 
831   // Compute B = G D G^T
832   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
833   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
834 
835   // Sort eigenvalues
836   for (CeedInt i = n - 1; i >= 0; i--) {
837     for (CeedInt j = 0; j < i; j++) {
838       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
839         CeedScalar temp;
840         temp         = vec_D[j];
841         vec_D[j]     = vec_D[j + 1];
842         vec_D[j + 1] = temp;
843         for (CeedInt k = 0; k < n; k++) {
844           temp                 = mat_G[k * n + j];
845           mat_G[k * n + j]     = mat_G[k * n + j + 1];
846           mat_G[k * n + j + 1] = temp;
847         }
848       }
849     }
850   }
851 
852   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
853   //           = D^-1/2 G^T A G D^-1/2
854   // -- D = D^-1/2
855   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
856   // -- G = G D^-1/2
857   // -- C = D^-1/2 G^T
858   for (CeedInt i = 0; i < n; i++) {
859     for (CeedInt j = 0; j < n; j++) {
860       mat_G[i * n + j] *= vec_D[j];
861       mat_C[j * n + i] = mat_G[i * n + j];
862     }
863   }
864   // -- X = (D^-1/2 G^T) A
865   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
866   // -- C = (D^-1/2 G^T A) (G D^-1/2)
867   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
868 
869   // Compute Q^T C Q = lambda
870   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
871 
872   // Sort eigenvalues
873   for (CeedInt i = n - 1; i >= 0; i--) {
874     for (CeedInt j = 0; j < i; j++) {
875       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
876         CeedScalar temp;
877         temp          = lambda[j];
878         lambda[j]     = lambda[j + 1];
879         lambda[j + 1] = temp;
880         for (CeedInt k = 0; k < n; k++) {
881           temp                 = mat_C[k * n + j];
882           mat_C[k * n + j]     = mat_C[k * n + j + 1];
883           mat_C[k * n + j + 1] = temp;
884         }
885       }
886     }
887   }
888 
889   // Set X = (G D^1/2)^-T Q
890   //       = G D^-1/2 Q
891   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
892 
893   // Cleanup
894   CeedCall(CeedFree(&mat_C));
895   CeedCall(CeedFree(&mat_G));
896   CeedCall(CeedFree(&vec_D));
897   return CEED_ERROR_SUCCESS;
898 }
899 CeedPragmaOptimizeOn
900 
901 /// @}
902 
903 /// ----------------------------------------------------------------------------
904 /// CeedBasis Public API
905 /// ----------------------------------------------------------------------------
906 /// @addtogroup CeedBasisUser
907 /// @{
908 
909 /**
910   @brief Create a tensor-product basis for H^1 discretizations
911 
912   @param[in]  ceed        Ceed object where the CeedBasis will be created
913   @param[in]  dim         Topological dimension
914   @param[in]  num_comp    Number of field components (1 for scalar fields)
915   @param[in]  P_1d        Number of nodes in one dimension
916   @param[in]  Q_1d        Number of quadrature points in one dimension
917   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
918   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
919   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
920   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
921   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref User
926 **/
927 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
928                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
929   if (!ceed->BasisCreateTensorH1) {
930     Ceed delegate;
931     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
932 
933     if (!delegate) {
934       // LCOV_EXCL_START
935       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
936       // LCOV_EXCL_STOP
937     }
938 
939     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
940     return CEED_ERROR_SUCCESS;
941   }
942 
943   if (dim < 1) {
944     // LCOV_EXCL_START
945     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
946     // LCOV_EXCL_STOP
947   }
948 
949   if (num_comp < 1) {
950     // LCOV_EXCL_START
951     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
952     // LCOV_EXCL_STOP
953   }
954 
955   if (P_1d < 1) {
956     // LCOV_EXCL_START
957     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
958     // LCOV_EXCL_STOP
959   }
960 
961   if (Q_1d < 1) {
962     // LCOV_EXCL_START
963     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
964     // LCOV_EXCL_STOP
965   }
966 
967   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
968 
969   CeedCall(CeedCalloc(1, basis));
970   (*basis)->ceed = ceed;
971   CeedCall(CeedReference(ceed));
972   (*basis)->ref_count    = 1;
973   (*basis)->tensor_basis = 1;
974   (*basis)->dim          = dim;
975   (*basis)->topo         = topo;
976   (*basis)->num_comp     = num_comp;
977   (*basis)->P_1d         = P_1d;
978   (*basis)->Q_1d         = Q_1d;
979   (*basis)->P            = CeedIntPow(P_1d, dim);
980   (*basis)->Q            = CeedIntPow(Q_1d, dim);
981   (*basis)->fe_space     = CEED_FE_SPACE_H1;
982   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
983   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
984   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
985   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
986   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
987   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
988   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
989   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
990   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
991   return CEED_ERROR_SUCCESS;
992 }
993 
994 /**
995   @brief Create a tensor-product Lagrange basis
996 
997   @param[in]  ceed      Ceed object where the CeedBasis will be created
998   @param[in]  dim       Topological dimension of element
999   @param[in]  num_comp  Number of field components (1 for scalar fields)
1000   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
1001                           The polynomial degree of the resulting Q_k element is k=P-1.
1002   @param[in]  Q         Number of quadrature points in one dimension.
1003   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
1004   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 
1008   @ref User
1009 **/
1010 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
1011   // Allocate
1012   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
1013   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
1014 
1015   if (dim < 1) {
1016     // LCOV_EXCL_START
1017     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
1018     // LCOV_EXCL_STOP
1019   }
1020 
1021   if (num_comp < 1) {
1022     // LCOV_EXCL_START
1023     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1024     // LCOV_EXCL_STOP
1025   }
1026 
1027   if (P < 1) {
1028     // LCOV_EXCL_START
1029     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1030     // LCOV_EXCL_STOP
1031   }
1032 
1033   if (Q < 1) {
1034     // LCOV_EXCL_START
1035     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1036     // LCOV_EXCL_STOP
1037   }
1038 
1039   // Get Nodes and Weights
1040   CeedCall(CeedCalloc(P * Q, &interp_1d));
1041   CeedCall(CeedCalloc(P * Q, &grad_1d));
1042   CeedCall(CeedCalloc(P, &nodes));
1043   CeedCall(CeedCalloc(Q, &q_ref_1d));
1044   CeedCall(CeedCalloc(Q, &q_weight_1d));
1045   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
1046   switch (quad_mode) {
1047     case CEED_GAUSS:
1048       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
1049       break;
1050     case CEED_GAUSS_LOBATTO:
1051       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
1052       break;
1053   }
1054   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
1055 
1056   // Build B, D matrix
1057   // Fornberg, 1998
1058   for (i = 0; i < Q; i++) {
1059     c1                   = 1.0;
1060     c3                   = nodes[0] - q_ref_1d[i];
1061     interp_1d[i * P + 0] = 1.0;
1062     for (j = 1; j < P; j++) {
1063       c2 = 1.0;
1064       c4 = c3;
1065       c3 = nodes[j] - q_ref_1d[i];
1066       for (k = 0; k < j; k++) {
1067         dx = nodes[j] - nodes[k];
1068         c2 *= dx;
1069         if (k == j - 1) {
1070           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
1071           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
1072         }
1073         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
1074         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
1075       }
1076       c1 = c2;
1077     }
1078   }
1079   // Pass to CeedBasisCreateTensorH1
1080   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1081 cleanup:
1082   CeedCall(CeedFree(&interp_1d));
1083   CeedCall(CeedFree(&grad_1d));
1084   CeedCall(CeedFree(&nodes));
1085   CeedCall(CeedFree(&q_ref_1d));
1086   CeedCall(CeedFree(&q_weight_1d));
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Create a non tensor-product basis for H^1 discretizations
1092 
1093   @param[in]  ceed      Ceed object where the CeedBasis will be created
1094   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
1095   @param[in]  num_comp  Number of field components (1 for scalar fields)
1096   @param[in]  num_nodes Total number of nodes
1097   @param[in]  num_qpts  Total number of quadrature points
1098   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1099   @param[in]  grad      Row-major (dim * num_qpts * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
1100   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1101   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1102   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1103 
1104   @return An error code: 0 - success, otherwise - failure
1105 
1106   @ref User
1107 **/
1108 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1109                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1110   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1111 
1112   if (!ceed->BasisCreateH1) {
1113     Ceed delegate;
1114     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1115 
1116     if (!delegate) {
1117       // LCOV_EXCL_START
1118       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
1119       // LCOV_EXCL_STOP
1120     }
1121 
1122     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1123     return CEED_ERROR_SUCCESS;
1124   }
1125 
1126   if (num_comp < 1) {
1127     // LCOV_EXCL_START
1128     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1129     // LCOV_EXCL_STOP
1130   }
1131 
1132   if (num_nodes < 1) {
1133     // LCOV_EXCL_START
1134     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1135     // LCOV_EXCL_STOP
1136   }
1137 
1138   if (num_qpts < 1) {
1139     // LCOV_EXCL_START
1140     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1141     // LCOV_EXCL_STOP
1142   }
1143 
1144   CeedCall(CeedCalloc(1, basis));
1145 
1146   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1147 
1148   (*basis)->ceed = ceed;
1149   CeedCall(CeedReference(ceed));
1150   (*basis)->ref_count    = 1;
1151   (*basis)->tensor_basis = 0;
1152   (*basis)->dim          = dim;
1153   (*basis)->topo         = topo;
1154   (*basis)->num_comp     = num_comp;
1155   (*basis)->P            = P;
1156   (*basis)->Q            = Q;
1157   (*basis)->fe_space     = CEED_FE_SPACE_H1;
1158   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1159   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1160   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1161   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1162   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1163   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1164   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1165   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1166   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1167   return CEED_ERROR_SUCCESS;
1168 }
1169 
1170 /**
1171   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1172 
1173   @param[in]  ceed      Ceed object where the CeedBasis will be created
1174   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1175   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1176   @param[in]  num_nodes Total number of nodes (dofs per element)
1177   @param[in]  num_qpts  Total number of quadrature points
1178   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1179   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of basis functions at quadrature points
1180   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1181   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1182   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1183 
1184   @return An error code: 0 - success, otherwise - failure
1185 
1186   @ref User
1187 **/
1188 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1189                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1190   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1191 
1192   if (!ceed->BasisCreateHdiv) {
1193     Ceed delegate;
1194     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1195 
1196     if (!delegate) {
1197       // LCOV_EXCL_START
1198       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1199       // LCOV_EXCL_STOP
1200     }
1201 
1202     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1203     return CEED_ERROR_SUCCESS;
1204   }
1205 
1206   if (num_comp < 1) {
1207     // LCOV_EXCL_START
1208     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1209     // LCOV_EXCL_STOP
1210   }
1211 
1212   if (num_nodes < 1) {
1213     // LCOV_EXCL_START
1214     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1215     // LCOV_EXCL_STOP
1216   }
1217 
1218   if (num_qpts < 1) {
1219     // LCOV_EXCL_START
1220     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1221     // LCOV_EXCL_STOP
1222   }
1223 
1224   CeedCall(CeedCalloc(1, basis));
1225 
1226   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1227 
1228   (*basis)->ceed = ceed;
1229   CeedCall(CeedReference(ceed));
1230   (*basis)->ref_count    = 1;
1231   (*basis)->tensor_basis = 0;
1232   (*basis)->dim          = dim;
1233   (*basis)->topo         = topo;
1234   (*basis)->num_comp     = num_comp;
1235   (*basis)->P            = P;
1236   (*basis)->Q            = Q;
1237   (*basis)->fe_space     = CEED_FE_SPACE_HDIV;
1238   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1239   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1240   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1241   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1242   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1243   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1244   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1245   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1246   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1247   return CEED_ERROR_SUCCESS;
1248 }
1249 
1250 /**
1251   @brief Create a non tensor-product basis for \f$H(\mathrm{curl})\f$ discretizations
1252 
1253   @param[in]  ceed      Ceed object where the CeedBasis will be created
1254   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1255   @param[in]  num_comp  Number of components (usually 1 for vectors in H(curl) bases)
1256   @param[in]  num_nodes Total number of nodes (dofs per element)
1257   @param[in]  num_qpts  Total number of quadrature points
1258   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1259   @param[in]  curl      Row-major (curl_comp * num_qpts * num_nodes, curl_comp = 1 if dim < 3 else dim) matrix expressing curl of basis functions at
1260 quadrature points
1261   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1262   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1263   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1264 
1265   @return An error code: 0 - success, otherwise - failure
1266 
1267   @ref User
1268 **/
1269 int CeedBasisCreateHcurl(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1270                          const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1271   CeedInt Q = num_qpts, P = num_nodes, dim = 0, curl_comp = 0;
1272 
1273   if (!ceed->BasisCreateHdiv) {
1274     Ceed delegate;
1275     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1276 
1277     if (!delegate) {
1278       // LCOV_EXCL_START
1279       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHcurl");
1280       // LCOV_EXCL_STOP
1281     }
1282 
1283     CeedCall(CeedBasisCreateHcurl(delegate, topo, num_comp, num_nodes, num_qpts, interp, curl, q_ref, q_weight, basis));
1284     return CEED_ERROR_SUCCESS;
1285   }
1286 
1287   if (num_comp < 1) {
1288     // LCOV_EXCL_START
1289     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1290     // LCOV_EXCL_STOP
1291   }
1292 
1293   if (num_nodes < 1) {
1294     // LCOV_EXCL_START
1295     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1296     // LCOV_EXCL_STOP
1297   }
1298 
1299   if (num_qpts < 1) {
1300     // LCOV_EXCL_START
1301     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1302     // LCOV_EXCL_STOP
1303   }
1304 
1305   CeedCall(CeedCalloc(1, basis));
1306 
1307   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1308   curl_comp = (dim < 3) ? 1 : dim;
1309 
1310   (*basis)->ceed = ceed;
1311   CeedCall(CeedReference(ceed));
1312   (*basis)->ref_count    = 1;
1313   (*basis)->tensor_basis = 0;
1314   (*basis)->dim          = dim;
1315   (*basis)->topo         = topo;
1316   (*basis)->num_comp     = num_comp;
1317   (*basis)->P            = P;
1318   (*basis)->Q            = Q;
1319   (*basis)->fe_space     = CEED_FE_SPACE_HCURL;
1320   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1321   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1322   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1323   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1324   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1325   CeedCall(CeedMalloc(curl_comp * Q * P, &(*basis)->curl));
1326   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1327   if (curl) memcpy((*basis)->curl, curl, curl_comp * Q * P * sizeof(curl[0]));
1328   CeedCall(ceed->BasisCreateHcurl(topo, dim, P, Q, interp, curl, q_ref, q_weight, *basis));
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 /**
1333   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1334 
1335   Only `CEED_EVAL_INTERP` will be valid for the new basis, `basis_project`. For H^1 spaces, `CEED_EVAL_GRAD` will also be valid.
1336   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
1337 factorization. The gradient (for the H^1 case) is given by `grad_project = interp_to^+ * grad_from`.
1338 
1339   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
1340 
1341   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If
1342 `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1343 
1344   @param[in]  basis_from    CeedBasis to prolong from
1345   @param[in]  basis_to      CeedBasis to prolong to
1346   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
1347 
1348   @return An error code: 0 - success, otherwise - failure
1349 
1350   @ref User
1351 **/
1352 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1353   Ceed ceed;
1354   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1355 
1356   // Create projection matrix
1357   CeedScalar *interp_project, *grad_project;
1358   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1359 
1360   // Build basis
1361   bool        is_tensor;
1362   CeedFESpace fe_space;
1363   CeedInt     dim, num_comp;
1364   CeedScalar *q_ref, *q_weight;
1365   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
1366   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space));
1367   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1368   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1369   if (is_tensor) {
1370     CeedInt P_1d_to, P_1d_from;
1371     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1372     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1373     CeedCall(CeedCalloc(P_1d_to, &q_ref));
1374     CeedCall(CeedCalloc(P_1d_to, &q_weight));
1375     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1376   } else {
1377     CeedElemTopology topo;
1378     CeedCall(CeedBasisGetTopology(basis_to, &topo));
1379     CeedInt num_nodes_to, num_nodes_from;
1380     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1381     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1382     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
1383     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
1384     switch (fe_space) {
1385       case CEED_FE_SPACE_H1:
1386         CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1387         break;
1388       case CEED_FE_SPACE_HDIV:
1389         CeedCall(
1390             CeedBasisCreateHdiv(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1391         break;
1392       case CEED_FE_SPACE_HCURL:
1393         CeedCall(
1394             CeedBasisCreateHcurl(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1395         break;
1396     }
1397   }
1398 
1399   // Cleanup
1400   CeedCall(CeedFree(&interp_project));
1401   CeedCall(CeedFree(&grad_project));
1402   CeedCall(CeedFree(&q_ref));
1403   CeedCall(CeedFree(&q_weight));
1404 
1405   return CEED_ERROR_SUCCESS;
1406 }
1407 
1408 /**
1409   @brief Copy the pointer to a CeedBasis.
1410 
1411   Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis.
1412         This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis.
1413 
1414   @param[in]     basis      CeedBasis to copy reference to
1415   @param[in,out] basis_copy Variable to store copied reference
1416 
1417   @return An error code: 0 - success, otherwise - failure
1418 
1419   @ref User
1420 **/
1421 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1422   if (basis != CEED_BASIS_COLLOCATED) CeedCall(CeedBasisReference(basis));
1423   CeedCall(CeedBasisDestroy(basis_copy));
1424   *basis_copy = basis;
1425   return CEED_ERROR_SUCCESS;
1426 }
1427 
1428 /**
1429   @brief View a CeedBasis
1430 
1431   @param[in] basis  CeedBasis to view
1432   @param[in] stream Stream to view to, e.g., stdout
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 
1436   @ref User
1437 **/
1438 int CeedBasisView(CeedBasis basis, FILE *stream) {
1439   CeedElemTopology topo     = basis->topo;
1440   CeedFESpace      fe_space = basis->fe_space;
1441   CeedInt          q_comp   = 0;
1442 
1443   // Print FE space and element topology of the basis
1444   if (basis->tensor_basis) {
1445     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1446             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
1447   } else {
1448     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[fe_space],
1449             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
1450   }
1451   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1452   if (basis->tensor_basis) {  // tensor basis
1453     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1454     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1455     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1456     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1457   } else {  // non-tensor basis
1458     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1459     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1460     CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp));
1461     CeedCall(CeedScalarView("interp", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->interp, stream));
1462     if (basis->grad) {
1463       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp));
1464       CeedCall(CeedScalarView("grad", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->grad, stream));
1465     }
1466     if (basis->div) {
1467       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp));
1468       CeedCall(CeedScalarView("div", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->div, stream));
1469     }
1470     if (basis->curl) {
1471       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp));
1472       CeedCall(CeedScalarView("curl", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->curl, stream));
1473     }
1474   }
1475   return CEED_ERROR_SUCCESS;
1476 }
1477 
1478 /**
1479   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1480 
1481   @param[in]  basis      CeedBasis to evaluate
1482   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1483                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1484   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1485                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1486   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1487                           \ref CEED_EVAL_INTERP to use interpolated values,
1488                           \ref CEED_EVAL_GRAD to use gradients,
1489                           \ref CEED_EVAL_DIV to use divergence,
1490                           \ref CEED_EVAL_CURL to use curl,
1491                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1492   @param[in]  u        Input CeedVector
1493   @param[out] v        Output CeedVector
1494 
1495   @return An error code: 0 - success, otherwise - failure
1496 
1497   @ref User
1498 **/
1499 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1500   CeedSize u_length = 0, v_length;
1501   CeedInt  dim, num_comp, q_comp, num_nodes, num_qpts;
1502   CeedCall(CeedBasisGetDimension(basis, &dim));
1503   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1504   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
1505   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1506   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1507   CeedCall(CeedVectorGetLength(v, &v_length));
1508   if (u) {
1509     CeedCall(CeedVectorGetLength(u, &u_length));
1510   }
1511 
1512   if (!basis->Apply) {
1513     // LCOV_EXCL_START
1514     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1515     // LCOV_EXCL_STOP
1516   }
1517 
1518   // Check compatibility of topological and geometrical dimensions
1519   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1520       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1521     // LCOV_EXCL_START
1522     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1523     // LCOV_EXCL_STOP
1524   }
1525 
1526   // Check vector lengths to prevent out of bounds issues
1527   bool bad_dims = false;
1528   switch (eval_mode) {
1529     case CEED_EVAL_NONE:
1530     case CEED_EVAL_INTERP:
1531     case CEED_EVAL_GRAD:
1532     case CEED_EVAL_DIV:
1533     case CEED_EVAL_CURL:
1534       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * q_comp || v_length < num_elem * num_comp * num_nodes)) ||
1535                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * q_comp || u_length < num_elem * num_comp * num_nodes)));
1536       break;
1537     case CEED_EVAL_WEIGHT:
1538       bad_dims = v_length < num_elem * num_qpts;
1539       break;
1540   }
1541   if (bad_dims) {
1542     // LCOV_EXCL_START
1543     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1544     // LCOV_EXCL_STOP
1545   }
1546 
1547   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1548   return CEED_ERROR_SUCCESS;
1549 }
1550 
1551 /**
1552   @brief Get Ceed associated with a CeedBasis
1553 
1554   @param[in]  basis CeedBasis
1555   @param[out] ceed  Variable to store Ceed
1556 
1557   @return An error code: 0 - success, otherwise - failure
1558 
1559   @ref Advanced
1560 **/
1561 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1562   *ceed = basis->ceed;
1563   return CEED_ERROR_SUCCESS;
1564 }
1565 
1566 /**
1567   @brief Get dimension for given CeedBasis
1568 
1569   @param[in]  basis CeedBasis
1570   @param[out] dim   Variable to store dimension of basis
1571 
1572   @return An error code: 0 - success, otherwise - failure
1573 
1574   @ref Advanced
1575 **/
1576 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1577   *dim = basis->dim;
1578   return CEED_ERROR_SUCCESS;
1579 }
1580 
1581 /**
1582   @brief Get topology for given CeedBasis
1583 
1584   @param[in]  basis CeedBasis
1585   @param[out] topo  Variable to store topology of basis
1586 
1587   @return An error code: 0 - success, otherwise - failure
1588 
1589   @ref Advanced
1590 **/
1591 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1592   *topo = basis->topo;
1593   return CEED_ERROR_SUCCESS;
1594 }
1595 
1596 /**
1597   @brief Get number of components for given CeedBasis
1598 
1599   @param[in]  basis    CeedBasis
1600   @param[out] num_comp Variable to store number of components of basis
1601 
1602   @return An error code: 0 - success, otherwise - failure
1603 
1604   @ref Advanced
1605 **/
1606 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1607   *num_comp = basis->num_comp;
1608   return CEED_ERROR_SUCCESS;
1609 }
1610 
1611 /**
1612   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1613 
1614   @param[in]  basis CeedBasis
1615   @param[out] P     Variable to store number of nodes
1616 
1617   @return An error code: 0 - success, otherwise - failure
1618 
1619   @ref Utility
1620 **/
1621 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1622   *P = basis->P;
1623   return CEED_ERROR_SUCCESS;
1624 }
1625 
1626 /**
1627   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1628 
1629   @param[in]  basis CeedBasis
1630   @param[out] P_1d  Variable to store number of nodes
1631 
1632   @return An error code: 0 - success, otherwise - failure
1633 
1634   @ref Advanced
1635 **/
1636 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1637   if (!basis->tensor_basis) {
1638     // LCOV_EXCL_START
1639     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1640     // LCOV_EXCL_STOP
1641   }
1642 
1643   *P_1d = basis->P_1d;
1644   return CEED_ERROR_SUCCESS;
1645 }
1646 
1647 /**
1648   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1649 
1650   @param[in]  basis CeedBasis
1651   @param[out] Q     Variable to store number of quadrature points
1652 
1653   @return An error code: 0 - success, otherwise - failure
1654 
1655   @ref Utility
1656 **/
1657 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1658   *Q = basis->Q;
1659   return CEED_ERROR_SUCCESS;
1660 }
1661 
1662 /**
1663   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1664 
1665   @param[in]  basis CeedBasis
1666   @param[out] Q_1d  Variable to store number of quadrature points
1667 
1668   @return An error code: 0 - success, otherwise - failure
1669 
1670   @ref Advanced
1671 **/
1672 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1673   if (!basis->tensor_basis) {
1674     // LCOV_EXCL_START
1675     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1676     // LCOV_EXCL_STOP
1677   }
1678 
1679   *Q_1d = basis->Q_1d;
1680   return CEED_ERROR_SUCCESS;
1681 }
1682 
1683 /**
1684   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1685 
1686   @param[in]  basis CeedBasis
1687   @param[out] q_ref Variable to store reference coordinates of quadrature points
1688 
1689   @return An error code: 0 - success, otherwise - failure
1690 
1691   @ref Advanced
1692 **/
1693 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1694   *q_ref = basis->q_ref_1d;
1695   return CEED_ERROR_SUCCESS;
1696 }
1697 
1698 /**
1699   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1700 
1701   @param[in]  basis    CeedBasis
1702   @param[out] q_weight Variable to store quadrature weights
1703 
1704   @return An error code: 0 - success, otherwise - failure
1705 
1706   @ref Advanced
1707 **/
1708 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1709   *q_weight = basis->q_weight_1d;
1710   return CEED_ERROR_SUCCESS;
1711 }
1712 
1713 /**
1714   @brief Get interpolation matrix of a CeedBasis
1715 
1716   @param[in]  basis  CeedBasis
1717   @param[out] interp Variable to store interpolation matrix
1718 
1719   @return An error code: 0 - success, otherwise - failure
1720 
1721   @ref Advanced
1722 **/
1723 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1724   if (!basis->interp && basis->tensor_basis) {
1725     // Allocate
1726     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1727 
1728     // Initialize
1729     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1730 
1731     // Calculate
1732     for (CeedInt d = 0; d < basis->dim; d++) {
1733       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1734         for (CeedInt node = 0; node < basis->P; node++) {
1735           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1736           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1737           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1738         }
1739       }
1740     }
1741   }
1742   *interp = basis->interp;
1743   return CEED_ERROR_SUCCESS;
1744 }
1745 
1746 /**
1747   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1748 
1749   @param[in]  basis     CeedBasis
1750   @param[out] interp_1d Variable to store interpolation matrix
1751 
1752   @return An error code: 0 - success, otherwise - failure
1753 
1754   @ref Backend
1755 **/
1756 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1757   if (!basis->tensor_basis) {
1758     // LCOV_EXCL_START
1759     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1760     // LCOV_EXCL_STOP
1761   }
1762 
1763   *interp_1d = basis->interp_1d;
1764   return CEED_ERROR_SUCCESS;
1765 }
1766 
1767 /**
1768   @brief Get gradient matrix of a CeedBasis
1769 
1770   @param[in]  basis CeedBasis
1771   @param[out] grad  Variable to store gradient matrix
1772 
1773   @return An error code: 0 - success, otherwise - failure
1774 
1775   @ref Advanced
1776 **/
1777 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1778   if (!basis->grad && basis->tensor_basis) {
1779     // Allocate
1780     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1781 
1782     // Initialize
1783     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1784 
1785     // Calculate
1786     for (CeedInt d = 0; d < basis->dim; d++) {
1787       for (CeedInt i = 0; i < basis->dim; i++) {
1788         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1789           for (CeedInt node = 0; node < basis->P; node++) {
1790             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1791             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1792             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1793             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1794           }
1795         }
1796       }
1797     }
1798   }
1799   *grad = basis->grad;
1800   return CEED_ERROR_SUCCESS;
1801 }
1802 
1803 /**
1804   @brief Get 1D gradient matrix of a tensor product CeedBasis
1805 
1806   @param[in]  basis   CeedBasis
1807   @param[out] grad_1d Variable to store gradient matrix
1808 
1809   @return An error code: 0 - success, otherwise - failure
1810 
1811   @ref Advanced
1812 **/
1813 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1814   if (!basis->tensor_basis) {
1815     // LCOV_EXCL_START
1816     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1817     // LCOV_EXCL_STOP
1818   }
1819 
1820   *grad_1d = basis->grad_1d;
1821   return CEED_ERROR_SUCCESS;
1822 }
1823 
1824 /**
1825   @brief Get divergence matrix of a CeedBasis
1826 
1827   @param[in]  basis CeedBasis
1828   @param[out] div   Variable to store divergence matrix
1829 
1830   @return An error code: 0 - success, otherwise - failure
1831 
1832   @ref Advanced
1833 **/
1834 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1835   if (!basis->div) {
1836     // LCOV_EXCL_START
1837     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1838     // LCOV_EXCL_STOP
1839   }
1840 
1841   *div = basis->div;
1842   return CEED_ERROR_SUCCESS;
1843 }
1844 
1845 /**
1846   @brief Get curl matrix of a CeedBasis
1847 
1848   @param[in]  basis CeedBasis
1849   @param[out] curl  Variable to store curl matrix
1850 
1851   @return An error code: 0 - success, otherwise - failure
1852 
1853   @ref Advanced
1854 **/
1855 int CeedBasisGetCurl(CeedBasis basis, const CeedScalar **curl) {
1856   if (!basis->curl) {
1857     // LCOV_EXCL_START
1858     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have curl matrix.");
1859     // LCOV_EXCL_STOP
1860   }
1861 
1862   *curl = basis->curl;
1863   return CEED_ERROR_SUCCESS;
1864 }
1865 
1866 /**
1867   @brief Destroy a CeedBasis
1868 
1869   @param[in,out] basis CeedBasis to destroy
1870 
1871   @return An error code: 0 - success, otherwise - failure
1872 
1873   @ref User
1874 **/
1875 int CeedBasisDestroy(CeedBasis *basis) {
1876   if (!*basis || *basis == CEED_BASIS_COLLOCATED || --(*basis)->ref_count > 0) {
1877     *basis = NULL;
1878     return CEED_ERROR_SUCCESS;
1879   }
1880   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1881   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1882   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1883   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1884   CeedCall(CeedFree(&(*basis)->interp));
1885   CeedCall(CeedFree(&(*basis)->interp_1d));
1886   CeedCall(CeedFree(&(*basis)->grad));
1887   CeedCall(CeedFree(&(*basis)->grad_1d));
1888   CeedCall(CeedFree(&(*basis)->div));
1889   CeedCall(CeedFree(&(*basis)->curl));
1890   CeedCall(CeedDestroy(&(*basis)->ceed));
1891   CeedCall(CeedFree(basis));
1892   return CEED_ERROR_SUCCESS;
1893 }
1894 
1895 /**
1896   @brief Construct a Gauss-Legendre quadrature
1897 
1898   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1899   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1900   @param[out] q_weight_1d Array of length Q to hold the weights
1901 
1902   @return An error code: 0 - success, otherwise - failure
1903 
1904   @ref Utility
1905 **/
1906 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1907   // Allocate
1908   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1909   // Build q_ref_1d, q_weight_1d
1910   for (CeedInt i = 0; i <= Q / 2; i++) {
1911     // Guess
1912     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1913     // Pn(xi)
1914     P0 = 1.0;
1915     P1 = xi;
1916     P2 = 0.0;
1917     for (CeedInt j = 2; j <= Q; j++) {
1918       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1919       P0 = P1;
1920       P1 = P2;
1921     }
1922     // First Newton Step
1923     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1924     xi  = xi - P2 / dP2;
1925     // Newton to convergence
1926     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1927       P0 = 1.0;
1928       P1 = xi;
1929       for (CeedInt j = 2; j <= Q; j++) {
1930         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1931         P0 = P1;
1932         P1 = P2;
1933       }
1934       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1935       xi  = xi - P2 / dP2;
1936     }
1937     // Save xi, wi
1938     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1939     q_weight_1d[i]         = wi;
1940     q_weight_1d[Q - 1 - i] = wi;
1941     q_ref_1d[i]            = -xi;
1942     q_ref_1d[Q - 1 - i]    = xi;
1943   }
1944   return CEED_ERROR_SUCCESS;
1945 }
1946 
1947 /**
1948   @brief Construct a Gauss-Legendre-Lobatto quadrature
1949 
1950   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1951   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1952   @param[out] q_weight_1d Array of length Q to hold the weights
1953 
1954   @return An error code: 0 - success, otherwise - failure
1955 
1956   @ref Utility
1957 **/
1958 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1959   // Allocate
1960   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1961   // Build q_ref_1d, q_weight_1d
1962   // Set endpoints
1963   if (Q < 2) {
1964     // LCOV_EXCL_START
1965     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1966     // LCOV_EXCL_STOP
1967   }
1968   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1969   if (q_weight_1d) {
1970     q_weight_1d[0]     = wi;
1971     q_weight_1d[Q - 1] = wi;
1972   }
1973   q_ref_1d[0]     = -1.0;
1974   q_ref_1d[Q - 1] = 1.0;
1975   // Interior
1976   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1977     // Guess
1978     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1979     // Pn(xi)
1980     P0 = 1.0;
1981     P1 = xi;
1982     P2 = 0.0;
1983     for (CeedInt j = 2; j < Q; j++) {
1984       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1985       P0 = P1;
1986       P1 = P2;
1987     }
1988     // First Newton step
1989     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1990     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1991     xi   = xi - dP2 / d2P2;
1992     // Newton to convergence
1993     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1994       P0 = 1.0;
1995       P1 = xi;
1996       for (CeedInt j = 2; j < Q; j++) {
1997         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1998         P0 = P1;
1999         P1 = P2;
2000       }
2001       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2002       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2003       xi   = xi - dP2 / d2P2;
2004     }
2005     // Save xi, wi
2006     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
2007     if (q_weight_1d) {
2008       q_weight_1d[i]         = wi;
2009       q_weight_1d[Q - 1 - i] = wi;
2010     }
2011     q_ref_1d[i]         = -xi;
2012     q_ref_1d[Q - 1 - i] = xi;
2013   }
2014   return CEED_ERROR_SUCCESS;
2015 }
2016 
2017 /// @}
2018