xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-basis.c (revision ad6481ce28fcfada16ee6d8a13bbacd137fe686a)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed/backend.h>
10 #include <ceed/ceed.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Apply Householder Q matrix
66 
67     Compute A = Q A, where Q is mxm and A is mxn.
68 
69   @param[in,out] A      Matrix to apply Householder Q to, in place
70   @param[in]     Q      Householder Q matrix
71   @param[in]     tau    Householder scaling factors
72   @param[in]     t_mode Transpose mode for application
73   @param[in]     m      Number of rows in A
74   @param[in]     n      Number of columns in A
75   @param[in]     k      Number of elementary reflectors in Q, k<m
76   @param[in]     row    Row stride in A
77   @param[in]     col    Col stride in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n, CeedInt k,
84                           CeedInt row, CeedInt col) {
85   CeedScalar *v;
86   CeedCall(CeedMalloc(m, &v));
87   for (CeedInt ii = 0; ii < k; ii++) {
88     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
89     for (CeedInt j = i + 1; j < m; j++) v[j] = Q[j * k + i];
90     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
91     CeedCall(CeedHouseholderReflect(&A[i * row], &v[i], tau[i], m - i, n, row, col));
92   }
93   CeedCall(CeedFree(&v));
94   return CEED_ERROR_SUCCESS;
95 }
96 
97 /**
98   @brief Compute Givens rotation
99 
100     Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
101 
102   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
103   @param[in]     c      Cosine factor
104   @param[in]     s      Sine factor
105   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
106                           @ref CEED_TRANSPOSE for the opposite rotation
107   @param[in]     i      First row/column to apply rotation
108   @param[in]     k      Second row/column to apply rotation
109   @param[in]     m      Number of rows in A
110   @param[in]     n      Number of columns in A
111 
112   @return An error code: 0 - success, otherwise - failure
113 
114   @ref Developer
115 **/
116 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
117   CeedInt stride_j = 1, stride_ik = m, num_its = n;
118   if (t_mode == CEED_NOTRANSPOSE) {
119     stride_j  = n;
120     stride_ik = 1;
121     num_its   = m;
122   }
123 
124   // Apply rotation
125   for (CeedInt j = 0; j < num_its; j++) {
126     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
127     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
128     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
129   }
130   return CEED_ERROR_SUCCESS;
131 }
132 
133 /**
134   @brief View an array stored in a CeedBasis
135 
136   @param[in] name   Name of array
137   @param[in] fp_fmt Printing format
138   @param[in] m      Number of rows in array
139   @param[in] n      Number of columns in array
140   @param[in] a      Array to be viewed
141   @param[in] stream Stream to view to, e.g., stdout
142 
143   @return An error code: 0 - success, otherwise - failure
144 
145   @ref Developer
146 **/
147 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
148   for (CeedInt i = 0; i < m; i++) {
149     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
150     else fprintf(stream, "%12s:", name);
151     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
152     fputs("\n", stream);
153   }
154   return CEED_ERROR_SUCCESS;
155 }
156 
157 /**
158   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
159            The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
160 factorization.
161            The gradient is given by `grad_project = interp_to^+ * grad_from`.
162            Note: `basis_from` and `basis_to` must have compatible quadrature
163 spaces.
164 
165   @param[in]  basis_from     CeedBasis to project from
166   @param[in]  basis_to       CeedBasis to project to
167   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
168   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
169 
170   @return An error code: 0 - success, otherwise - failure
171 
172   @ref Developer
173 **/
174 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
175   Ceed ceed;
176   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
177 
178   // Check for compatible quadrature spaces
179   CeedInt Q_to, Q_from;
180   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
181   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
182   if (Q_to != Q_from) {
183     // LCOV_EXCL_START
184     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
185     // LCOV_EXCL_STOP
186   }
187 
188   // Check for matching tensor or non-tensor
189   CeedInt P_to, P_from, Q = Q_to;
190   bool    is_tensor_to, is_tensor_from;
191   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
192   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
193   if (is_tensor_to && is_tensor_from) {
194     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
195     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
196     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
197   } else if (!is_tensor_to && !is_tensor_from) {
198     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
199     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
200   } else {
201     // LCOV_EXCL_START
202     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
203     // LCOV_EXCL_STOP
204   }
205 
206   // Get source matrices
207   CeedInt     dim;
208   CeedScalar *interp_to, *interp_from, *tau;
209   CeedCall(CeedBasisGetDimension(basis_to, &dim));
210   CeedCall(CeedMalloc(Q * P_from, &interp_from));
211   CeedCall(CeedMalloc(Q * P_to, &interp_to));
212   CeedCall(CeedCalloc(P_to * P_from, interp_project));
213   CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
214   CeedCall(CeedMalloc(Q, &tau));
215   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, *grad_from_source;
216   if (is_tensor_to) {
217     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
218     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
219     CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
220   } else {
221     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
222     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
223     CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
224   }
225 
226   // Build matrices
227   CeedInt     num_matrices = 1 + (is_tensor_to ? 1 : dim);
228   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
229   input_from[0]     = (CeedScalar *)interp_from_source;
230   output_project[0] = *interp_project;
231   for (CeedInt m = 1; m < num_matrices; m++) {
232     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
233     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
234   }
235   for (CeedInt m = 0; m < num_matrices; m++) {
236     // -- QR Factorization, interp_to = Q R
237     memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0]));
238     CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q, P_to));
239 
240     // -- Apply Qtranspose, interp_to = Qtranspose interp_from
241     memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0]));
242     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q, P_from, P_to, P_from, 1));
243 
244     // -- Apply Rinv, interp_project = Rinv interp_c
245     for (CeedInt j = 0; j < P_from; j++) {  // Column j
246       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
247       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
248         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
249         for (CeedInt k = i + 1; k < P_to; k++) {
250           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
251         }
252         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
253       }
254     }
255   }
256 
257   // Cleanup
258   CeedCall(CeedFree(&tau));
259   CeedCall(CeedFree(&interp_to));
260   CeedCall(CeedFree(&interp_from));
261 
262   return CEED_ERROR_SUCCESS;
263 }
264 
265 /// @}
266 
267 /// ----------------------------------------------------------------------------
268 /// Ceed Backend API
269 /// ----------------------------------------------------------------------------
270 /// @addtogroup CeedBasisBackend
271 /// @{
272 
273 /**
274   @brief Return collocated grad matrix
275 
276   @param[in]  basis         CeedBasis
277   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
278 
279   @return An error code: 0 - success, otherwise - failure
280 
281   @ref Backend
282 **/
283 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
284   int         i, j, k;
285   Ceed        ceed;
286   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
287   CeedScalar *interp_1d, *grad_1d, *tau;
288 
289   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
290   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
291   CeedCall(CeedMalloc(Q_1d, &tau));
292   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
293   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
294 
295   // QR Factorization, interp_1d = Q R
296   CeedCall(CeedBasisGetCeed(basis, &ceed));
297   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
298   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
299 
300   // Apply Rinv, collo_grad_1d = grad_1d Rinv
301   for (i = 0; i < Q_1d; i++) {  // Row i
302     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
303     for (j = 1; j < P_1d; j++) {  // Column j
304       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
305       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
306       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
307     }
308     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
309   }
310 
311   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
312   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
313 
314   CeedCall(CeedFree(&interp_1d));
315   CeedCall(CeedFree(&grad_1d));
316   CeedCall(CeedFree(&tau));
317   return CEED_ERROR_SUCCESS;
318 }
319 
320 /**
321   @brief Get tensor status for given CeedBasis
322 
323   @param[in]  basis     CeedBasis
324   @param[out] is_tensor Variable to store tensor status
325 
326   @return An error code: 0 - success, otherwise - failure
327 
328   @ref Backend
329 **/
330 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
331   *is_tensor = basis->tensor_basis;
332   return CEED_ERROR_SUCCESS;
333 }
334 
335 /**
336   @brief Get backend data of a CeedBasis
337 
338   @param[in]  basis CeedBasis
339   @param[out] data  Variable to store data
340 
341   @return An error code: 0 - success, otherwise - failure
342 
343   @ref Backend
344 **/
345 int CeedBasisGetData(CeedBasis basis, void *data) {
346   *(void **)data = basis->data;
347   return CEED_ERROR_SUCCESS;
348 }
349 
350 /**
351   @brief Set backend data of a CeedBasis
352 
353   @param[in,out] basis  CeedBasis
354   @param[in]     data   Data to set
355 
356   @return An error code: 0 - success, otherwise - failure
357 
358   @ref Backend
359 **/
360 int CeedBasisSetData(CeedBasis basis, void *data) {
361   basis->data = data;
362   return CEED_ERROR_SUCCESS;
363 }
364 
365 /**
366   @brief Increment the reference counter for a CeedBasis
367 
368   @param[in,out] basis Basis to increment the reference counter
369 
370   @return An error code: 0 - success, otherwise - failure
371 
372   @ref Backend
373 **/
374 int CeedBasisReference(CeedBasis basis) {
375   basis->ref_count++;
376   return CEED_ERROR_SUCCESS;
377 }
378 
379 /**
380   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
381 
382   @param[in]  basis     Basis to estimate FLOPs for
383   @param[in]  t_mode    Apply basis or transpose
384   @param[in]  eval_mode Basis evaluation mode
385   @param[out] flops     Address of variable to hold FLOPs estimate
386 
387   @ref Backend
388 **/
389 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
390   bool is_tensor;
391 
392   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
393   if (is_tensor) {
394     CeedInt dim, num_comp, P_1d, Q_1d;
395     CeedCall(CeedBasisGetDimension(basis, &dim));
396     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
397     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
398     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
399     if (t_mode == CEED_TRANSPOSE) {
400       P_1d = Q_1d;
401       Q_1d = P_1d;
402     }
403     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
404     for (CeedInt d = 0; d < dim; d++) {
405       tensor_flops += 2 * pre * P_1d * post * Q_1d;
406       pre /= P_1d;
407       post *= Q_1d;
408     }
409     switch (eval_mode) {
410       case CEED_EVAL_NONE:
411         *flops = 0;
412         break;
413       case CEED_EVAL_INTERP:
414         *flops = tensor_flops;
415         break;
416       case CEED_EVAL_GRAD:
417         *flops = tensor_flops * 2;
418         break;
419       case CEED_EVAL_DIV:
420         // LCOV_EXCL_START
421         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
422         break;
423       case CEED_EVAL_CURL:
424         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
425         break;
426       // LCOV_EXCL_STOP
427       case CEED_EVAL_WEIGHT:
428         *flops = dim * CeedIntPow(Q_1d, dim);
429         break;
430     }
431   } else {
432     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
433     CeedCall(CeedBasisGetDimension(basis, &dim));
434     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
435     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
436     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
437     CeedCall(CeedBasisGetNumQuadratureComponents(basis, &Q_comp));
438     switch (eval_mode) {
439       case CEED_EVAL_NONE:
440         *flops = 0;
441         break;
442       case CEED_EVAL_INTERP:
443         *flops = num_nodes * num_qpts * num_comp;
444         break;
445       case CEED_EVAL_GRAD:
446         *flops = num_nodes * num_qpts * num_comp * dim;
447         break;
448       case CEED_EVAL_DIV:
449         *flops = num_nodes * num_qpts;
450         break;
451       case CEED_EVAL_CURL:
452         *flops = num_nodes * num_qpts * dim;
453         break;
454       case CEED_EVAL_WEIGHT:
455         *flops = 0;
456         break;
457     }
458   }
459 
460   return CEED_ERROR_SUCCESS;
461 }
462 
463 /**
464   @brief Get dimension for given CeedElemTopology
465 
466   @param[in]  topo CeedElemTopology
467   @param[out] dim  Variable to store dimension of topology
468 
469   @return An error code: 0 - success, otherwise - failure
470 
471   @ref Backend
472 **/
473 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
474   *dim = (CeedInt)topo >> 16;
475   return CEED_ERROR_SUCCESS;
476 }
477 
478 /**
479   @brief Get CeedTensorContract of a CeedBasis
480 
481   @param[in]  basis     CeedBasis
482   @param[out] contract  Variable to store CeedTensorContract
483 
484   @return An error code: 0 - success, otherwise - failure
485 
486   @ref Backend
487 **/
488 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
489   *contract = basis->contract;
490   return CEED_ERROR_SUCCESS;
491 }
492 
493 /**
494   @brief Set CeedTensorContract of a CeedBasis
495 
496   @param[in,out] basis    CeedBasis
497   @param[in]     contract CeedTensorContract to set
498 
499   @return An error code: 0 - success, otherwise - failure
500 
501   @ref Backend
502 **/
503 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
504   basis->contract = contract;
505   CeedCall(CeedTensorContractReference(contract));
506   return CEED_ERROR_SUCCESS;
507 }
508 
509 /**
510   @brief Return a reference implementation of matrix multiplication C = A B.
511            Note, this is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
512 
513   @param[in]  ceed  Ceed context for error handling
514   @param[in]  mat_A Row-major matrix A
515   @param[in]  mat_B Row-major matrix B
516   @param[out] mat_C Row-major output matrix C
517   @param[in]  m     Number of rows of C
518   @param[in]  n     Number of columns of C
519   @param[in]  kk    Number of columns of A/rows of B
520 
521   @return An error code: 0 - success, otherwise - failure
522 
523   @ref Utility
524 **/
525 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
526   for (CeedInt i = 0; i < m; i++) {
527     for (CeedInt j = 0; j < n; j++) {
528       CeedScalar sum = 0;
529       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
530       mat_C[j + i * n] = sum;
531     }
532   }
533   return CEED_ERROR_SUCCESS;
534 }
535 
536 /// @}
537 
538 /// ----------------------------------------------------------------------------
539 /// CeedBasis Public API
540 /// ----------------------------------------------------------------------------
541 /// @addtogroup CeedBasisUser
542 /// @{
543 
544 /**
545   @brief Create a tensor-product basis for H^1 discretizations
546 
547   @param[in]  ceed        Ceed object where the CeedBasis will be created
548   @param[in]  dim         Topological dimension
549   @param[in]  num_comp    Number of field components (1 for scalar fields)
550   @param[in]  P_1d        Number of nodes in one dimension
551   @param[in]  Q_1d        Number of quadrature points in one dimension
552   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
553   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
554   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
555   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
556   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
557 
558   @return An error code: 0 - success, otherwise - failure
559 
560   @ref User
561 **/
562 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
563                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
564   if (!ceed->BasisCreateTensorH1) {
565     Ceed delegate;
566     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
567 
568     if (!delegate) {
569       // LCOV_EXCL_START
570       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
571       // LCOV_EXCL_STOP
572     }
573 
574     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
575     return CEED_ERROR_SUCCESS;
576   }
577 
578   if (dim < 1) {
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
581     // LCOV_EXCL_STOP
582   }
583 
584   if (num_comp < 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
587     // LCOV_EXCL_STOP
588   }
589 
590   if (P_1d < 1) {
591     // LCOV_EXCL_START
592     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
593     // LCOV_EXCL_STOP
594   }
595 
596   if (Q_1d < 1) {
597     // LCOV_EXCL_START
598     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
599     // LCOV_EXCL_STOP
600   }
601 
602   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
603 
604   CeedCall(CeedCalloc(1, basis));
605   (*basis)->ceed = ceed;
606   CeedCall(CeedReference(ceed));
607   (*basis)->ref_count    = 1;
608   (*basis)->tensor_basis = 1;
609   (*basis)->dim          = dim;
610   (*basis)->topo         = topo;
611   (*basis)->num_comp     = num_comp;
612   (*basis)->P_1d         = P_1d;
613   (*basis)->Q_1d         = Q_1d;
614   (*basis)->P            = CeedIntPow(P_1d, dim);
615   (*basis)->Q            = CeedIntPow(Q_1d, dim);
616   (*basis)->Q_comp       = 1;
617   (*basis)->basis_space  = 1;  // 1 for H^1 space
618   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
619   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
620   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
621   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
622   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
623   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
624   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
625   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
626   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
627   return CEED_ERROR_SUCCESS;
628 }
629 
630 /**
631   @brief Create a tensor-product Lagrange basis
632 
633   @param[in]  ceed      Ceed object where the CeedBasis will be created
634   @param[in]  dim       Topological dimension of element
635   @param[in]  num_comp  Number of field components (1 for scalar fields)
636   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
637                           The polynomial degree of the resulting Q_k element is k=P-1.
638   @param[in]  Q         Number of quadrature points in one dimension.
639   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
640   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
641 
642   @return An error code: 0 - success, otherwise - failure
643 
644   @ref User
645 **/
646 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
647   // Allocate
648   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
649   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
650 
651   if (dim < 1) {
652     // LCOV_EXCL_START
653     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
654     // LCOV_EXCL_STOP
655   }
656 
657   if (num_comp < 1) {
658     // LCOV_EXCL_START
659     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
660     // LCOV_EXCL_STOP
661   }
662 
663   if (P < 1) {
664     // LCOV_EXCL_START
665     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
666     // LCOV_EXCL_STOP
667   }
668 
669   if (Q < 1) {
670     // LCOV_EXCL_START
671     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
672     // LCOV_EXCL_STOP
673   }
674 
675   // Get Nodes and Weights
676   CeedCall(CeedCalloc(P * Q, &interp_1d));
677   CeedCall(CeedCalloc(P * Q, &grad_1d));
678   CeedCall(CeedCalloc(P, &nodes));
679   CeedCall(CeedCalloc(Q, &q_ref_1d));
680   CeedCall(CeedCalloc(Q, &q_weight_1d));
681   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
682   switch (quad_mode) {
683     case CEED_GAUSS:
684       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
685       break;
686     case CEED_GAUSS_LOBATTO:
687       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
688       break;
689   }
690   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
691 
692   // Build B, D matrix
693   // Fornberg, 1998
694   for (i = 0; i < Q; i++) {
695     c1                   = 1.0;
696     c3                   = nodes[0] - q_ref_1d[i];
697     interp_1d[i * P + 0] = 1.0;
698     for (j = 1; j < P; j++) {
699       c2 = 1.0;
700       c4 = c3;
701       c3 = nodes[j] - q_ref_1d[i];
702       for (k = 0; k < j; k++) {
703         dx = nodes[j] - nodes[k];
704         c2 *= dx;
705         if (k == j - 1) {
706           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
707           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
708         }
709         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
710         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
711       }
712       c1 = c2;
713     }
714   }
715   // Pass to CeedBasisCreateTensorH1
716   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
717 cleanup:
718   CeedCall(CeedFree(&interp_1d));
719   CeedCall(CeedFree(&grad_1d));
720   CeedCall(CeedFree(&nodes));
721   CeedCall(CeedFree(&q_ref_1d));
722   CeedCall(CeedFree(&q_weight_1d));
723   return CEED_ERROR_SUCCESS;
724 }
725 
726 /**
727   @brief Create a non tensor-product basis for H^1 discretizations
728 
729   @param[in]  ceed      Ceed object where the CeedBasis will be created
730   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
731   @param[in]  num_comp  Number of field components (1 for scalar fields)
732   @param[in]  num_nodes Total number of nodes
733   @param[in]  num_qpts  Total number of quadrature points
734   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
735   @param[in]  grad      Row-major (num_qpts * dim * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
736   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
737   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
738   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
739 
740   @return An error code: 0 - success, otherwise - failure
741 
742   @ref User
743 **/
744 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
745                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
746   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
747 
748   if (!ceed->BasisCreateH1) {
749     Ceed delegate;
750     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
751 
752     if (!delegate) {
753       // LCOV_EXCL_START
754       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
755       // LCOV_EXCL_STOP
756     }
757 
758     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
759     return CEED_ERROR_SUCCESS;
760   }
761 
762   if (num_comp < 1) {
763     // LCOV_EXCL_START
764     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
765     // LCOV_EXCL_STOP
766   }
767 
768   if (num_nodes < 1) {
769     // LCOV_EXCL_START
770     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
771     // LCOV_EXCL_STOP
772   }
773 
774   if (num_qpts < 1) {
775     // LCOV_EXCL_START
776     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
777     // LCOV_EXCL_STOP
778   }
779 
780   CeedCall(CeedCalloc(1, basis));
781 
782   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
783 
784   (*basis)->ceed = ceed;
785   CeedCall(CeedReference(ceed));
786   (*basis)->ref_count    = 1;
787   (*basis)->tensor_basis = 0;
788   (*basis)->dim          = dim;
789   (*basis)->topo         = topo;
790   (*basis)->num_comp     = num_comp;
791   (*basis)->P            = P;
792   (*basis)->Q            = Q;
793   (*basis)->Q_comp       = 1;
794   (*basis)->basis_space  = 1;  // 1 for H^1 space
795   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
796   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
797   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
798   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
799   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
800   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
801   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
802   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
803   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
804   return CEED_ERROR_SUCCESS;
805 }
806 
807 /**
808   @brief Create a non tensor-product basis for H(div) discretizations
809 
810   @param[in]  ceed      Ceed object where the CeedBasis will be created
811   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
812   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
813   @param[in]  num_nodes Total number of nodes (dofs per element)
814   @param[in]  num_qpts  Total number of quadrature points
815   @param[in]  interp    Row-major (dim*num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
816   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of nodal basis functions at quadrature points
817   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
818   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
819   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
820 
821   @return An error code: 0 - success, otherwise - failure
822 
823   @ref User
824 **/
825 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
826                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
827   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
828   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
829   if (!ceed->BasisCreateHdiv) {
830     Ceed delegate;
831     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
832 
833     if (!delegate) {
834       // LCOV_EXCL_START
835       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
836       // LCOV_EXCL_STOP
837     }
838 
839     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
840     return CEED_ERROR_SUCCESS;
841   }
842 
843   if (num_comp < 1) {
844     // LCOV_EXCL_START
845     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
846     // LCOV_EXCL_STOP
847   }
848 
849   if (num_nodes < 1) {
850     // LCOV_EXCL_START
851     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
852     // LCOV_EXCL_STOP
853   }
854 
855   if (num_qpts < 1) {
856     // LCOV_EXCL_START
857     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
858     // LCOV_EXCL_STOP
859   }
860 
861   CeedCall(CeedCalloc(1, basis));
862 
863   (*basis)->ceed = ceed;
864   CeedCall(CeedReference(ceed));
865   (*basis)->ref_count    = 1;
866   (*basis)->tensor_basis = 0;
867   (*basis)->dim          = dim;
868   (*basis)->topo         = topo;
869   (*basis)->num_comp     = num_comp;
870   (*basis)->P            = P;
871   (*basis)->Q            = Q;
872   (*basis)->Q_comp       = dim;
873   (*basis)->basis_space  = 2;  // 2 for H(div) space
874   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
875   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
876   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
877   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
878   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
879   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
880   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
881   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
882   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
888            Only `CEED_EVAL_INTERP` and `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`.
889            The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
890 factorization. The gradient is given by `grad_project = interp_to^+ * grad_from`. Note: `basis_from` and `basis_to` must have compatible quadrature
891 spaces. Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If
892 `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
893 
894   @param[in]  basis_from    CeedBasis to prolong from
895   @param[in]  basis_to      CeedBasis to prolong to
896   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref User
901 **/
902 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
903   Ceed ceed;
904   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
905 
906   // Create projectior matrix
907   CeedScalar *interp_project, *grad_project;
908   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
909 
910   // Build basis
911   bool        is_tensor;
912   CeedInt     dim, num_comp;
913   CeedScalar *q_ref, *q_weight;
914   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
915   CeedCall(CeedBasisGetDimension(basis_to, &dim));
916   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
917   if (is_tensor) {
918     CeedInt P_1d_to, P_1d_from;
919     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
920     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
921     CeedCall(CeedCalloc(P_1d_to, &q_ref));
922     CeedCall(CeedCalloc(P_1d_to, &q_weight));
923     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
924   } else {
925     CeedElemTopology topo;
926     CeedCall(CeedBasisGetTopology(basis_to, &topo));
927     CeedInt num_nodes_to, num_nodes_from;
928     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
929     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
930     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
931     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
932     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
933   }
934 
935   // Cleanup
936   CeedCall(CeedFree(&interp_project));
937   CeedCall(CeedFree(&grad_project));
938   CeedCall(CeedFree(&q_ref));
939   CeedCall(CeedFree(&q_weight));
940 
941   return CEED_ERROR_SUCCESS;
942 }
943 
944 /**
945   @brief Copy the pointer to a CeedBasis.
946            Both pointers should be destroyed with `CeedBasisDestroy()`.
947 
948            Note: If `*basis_copy` is non-NULL, then it is assumed that `*basis_copy` is a pointer to a CeedBasis.
949              This CeedBasis will be destroyed if `*basis_copy` is the only reference to this CeedBasis.
950 
951   @param[in]     basis      CeedBasis to copy reference to
952   @param[in,out] basis_copy Variable to store copied reference
953 
954   @return An error code: 0 - success, otherwise - failure
955 
956   @ref User
957 **/
958 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
959   CeedCall(CeedBasisReference(basis));
960   CeedCall(CeedBasisDestroy(basis_copy));
961   *basis_copy = basis;
962   return CEED_ERROR_SUCCESS;
963 }
964 
965 /**
966   @brief View a CeedBasis
967 
968   @param[in] basis  CeedBasis to view
969   @param[in] stream Stream to view to, e.g., stdout
970 
971   @return An error code: 0 - success, otherwise - failure
972 
973   @ref User
974 **/
975 int CeedBasisView(CeedBasis basis, FILE *stream) {
976   CeedFESpace      FE_space = basis->basis_space;
977   CeedElemTopology topo     = basis->topo;
978 
979   // Print FE space and element topology of the basis
980   if (basis->tensor_basis) {
981     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
982             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
983   } else {
984     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
985             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
986   }
987   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
988   if (basis->tensor_basis) {  // tensor basis
989     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
990     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
991     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
992     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
993   } else {  // non-tensor basis
994     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
995     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
996     CeedCall(CeedScalarView("interp", "\t% 12.8f", basis->Q_comp * basis->Q, basis->P, basis->interp, stream));
997     if (basis->grad) {
998       CeedCall(CeedScalarView("grad", "\t% 12.8f", basis->dim * basis->Q, basis->P, basis->grad, stream));
999     }
1000     if (basis->div) {
1001       CeedCall(CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P, basis->div, stream));
1002     }
1003   }
1004   return CEED_ERROR_SUCCESS;
1005 }
1006 
1007 /**
1008   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1009 
1010   @param[in]  basis      CeedBasis to evaluate
1011   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1012                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1013   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1014                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1015   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1016                           \ref CEED_EVAL_INTERP to use interpolated values,
1017                           \ref CEED_EVAL_GRAD to use gradients,
1018                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1019   @param[in]  u        Input CeedVector
1020   @param[out] v        Output CeedVector
1021 
1022   @return An error code: 0 - success, otherwise - failure
1023 
1024   @ref User
1025 **/
1026 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1027   CeedSize u_length = 0, v_length;
1028   CeedInt  dim, num_comp, num_nodes, num_qpts;
1029   CeedCall(CeedBasisGetDimension(basis, &dim));
1030   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1031   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1032   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1033   CeedCall(CeedVectorGetLength(v, &v_length));
1034   if (u) {
1035     CeedCall(CeedVectorGetLength(u, &u_length));
1036   }
1037 
1038   if (!basis->Apply) {
1039     // LCOV_EXCL_START
1040     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1041     // LCOV_EXCL_STOP
1042   }
1043 
1044   // Check compatibility of topological and geometrical dimensions
1045   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1046       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1047     // LCOV_EXCL_START
1048     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1049     // LCOV_EXCL_STOP
1050   }
1051 
1052   // Check vector lengths to prevent out of bounds issues
1053   bool bad_dims = false;
1054   switch (eval_mode) {
1055     case CEED_EVAL_NONE:
1056     case CEED_EVAL_INTERP:
1057       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1058                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1059       break;
1060     case CEED_EVAL_GRAD:
1061       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * dim || v_length < num_elem * num_comp * num_nodes)) ||
1062                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * dim || u_length < num_elem * num_comp * num_nodes)));
1063       break;
1064     case CEED_EVAL_WEIGHT:
1065       bad_dims = v_length < num_elem * num_qpts;
1066       break;
1067     // LCOV_EXCL_START
1068     case CEED_EVAL_DIV:
1069       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1070                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1071       break;
1072     case CEED_EVAL_CURL:
1073       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1074                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1075       break;
1076       // LCOV_EXCL_STOP
1077   }
1078   if (bad_dims) {
1079     // LCOV_EXCL_START
1080     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1081     // LCOV_EXCL_STOP
1082   }
1083 
1084   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Get Ceed associated with a CeedBasis
1090 
1091   @param[in]  basis CeedBasis
1092   @param[out] ceed  Variable to store Ceed
1093 
1094   @return An error code: 0 - success, otherwise - failure
1095 
1096   @ref Advanced
1097 **/
1098 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1099   *ceed = basis->ceed;
1100   return CEED_ERROR_SUCCESS;
1101 }
1102 
1103 /**
1104   @brief Get dimension for given CeedBasis
1105 
1106   @param[in]  basis CeedBasis
1107   @param[out] dim   Variable to store dimension of basis
1108 
1109   @return An error code: 0 - success, otherwise - failure
1110 
1111   @ref Advanced
1112 **/
1113 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1114   *dim = basis->dim;
1115   return CEED_ERROR_SUCCESS;
1116 }
1117 
1118 /**
1119   @brief Get topology for given CeedBasis
1120 
1121   @param[in]  basis CeedBasis
1122   @param[out] topo  Variable to store topology of basis
1123 
1124   @return An error code: 0 - success, otherwise - failure
1125 
1126   @ref Advanced
1127 **/
1128 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1129   *topo = basis->topo;
1130   return CEED_ERROR_SUCCESS;
1131 }
1132 
1133 /**
1134   @brief Get number of Q-vector components for given CeedBasis
1135 
1136   @param[in]  basis  CeedBasis
1137   @param[out] Q_comp Variable to store number of Q-vector components of basis
1138 
1139   @return An error code: 0 - success, otherwise - failure
1140 
1141   @ref Advanced
1142 **/
1143 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
1144   *Q_comp = basis->Q_comp;
1145   return CEED_ERROR_SUCCESS;
1146 }
1147 
1148 /**
1149   @brief Get number of components for given CeedBasis
1150 
1151   @param[in]  basis    CeedBasis
1152   @param[out] num_comp Variable to store number of components of basis
1153 
1154   @return An error code: 0 - success, otherwise - failure
1155 
1156   @ref Advanced
1157 **/
1158 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1159   *num_comp = basis->num_comp;
1160   return CEED_ERROR_SUCCESS;
1161 }
1162 
1163 /**
1164   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1165 
1166   @param[in]  basis CeedBasis
1167   @param[out] P     Variable to store number of nodes
1168 
1169   @return An error code: 0 - success, otherwise - failure
1170 
1171   @ref Utility
1172 **/
1173 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1174   *P = basis->P;
1175   return CEED_ERROR_SUCCESS;
1176 }
1177 
1178 /**
1179   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1180 
1181   @param[in]  basis CeedBasis
1182   @param[out] P_1d  Variable to store number of nodes
1183 
1184   @return An error code: 0 - success, otherwise - failure
1185 
1186   @ref Advanced
1187 **/
1188 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1189   if (!basis->tensor_basis) {
1190     // LCOV_EXCL_START
1191     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1192     // LCOV_EXCL_STOP
1193   }
1194 
1195   *P_1d = basis->P_1d;
1196   return CEED_ERROR_SUCCESS;
1197 }
1198 
1199 /**
1200   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1201 
1202   @param[in]  basis CeedBasis
1203   @param[out] Q     Variable to store number of quadrature points
1204 
1205   @return An error code: 0 - success, otherwise - failure
1206 
1207   @ref Utility
1208 **/
1209 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1210   *Q = basis->Q;
1211   return CEED_ERROR_SUCCESS;
1212 }
1213 
1214 /**
1215   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1216 
1217   @param[in]  basis CeedBasis
1218   @param[out] Q_1d  Variable to store number of quadrature points
1219 
1220   @return An error code: 0 - success, otherwise - failure
1221 
1222   @ref Advanced
1223 **/
1224 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1225   if (!basis->tensor_basis) {
1226     // LCOV_EXCL_START
1227     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1228     // LCOV_EXCL_STOP
1229   }
1230 
1231   *Q_1d = basis->Q_1d;
1232   return CEED_ERROR_SUCCESS;
1233 }
1234 
1235 /**
1236   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1237 
1238   @param[in]  basis CeedBasis
1239   @param[out] q_ref Variable to store reference coordinates of quadrature points
1240 
1241   @return An error code: 0 - success, otherwise - failure
1242 
1243   @ref Advanced
1244 **/
1245 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1246   *q_ref = basis->q_ref_1d;
1247   return CEED_ERROR_SUCCESS;
1248 }
1249 
1250 /**
1251   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1252 
1253   @param[in]  basis    CeedBasis
1254   @param[out] q_weight Variable to store quadrature weights
1255 
1256   @return An error code: 0 - success, otherwise - failure
1257 
1258   @ref Advanced
1259 **/
1260 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1261   *q_weight = basis->q_weight_1d;
1262   return CEED_ERROR_SUCCESS;
1263 }
1264 
1265 /**
1266   @brief Get interpolation matrix of a CeedBasis
1267 
1268   @param[in]  basis  CeedBasis
1269   @param[out] interp Variable to store interpolation matrix
1270 
1271   @return An error code: 0 - success, otherwise - failure
1272 
1273   @ref Advanced
1274 **/
1275 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1276   if (!basis->interp && basis->tensor_basis) {
1277     // Allocate
1278     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1279 
1280     // Initialize
1281     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1282 
1283     // Calculate
1284     for (CeedInt d = 0; d < basis->dim; d++) {
1285       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1286         for (CeedInt node = 0; node < basis->P; node++) {
1287           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1288           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1289           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1290         }
1291       }
1292     }
1293   }
1294   *interp = basis->interp;
1295   return CEED_ERROR_SUCCESS;
1296 }
1297 
1298 /**
1299   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1300 
1301   @param[in]  basis     CeedBasis
1302   @param[out] interp_1d Variable to store interpolation matrix
1303 
1304   @return An error code: 0 - success, otherwise - failure
1305 
1306   @ref Backend
1307 **/
1308 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1309   if (!basis->tensor_basis) {
1310     // LCOV_EXCL_START
1311     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1312     // LCOV_EXCL_STOP
1313   }
1314 
1315   *interp_1d = basis->interp_1d;
1316   return CEED_ERROR_SUCCESS;
1317 }
1318 
1319 /**
1320   @brief Get gradient matrix of a CeedBasis
1321 
1322   @param[in]  basis CeedBasis
1323   @param[out] grad  Variable to store gradient matrix
1324 
1325   @return An error code: 0 - success, otherwise - failure
1326 
1327   @ref Advanced
1328 **/
1329 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1330   if (!basis->grad && basis->tensor_basis) {
1331     // Allocate
1332     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1333 
1334     // Initialize
1335     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1336 
1337     // Calculate
1338     for (CeedInt d = 0; d < basis->dim; d++) {
1339       for (CeedInt i = 0; i < basis->dim; i++) {
1340         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1341           for (CeedInt node = 0; node < basis->P; node++) {
1342             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1343             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1344             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1345             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1346           }
1347         }
1348       }
1349     }
1350   }
1351   *grad = basis->grad;
1352   return CEED_ERROR_SUCCESS;
1353 }
1354 
1355 /**
1356   @brief Get 1D gradient matrix of a tensor product CeedBasis
1357 
1358   @param[in]  basis   CeedBasis
1359   @param[out] grad_1d Variable to store gradient matrix
1360 
1361   @return An error code: 0 - success, otherwise - failure
1362 
1363   @ref Advanced
1364 **/
1365 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1366   if (!basis->tensor_basis) {
1367     // LCOV_EXCL_START
1368     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1369     // LCOV_EXCL_STOP
1370   }
1371 
1372   *grad_1d = basis->grad_1d;
1373   return CEED_ERROR_SUCCESS;
1374 }
1375 
1376 /**
1377   @brief Get divergence matrix of a CeedBasis
1378 
1379   @param[in]  basis CeedBasis
1380   @param[out] div   Variable to store divergence matrix
1381 
1382   @return An error code: 0 - success, otherwise - failure
1383 
1384   @ref Advanced
1385 **/
1386 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1387   if (!basis->div) {
1388     // LCOV_EXCL_START
1389     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1390     // LCOV_EXCL_STOP
1391   }
1392 
1393   *div = basis->div;
1394   return CEED_ERROR_SUCCESS;
1395 }
1396 
1397 /**
1398   @brief Destroy a CeedBasis
1399 
1400   @param[in,out] basis CeedBasis to destroy
1401 
1402   @return An error code: 0 - success, otherwise - failure
1403 
1404   @ref User
1405 **/
1406 int CeedBasisDestroy(CeedBasis *basis) {
1407   if (!*basis || --(*basis)->ref_count > 0) {
1408     *basis = NULL;
1409     return CEED_ERROR_SUCCESS;
1410   }
1411   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1412   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1413   CeedCall(CeedFree(&(*basis)->interp));
1414   CeedCall(CeedFree(&(*basis)->interp_1d));
1415   CeedCall(CeedFree(&(*basis)->grad));
1416   CeedCall(CeedFree(&(*basis)->div));
1417   CeedCall(CeedFree(&(*basis)->grad_1d));
1418   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1419   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1420   CeedCall(CeedDestroy(&(*basis)->ceed));
1421   CeedCall(CeedFree(basis));
1422   return CEED_ERROR_SUCCESS;
1423 }
1424 
1425 /**
1426   @brief Construct a Gauss-Legendre quadrature
1427 
1428   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1429   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1430   @param[out] q_weight_1d Array of length Q to hold the weights
1431 
1432   @return An error code: 0 - success, otherwise - failure
1433 
1434   @ref Utility
1435 **/
1436 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1437   // Allocate
1438   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1439   // Build q_ref_1d, q_weight_1d
1440   for (CeedInt i = 0; i <= Q / 2; i++) {
1441     // Guess
1442     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1443     // Pn(xi)
1444     P0 = 1.0;
1445     P1 = xi;
1446     P2 = 0.0;
1447     for (CeedInt j = 2; j <= Q; j++) {
1448       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1449       P0 = P1;
1450       P1 = P2;
1451     }
1452     // First Newton Step
1453     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1454     xi  = xi - P2 / dP2;
1455     // Newton to convergence
1456     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1457       P0 = 1.0;
1458       P1 = xi;
1459       for (CeedInt j = 2; j <= Q; j++) {
1460         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1461         P0 = P1;
1462         P1 = P2;
1463       }
1464       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1465       xi  = xi - P2 / dP2;
1466     }
1467     // Save xi, wi
1468     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1469     q_weight_1d[i]         = wi;
1470     q_weight_1d[Q - 1 - i] = wi;
1471     q_ref_1d[i]            = -xi;
1472     q_ref_1d[Q - 1 - i]    = xi;
1473   }
1474   return CEED_ERROR_SUCCESS;
1475 }
1476 
1477 /**
1478   @brief Construct a Gauss-Legendre-Lobatto quadrature
1479 
1480   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1481   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1482   @param[out] q_weight_1d Array of length Q to hold the weights
1483 
1484   @return An error code: 0 - success, otherwise - failure
1485 
1486   @ref Utility
1487 **/
1488 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1489   // Allocate
1490   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1491   // Build q_ref_1d, q_weight_1d
1492   // Set endpoints
1493   if (Q < 2) {
1494     // LCOV_EXCL_START
1495     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1496     // LCOV_EXCL_STOP
1497   }
1498   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1499   if (q_weight_1d) {
1500     q_weight_1d[0]     = wi;
1501     q_weight_1d[Q - 1] = wi;
1502   }
1503   q_ref_1d[0]     = -1.0;
1504   q_ref_1d[Q - 1] = 1.0;
1505   // Interior
1506   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1507     // Guess
1508     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1509     // Pn(xi)
1510     P0 = 1.0;
1511     P1 = xi;
1512     P2 = 0.0;
1513     for (CeedInt j = 2; j < Q; j++) {
1514       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1515       P0 = P1;
1516       P1 = P2;
1517     }
1518     // First Newton step
1519     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1520     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1521     xi   = xi - dP2 / d2P2;
1522     // Newton to convergence
1523     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1524       P0 = 1.0;
1525       P1 = xi;
1526       for (CeedInt j = 2; j < Q; j++) {
1527         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1528         P0 = P1;
1529         P1 = P2;
1530       }
1531       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1532       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1533       xi   = xi - dP2 / d2P2;
1534     }
1535     // Save xi, wi
1536     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
1537     if (q_weight_1d) {
1538       q_weight_1d[i]         = wi;
1539       q_weight_1d[Q - 1 - i] = wi;
1540     }
1541     q_ref_1d[i]         = -xi;
1542     q_ref_1d[Q - 1 - i] = xi;
1543   }
1544   return CEED_ERROR_SUCCESS;
1545 }
1546 
1547 /**
1548   @brief Return QR Factorization of a matrix
1549 
1550   @param[in]     ceed Ceed context for error handling
1551   @param[in,out] mat  Row-major matrix to be factorized in place
1552   @param[in,out] tau  Vector of length m of scaling factors
1553   @param[in]     m    Number of rows
1554   @param[in]     n    Number of columns
1555 
1556   @return An error code: 0 - success, otherwise - failure
1557 
1558   @ref Utility
1559 **/
1560 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
1561   CeedScalar v[m];
1562 
1563   // Check matrix shape
1564   if (n > m) {
1565     // LCOV_EXCL_START
1566     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
1567     // LCOV_EXCL_STOP
1568   }
1569 
1570   for (CeedInt i = 0; i < n; i++) {
1571     if (i >= m - 1) {  // last row of matrix, no reflection needed
1572       tau[i] = 0.;
1573       break;
1574     }
1575     // Calculate Householder vector, magnitude
1576     CeedScalar sigma = 0.0;
1577     v[i]             = mat[i + n * i];
1578     for (CeedInt j = i + 1; j < m; j++) {
1579       v[j] = mat[i + n * j];
1580       sigma += v[j] * v[j];
1581     }
1582     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
1583     CeedScalar R_ii = -copysign(norm, v[i]);
1584     v[i] -= R_ii;
1585     // norm of v[i:m] after modification above and scaling below
1586     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1587     //   tau = 2 / (norm*norm)
1588     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1589     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
1590 
1591     // Apply Householder reflector to lower right panel
1592     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
1593     // Save v
1594     mat[i + n * i] = R_ii;
1595     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
1596   }
1597   return CEED_ERROR_SUCCESS;
1598 }
1599 
1600 /**
1601   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
1602 
1603   @param[in]     ceed   Ceed context for error handling
1604   @param[in,out] mat    Row-major matrix to be factorized in place
1605   @param[out]    lambda Vector of length n of eigenvalues
1606   @param[in]     n      Number of rows/columns
1607 
1608   @return An error code: 0 - success, otherwise - failure
1609 
1610   @ref Utility
1611 **/
1612 CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
1613   // Check bounds for clang-tidy
1614   if (n < 2) {
1615     // LCOV_EXCL_START
1616     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
1617     // LCOV_EXCL_STOP
1618   }
1619 
1620   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
1621 
1622   // Copy mat to mat_T and set mat to I
1623   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
1624   for (CeedInt i = 0; i < n; i++) {
1625     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
1626   }
1627 
1628   // Reduce to tridiagonal
1629   for (CeedInt i = 0; i < n - 1; i++) {
1630     // Calculate Householder vector, magnitude
1631     CeedScalar sigma = 0.0;
1632     v[i]             = mat_T[i + n * (i + 1)];
1633     for (CeedInt j = i + 1; j < n - 1; j++) {
1634       v[j] = mat_T[i + n * (j + 1)];
1635       sigma += v[j] * v[j];
1636     }
1637     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
1638     CeedScalar R_ii = -copysign(norm, v[i]);
1639     v[i] -= R_ii;
1640     // norm of v[i:m] after modification above and scaling below
1641     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1642     //   tau = 2 / (norm*norm)
1643     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1644     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
1645 
1646     // Update sub and super diagonal
1647     for (CeedInt j = i + 2; j < n; j++) {
1648       mat_T[i + n * j] = 0;
1649       mat_T[j + n * i] = 0;
1650     }
1651     // Apply symmetric Householder reflector to lower right panel
1652     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1653     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
1654 
1655     // Save v
1656     mat_T[i + n * (i + 1)] = R_ii;
1657     mat_T[(i + 1) + n * i] = R_ii;
1658     for (CeedInt j = i + 1; j < n - 1; j++) {
1659       mat_T[i + n * (j + 1)] = v[j];
1660     }
1661   }
1662   // Backwards accumulation of Q
1663   for (CeedInt i = n - 2; i >= 0; i--) {
1664     if (tau[i] > 0.0) {
1665       v[i] = 1;
1666       for (CeedInt j = i + 1; j < n - 1; j++) {
1667         v[j]                   = mat_T[i + n * (j + 1)];
1668         mat_T[i + n * (j + 1)] = 0;
1669       }
1670       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1671     }
1672   }
1673 
1674   // Reduce sub and super diagonal
1675   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
1676   CeedScalar tol = CEED_EPSILON;
1677 
1678   while (itr < max_itr) {
1679     // Update p, q, size of reduced portions of diagonal
1680     p = 0;
1681     q = 0;
1682     for (CeedInt i = n - 2; i >= 0; i--) {
1683       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
1684       else break;
1685     }
1686     for (CeedInt i = 0; i < n - q - 1; i++) {
1687       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
1688       else break;
1689     }
1690     if (q == n - 1) break;  // Finished reducing
1691 
1692     // Reduce tridiagonal portion
1693     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
1694     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
1695     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
1696     CeedScalar x  = mat_T[p + n * p] - mu;
1697     CeedScalar z  = mat_T[p + n * (p + 1)];
1698     for (CeedInt k = p; k < n - q - 1; k++) {
1699       // Compute Givens rotation
1700       CeedScalar c = 1, s = 0;
1701       if (fabs(z) > tol) {
1702         if (fabs(z) > fabs(x)) {
1703           CeedScalar tau = -x / z;
1704           s = 1 / sqrt(1 + tau * tau), c = s * tau;
1705         } else {
1706           CeedScalar tau = -z / x;
1707           c = 1 / sqrt(1 + tau * tau), s = c * tau;
1708         }
1709       }
1710 
1711       // Apply Givens rotation to T
1712       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1713       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
1714 
1715       // Apply Givens rotation to Q
1716       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1717 
1718       // Update x, z
1719       if (k < n - q - 2) {
1720         x = mat_T[k + n * (k + 1)];
1721         z = mat_T[k + n * (k + 2)];
1722       }
1723     }
1724     itr++;
1725   }
1726 
1727   // Save eigenvalues
1728   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
1729 
1730   // Check convergence
1731   if (itr == max_itr && q < n - 1) {
1732     // LCOV_EXCL_START
1733     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
1734     // LCOV_EXCL_STOP
1735   }
1736   return CEED_ERROR_SUCCESS;
1737 }
1738 CeedPragmaOptimizeOn
1739 
1740     /**
1741       @brief Return Simultaneous Diagonalization of two matrices.
1742                This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
1743                We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
1744                This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1745 
1746       @param[in]  ceed   Ceed context for error handling
1747       @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
1748       @param[in]  mat_B  Row-major matrix to be factorized to identity
1749       @param[out] mat_X  Row-major orthogonal matrix
1750       @param[out] lambda Vector of length n of generalized eigenvalues
1751       @param[in]  n      Number of rows/columns
1752 
1753       @return An error code: 0 - success, otherwise - failure
1754 
1755       @ref Utility
1756     **/
1757     CeedPragmaOptimizeOff int
1758     CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
1759   CeedScalar *mat_C, *mat_G, *vec_D;
1760   CeedCall(CeedCalloc(n * n, &mat_C));
1761   CeedCall(CeedCalloc(n * n, &mat_G));
1762   CeedCall(CeedCalloc(n, &vec_D));
1763 
1764   // Compute B = G D G^T
1765   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
1766   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
1767 
1768   // Sort eigenvalues
1769   for (CeedInt i = n - 1; i >= 0; i--) {
1770     for (CeedInt j = 0; j < i; j++) {
1771       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
1772         CeedScalar temp;
1773         temp         = vec_D[j];
1774         vec_D[j]     = vec_D[j + 1];
1775         vec_D[j + 1] = temp;
1776         for (CeedInt k = 0; k < n; k++) {
1777           temp                 = mat_G[k * n + j];
1778           mat_G[k * n + j]     = mat_G[k * n + j + 1];
1779           mat_G[k * n + j + 1] = temp;
1780         }
1781       }
1782     }
1783   }
1784 
1785   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1786   //           = D^-1/2 G^T A G D^-1/2
1787   // -- D = D^-1/2
1788   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
1789   // -- G = G D^-1/2
1790   // -- C = D^-1/2 G^T
1791   for (CeedInt i = 0; i < n; i++) {
1792     for (CeedInt j = 0; j < n; j++) {
1793       mat_G[i * n + j] *= vec_D[j];
1794       mat_C[j * n + i] = mat_G[i * n + j];
1795     }
1796   }
1797   // -- X = (D^-1/2 G^T) A
1798   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
1799   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1800   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
1801 
1802   // Compute Q^T C Q = lambda
1803   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
1804 
1805   // Sort eigenvalues
1806   for (CeedInt i = n - 1; i >= 0; i--) {
1807     for (CeedInt j = 0; j < i; j++) {
1808       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
1809         CeedScalar temp;
1810         temp          = lambda[j];
1811         lambda[j]     = lambda[j + 1];
1812         lambda[j + 1] = temp;
1813         for (CeedInt k = 0; k < n; k++) {
1814           temp                 = mat_C[k * n + j];
1815           mat_C[k * n + j]     = mat_C[k * n + j + 1];
1816           mat_C[k * n + j + 1] = temp;
1817         }
1818       }
1819     }
1820   }
1821 
1822   // Set X = (G D^1/2)^-T Q
1823   //       = G D^-1/2 Q
1824   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
1825 
1826   // Cleanup
1827   CeedCall(CeedFree(&mat_C));
1828   CeedCall(CeedFree(&mat_G));
1829   CeedCall(CeedFree(&vec_D));
1830   return CEED_ERROR_SUCCESS;
1831 }
1832 CeedPragmaOptimizeOn
1833 
1834     /// @}
1835