xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-basis.c (revision ba7b2a00d73849c86e4657fa7cfd7a31da27058f)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40   Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Compute Givens rotation
66 
67   Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
68 
69   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
70   @param[in]     c      Cosine factor
71   @param[in]     s      Sine factor
72   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
73                           @ref CEED_TRANSPOSE for the opposite rotation
74   @param[in]     i      First row/column to apply rotation
75   @param[in]     k      Second row/column to apply rotation
76   @param[in]     m      Number of rows in A
77   @param[in]     n      Number of columns in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
84   CeedInt stride_j = 1, stride_ik = m, num_its = n;
85   if (t_mode == CEED_NOTRANSPOSE) {
86     stride_j  = n;
87     stride_ik = 1;
88     num_its   = m;
89   }
90 
91   // Apply rotation
92   for (CeedInt j = 0; j < num_its; j++) {
93     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
94     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
95     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
96   }
97   return CEED_ERROR_SUCCESS;
98 }
99 
100 /**
101   @brief View an array stored in a CeedBasis
102 
103   @param[in] name   Name of array
104   @param[in] fp_fmt Printing format
105   @param[in] m      Number of rows in array
106   @param[in] n      Number of columns in array
107   @param[in] a      Array to be viewed
108   @param[in] stream Stream to view to, e.g., stdout
109 
110   @return An error code: 0 - success, otherwise - failure
111 
112   @ref Developer
113 **/
114 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
115   if (m > 1) {
116     fprintf(stream, "  %s:\n", name);
117   } else {
118     char padded_name[12];
119 
120     snprintf(padded_name, 11, "%s:", name);
121     fprintf(stream, "  %-10s", padded_name);
122   }
123   for (CeedInt i = 0; i < m; i++) {
124     if (m > 1) fprintf(stream, "    [%" CeedInt_FMT "]", i);
125     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
126     fputs("\n", stream);
127   }
128   return CEED_ERROR_SUCCESS;
129 }
130 
131 /**
132   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
133 
134   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR factorization.
135   The gradient is given by `grad_project = interp_to^+ * grad_from`, and is only computed for H^1 spaces otherwise it should not be used.
136 
137   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
138 
139   @param[in]  basis_from     CeedBasis to project from
140   @param[in]  basis_to       CeedBasis to project to
141   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
142   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
143 
144   @return An error code: 0 - success, otherwise - failure
145 
146   @ref Developer
147 **/
148 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
149   Ceed ceed;
150   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
151 
152   // Check for compatible quadrature spaces
153   CeedInt Q_to, Q_from;
154   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
155   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
156   CeedCheck(Q_to == Q_from, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
157 
158   // Check for matching tensor or non-tensor
159   CeedInt P_to, P_from, Q = Q_to;
160   bool    is_tensor_to, is_tensor_from;
161   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
162   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
163   CeedCheck(is_tensor_to == is_tensor_from, ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
164   if (is_tensor_to) {
165     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
166     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
167     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
168   } else {
169     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
170     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
171   }
172 
173   // Check for matching FE space
174   CeedFESpace fe_space_to, fe_space_from;
175   CeedCall(CeedBasisGetFESpace(basis_to, &fe_space_to));
176   CeedCall(CeedBasisGetFESpace(basis_from, &fe_space_from));
177   CeedCheck(fe_space_to == fe_space_from, ceed, CEED_ERROR_MINOR, "Bases must both be the same FE space type");
178 
179   // Get source matrices
180   CeedInt           dim, q_comp = 1;
181   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL;
182   CeedScalar       *interp_to, *interp_from, *tau;
183   CeedCall(CeedBasisGetDimension(basis_to, &dim));
184   if (is_tensor_to) {
185     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
186     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
187   } else {
188     CeedCall(CeedBasisGetNumQuadratureComponents(basis_from, CEED_EVAL_INTERP, &q_comp));
189     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
190     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
191   }
192   CeedCall(CeedMalloc(Q * P_from * q_comp, &interp_from));
193   CeedCall(CeedMalloc(Q * P_to * q_comp, &interp_to));
194   CeedCall(CeedCalloc(P_to * P_from, interp_project));
195   CeedCall(CeedMalloc(Q * q_comp, &tau));
196 
197   // `grad_project = interp_to^+ * grad_from` is computed for the H^1 space case so the
198   // projection basis will have a gradient operation (allocated even if not H^1 for the
199   // basis construction later on)
200   const CeedScalar *grad_from_source = NULL;
201   if (fe_space_to == CEED_FE_SPACE_H1) {
202     if (is_tensor_to) {
203       CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
204     } else {
205       CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
206     }
207   }
208   CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
209 
210   // QR Factorization, interp_to = Q R
211   memcpy(interp_to, interp_to_source, Q * P_to * q_comp * sizeof(interp_to_source[0]));
212   CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q * q_comp, P_to));
213 
214   // Build matrices
215   CeedInt     num_matrices = 1 + (fe_space_to == CEED_FE_SPACE_H1) * (is_tensor_to ? 1 : dim);
216   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
217   input_from[0]     = (CeedScalar *)interp_from_source;
218   output_project[0] = *interp_project;
219   for (CeedInt m = 1; m < num_matrices; m++) {
220     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
221     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
222   }
223   for (CeedInt m = 0; m < num_matrices; m++) {
224     // Apply Q^T, interp_from = Q^T interp_from
225     memcpy(interp_from, input_from[m], Q * P_from * q_comp * sizeof(input_from[m][0]));
226     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q * q_comp, P_from, P_to, P_from, 1));
227 
228     // Apply Rinv, output_project = Rinv interp_from
229     for (CeedInt j = 0; j < P_from; j++) {  // Column j
230       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
231       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
232         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
233         for (CeedInt k = i + 1; k < P_to; k++) {
234           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
235         }
236         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
237       }
238     }
239   }
240 
241   // Cleanup
242   CeedCall(CeedFree(&tau));
243   CeedCall(CeedFree(&interp_to));
244   CeedCall(CeedFree(&interp_from));
245 
246   return CEED_ERROR_SUCCESS;
247 }
248 
249 /// @}
250 
251 /// ----------------------------------------------------------------------------
252 /// Ceed Backend API
253 /// ----------------------------------------------------------------------------
254 /// @addtogroup CeedBasisBackend
255 /// @{
256 
257 /**
258   @brief Return collocated grad matrix
259 
260   @param[in]  basis         CeedBasis
261   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
262 
263   @return An error code: 0 - success, otherwise - failure
264 
265   @ref Backend
266 **/
267 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
268   Ceed        ceed;
269   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
270   CeedScalar *interp_1d, *grad_1d, *tau;
271 
272   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
273   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
274   CeedCall(CeedMalloc(Q_1d, &tau));
275   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
276   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
277 
278   // QR Factorization, interp_1d = Q R
279   CeedCall(CeedBasisGetCeed(basis, &ceed));
280   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
281   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
282 
283   // Apply R_inv, collo_grad_1d = grad_1d R_inv
284   for (CeedInt i = 0; i < Q_1d; i++) {  // Row i
285     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
286     for (CeedInt j = 1; j < P_1d; j++) {  // Column j
287       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
288       for (CeedInt k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
289       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
290     }
291     for (CeedInt j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
292   }
293 
294   // Apply Q^T, collo_grad_1d = collo_grad_1d Q^T
295   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
296 
297   CeedCall(CeedFree(&interp_1d));
298   CeedCall(CeedFree(&grad_1d));
299   CeedCall(CeedFree(&tau));
300   return CEED_ERROR_SUCCESS;
301 }
302 
303 /**
304   @brief Get tensor status for given CeedBasis
305 
306   @param[in]  basis     CeedBasis
307   @param[out] is_tensor Variable to store tensor status
308 
309   @return An error code: 0 - success, otherwise - failure
310 
311   @ref Backend
312 **/
313 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
314   *is_tensor = basis->is_tensor_basis;
315   return CEED_ERROR_SUCCESS;
316 }
317 
318 /**
319   @brief Get backend data of a CeedBasis
320 
321   @param[in]  basis CeedBasis
322   @param[out] data  Variable to store data
323 
324   @return An error code: 0 - success, otherwise - failure
325 
326   @ref Backend
327 **/
328 int CeedBasisGetData(CeedBasis basis, void *data) {
329   *(void **)data = basis->data;
330   return CEED_ERROR_SUCCESS;
331 }
332 
333 /**
334   @brief Set backend data of a CeedBasis
335 
336   @param[in,out] basis  CeedBasis
337   @param[in]     data   Data to set
338 
339   @return An error code: 0 - success, otherwise - failure
340 
341   @ref Backend
342 **/
343 int CeedBasisSetData(CeedBasis basis, void *data) {
344   basis->data = data;
345   return CEED_ERROR_SUCCESS;
346 }
347 
348 /**
349   @brief Increment the reference counter for a CeedBasis
350 
351   @param[in,out] basis Basis to increment the reference counter
352 
353   @return An error code: 0 - success, otherwise - failure
354 
355   @ref Backend
356 **/
357 int CeedBasisReference(CeedBasis basis) {
358   basis->ref_count++;
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Get number of Q-vector components for given CeedBasis
364 
365   @param[in]  basis  CeedBasis
366   @param[in]  eval_mode \ref CEED_EVAL_INTERP to use interpolated values,
367                           \ref CEED_EVAL_GRAD to use gradients,
368                           \ref CEED_EVAL_DIV to use divergence,
369                           \ref CEED_EVAL_CURL to use curl.
370   @param[out] q_comp Variable to store number of Q-vector components of basis
371 
372   @return An error code: 0 - success, otherwise - failure
373 
374   @ref Backend
375 **/
376 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedEvalMode eval_mode, CeedInt *q_comp) {
377   switch (eval_mode) {
378     case CEED_EVAL_INTERP:
379       *q_comp = (basis->fe_space == CEED_FE_SPACE_H1) ? 1 : basis->dim;
380       break;
381     case CEED_EVAL_GRAD:
382       *q_comp = basis->dim;
383       break;
384     case CEED_EVAL_DIV:
385       *q_comp = 1;
386       break;
387     case CEED_EVAL_CURL:
388       *q_comp = (basis->dim < 3) ? 1 : basis->dim;
389       break;
390     case CEED_EVAL_NONE:
391     case CEED_EVAL_WEIGHT:
392       *q_comp = 1;
393       break;
394   }
395   return CEED_ERROR_SUCCESS;
396 }
397 
398 /**
399   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
400 
401   @param[in]  basis     Basis to estimate FLOPs for
402   @param[in]  t_mode    Apply basis or transpose
403   @param[in]  eval_mode Basis evaluation mode
404   @param[out] flops     Address of variable to hold FLOPs estimate
405 
406   @ref Backend
407 **/
408 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
409   bool is_tensor;
410 
411   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
412   if (is_tensor) {
413     CeedInt dim, num_comp, P_1d, Q_1d;
414     CeedCall(CeedBasisGetDimension(basis, &dim));
415     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
416     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
417     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
418     if (t_mode == CEED_TRANSPOSE) {
419       P_1d = Q_1d;
420       Q_1d = P_1d;
421     }
422     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
423     for (CeedInt d = 0; d < dim; d++) {
424       tensor_flops += 2 * pre * P_1d * post * Q_1d;
425       pre /= P_1d;
426       post *= Q_1d;
427     }
428     switch (eval_mode) {
429       case CEED_EVAL_NONE:
430         *flops = 0;
431         break;
432       case CEED_EVAL_INTERP:
433         *flops = tensor_flops;
434         break;
435       case CEED_EVAL_GRAD:
436         *flops = tensor_flops * 2;
437         break;
438       case CEED_EVAL_DIV:
439       case CEED_EVAL_CURL:
440         // LCOV_EXCL_START
441         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor basis evaluation for %s not supported", CeedEvalModes[eval_mode]);
442         break;
443       // LCOV_EXCL_STOP
444       case CEED_EVAL_WEIGHT:
445         *flops = dim * CeedIntPow(Q_1d, dim);
446         break;
447     }
448   } else {
449     CeedInt dim, num_comp, q_comp, num_nodes, num_qpts;
450     CeedCall(CeedBasisGetDimension(basis, &dim));
451     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
452     CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
453     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
454     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
455     switch (eval_mode) {
456       case CEED_EVAL_NONE:
457         *flops = 0;
458         break;
459       case CEED_EVAL_INTERP:
460       case CEED_EVAL_GRAD:
461       case CEED_EVAL_DIV:
462       case CEED_EVAL_CURL:
463         *flops = num_nodes * num_qpts * num_comp * q_comp;
464         break;
465       case CEED_EVAL_WEIGHT:
466         *flops = 0;
467         break;
468     }
469   }
470 
471   return CEED_ERROR_SUCCESS;
472 }
473 
474 /**
475   @brief Get CeedFESpace for a CeedBasis
476 
477   @param[in]  basis     CeedBasis
478   @param[out] fe_space  Variable to store CeedFESpace
479 
480   @return An error code: 0 - success, otherwise - failure
481 
482   @ref Backend
483 **/
484 int CeedBasisGetFESpace(CeedBasis basis, CeedFESpace *fe_space) {
485   *fe_space = basis->fe_space;
486   return CEED_ERROR_SUCCESS;
487 }
488 
489 /**
490   @brief Get dimension for given CeedElemTopology
491 
492   @param[in]  topo CeedElemTopology
493   @param[out] dim  Variable to store dimension of topology
494 
495   @return An error code: 0 - success, otherwise - failure
496 
497   @ref Backend
498 **/
499 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
500   *dim = (CeedInt)topo >> 16;
501   return CEED_ERROR_SUCCESS;
502 }
503 
504 /**
505   @brief Get CeedTensorContract of a CeedBasis
506 
507   @param[in]  basis     CeedBasis
508   @param[out] contract  Variable to store CeedTensorContract
509 
510   @return An error code: 0 - success, otherwise - failure
511 
512   @ref Backend
513 **/
514 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
515   *contract = basis->contract;
516   return CEED_ERROR_SUCCESS;
517 }
518 
519 /**
520   @brief Set CeedTensorContract of a CeedBasis
521 
522   @param[in,out] basis    CeedBasis
523   @param[in]     contract CeedTensorContract to set
524 
525   @return An error code: 0 - success, otherwise - failure
526 
527   @ref Backend
528 **/
529 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
530   basis->contract = contract;
531   CeedCall(CeedTensorContractReference(contract));
532   return CEED_ERROR_SUCCESS;
533 }
534 
535 /**
536   @brief Return a reference implementation of matrix multiplication C = A B.
537 
538   Note: This is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
539 
540   @param[in]  ceed  Ceed context for error handling
541   @param[in]  mat_A Row-major matrix A
542   @param[in]  mat_B Row-major matrix B
543   @param[out] mat_C Row-major output matrix C
544   @param[in]  m     Number of rows of C
545   @param[in]  n     Number of columns of C
546   @param[in]  kk    Number of columns of A/rows of B
547 
548   @return An error code: 0 - success, otherwise - failure
549 
550   @ref Utility
551 **/
552 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
553   for (CeedInt i = 0; i < m; i++) {
554     for (CeedInt j = 0; j < n; j++) {
555       CeedScalar sum = 0;
556       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
557       mat_C[j + i * n] = sum;
558     }
559   }
560   return CEED_ERROR_SUCCESS;
561 }
562 
563 /**
564   @brief Return QR Factorization of a matrix
565 
566   @param[in]     ceed Ceed context for error handling
567   @param[in,out] mat  Row-major matrix to be factorized in place
568   @param[in,out] tau  Vector of length m of scaling factors
569   @param[in]     m    Number of rows
570   @param[in]     n    Number of columns
571 
572   @return An error code: 0 - success, otherwise - failure
573 
574   @ref Utility
575 **/
576 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
577   CeedScalar v[m];
578 
579   // Check matrix shape
580   CeedCheck(n <= m, ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
581 
582   for (CeedInt i = 0; i < n; i++) {
583     if (i >= m - 1) {  // last row of matrix, no reflection needed
584       tau[i] = 0.;
585       break;
586     }
587     // Calculate Householder vector, magnitude
588     CeedScalar sigma = 0.0;
589     v[i]             = mat[i + n * i];
590     for (CeedInt j = i + 1; j < m; j++) {
591       v[j] = mat[i + n * j];
592       sigma += v[j] * v[j];
593     }
594     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
595     CeedScalar R_ii = -copysign(norm, v[i]);
596     v[i] -= R_ii;
597     // norm of v[i:m] after modification above and scaling below
598     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
599     //   tau = 2 / (norm*norm)
600     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
601     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
602 
603     // Apply Householder reflector to lower right panel
604     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
605     // Save v
606     mat[i + n * i] = R_ii;
607     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
608   }
609   return CEED_ERROR_SUCCESS;
610 }
611 
612 /**
613   @brief Apply Householder Q matrix
614 
615   Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn.
616 
617   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
618   @param[in]     mat_Q  Householder Q matrix
619   @param[in]     tau    Householder scaling factors
620   @param[in]     t_mode Transpose mode for application
621   @param[in]     m      Number of rows in A
622   @param[in]     n      Number of columns in A
623   @param[in]     k      Number of elementary reflectors in Q, k<m
624   @param[in]     row    Row stride in A
625   @param[in]     col    Col stride in A
626 
627   @return An error code: 0 - success, otherwise - failure
628 
629   @ref Utility
630 **/
631 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
632                           CeedInt k, CeedInt row, CeedInt col) {
633   CeedScalar *v;
634   CeedCall(CeedMalloc(m, &v));
635   for (CeedInt ii = 0; ii < k; ii++) {
636     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
637     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
638     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
639     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
640   }
641   CeedCall(CeedFree(&v));
642   return CEED_ERROR_SUCCESS;
643 }
644 
645 /**
646   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
647 
648   @param[in]     ceed   Ceed context for error handling
649   @param[in,out] mat    Row-major matrix to be factorized in place
650   @param[out]    lambda Vector of length n of eigenvalues
651   @param[in]     n      Number of rows/columns
652 
653   @return An error code: 0 - success, otherwise - failure
654 
655   @ref Utility
656 **/
657 CeedPragmaOptimizeOff
658 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
659   // Check bounds for clang-tidy
660   CeedCheck(n > 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
661 
662   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
663 
664   // Copy mat to mat_T and set mat to I
665   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
666   for (CeedInt i = 0; i < n; i++) {
667     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
668   }
669 
670   // Reduce to tridiagonal
671   for (CeedInt i = 0; i < n - 1; i++) {
672     // Calculate Householder vector, magnitude
673     CeedScalar sigma = 0.0;
674     v[i]             = mat_T[i + n * (i + 1)];
675     for (CeedInt j = i + 1; j < n - 1; j++) {
676       v[j] = mat_T[i + n * (j + 1)];
677       sigma += v[j] * v[j];
678     }
679     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
680     CeedScalar R_ii = -copysign(norm, v[i]);
681     v[i] -= R_ii;
682     // norm of v[i:m] after modification above and scaling below
683     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
684     //   tau = 2 / (norm*norm)
685     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
686     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
687 
688     // Update sub and super diagonal
689     for (CeedInt j = i + 2; j < n; j++) {
690       mat_T[i + n * j] = 0;
691       mat_T[j + n * i] = 0;
692     }
693     // Apply symmetric Householder reflector to lower right panel
694     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
695     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
696 
697     // Save v
698     mat_T[i + n * (i + 1)] = R_ii;
699     mat_T[(i + 1) + n * i] = R_ii;
700     for (CeedInt j = i + 1; j < n - 1; j++) {
701       mat_T[i + n * (j + 1)] = v[j];
702     }
703   }
704   // Backwards accumulation of Q
705   for (CeedInt i = n - 2; i >= 0; i--) {
706     if (tau[i] > 0.0) {
707       v[i] = 1;
708       for (CeedInt j = i + 1; j < n - 1; j++) {
709         v[j]                   = mat_T[i + n * (j + 1)];
710         mat_T[i + n * (j + 1)] = 0;
711       }
712       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
713     }
714   }
715 
716   // Reduce sub and super diagonal
717   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
718   CeedScalar tol = CEED_EPSILON;
719 
720   while (itr < max_itr) {
721     // Update p, q, size of reduced portions of diagonal
722     p = 0;
723     q = 0;
724     for (CeedInt i = n - 2; i >= 0; i--) {
725       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
726       else break;
727     }
728     for (CeedInt i = 0; i < n - q - 1; i++) {
729       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
730       else break;
731     }
732     if (q == n - 1) break;  // Finished reducing
733 
734     // Reduce tridiagonal portion
735     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
736     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
737     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
738     CeedScalar x  = mat_T[p + n * p] - mu;
739     CeedScalar z  = mat_T[p + n * (p + 1)];
740     for (CeedInt k = p; k < n - q - 1; k++) {
741       // Compute Givens rotation
742       CeedScalar c = 1, s = 0;
743       if (fabs(z) > tol) {
744         if (fabs(z) > fabs(x)) {
745           CeedScalar tau = -x / z;
746           s = 1 / sqrt(1 + tau * tau), c = s * tau;
747         } else {
748           CeedScalar tau = -z / x;
749           c = 1 / sqrt(1 + tau * tau), s = c * tau;
750         }
751       }
752 
753       // Apply Givens rotation to T
754       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
755       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
756 
757       // Apply Givens rotation to Q
758       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
759 
760       // Update x, z
761       if (k < n - q - 2) {
762         x = mat_T[k + n * (k + 1)];
763         z = mat_T[k + n * (k + 2)];
764       }
765     }
766     itr++;
767   }
768 
769   // Save eigenvalues
770   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
771 
772   // Check convergence
773   CeedCheck(itr < max_itr || q > n, ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
774   return CEED_ERROR_SUCCESS;
775 }
776 CeedPragmaOptimizeOn
777 
778 /**
779   @brief Return Simultaneous Diagonalization of two matrices.
780 
781   This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
782   We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
783   This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
784 
785   @param[in]  ceed   Ceed context for error handling
786   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
787   @param[in]  mat_B  Row-major matrix to be factorized to identity
788   @param[out] mat_X  Row-major orthogonal matrix
789   @param[out] lambda Vector of length n of generalized eigenvalues
790   @param[in]  n      Number of rows/columns
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Utility
795 **/
796 CeedPragmaOptimizeOff
797 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
798   CeedScalar *mat_C, *mat_G, *vec_D;
799   CeedCall(CeedCalloc(n * n, &mat_C));
800   CeedCall(CeedCalloc(n * n, &mat_G));
801   CeedCall(CeedCalloc(n, &vec_D));
802 
803   // Compute B = G D G^T
804   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
805   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
806 
807   // Sort eigenvalues
808   for (CeedInt i = n - 1; i >= 0; i--) {
809     for (CeedInt j = 0; j < i; j++) {
810       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
811         CeedScalar temp;
812         temp         = vec_D[j];
813         vec_D[j]     = vec_D[j + 1];
814         vec_D[j + 1] = temp;
815         for (CeedInt k = 0; k < n; k++) {
816           temp                 = mat_G[k * n + j];
817           mat_G[k * n + j]     = mat_G[k * n + j + 1];
818           mat_G[k * n + j + 1] = temp;
819         }
820       }
821     }
822   }
823 
824   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
825   //           = D^-1/2 G^T A G D^-1/2
826   // -- D = D^-1/2
827   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
828   // -- G = G D^-1/2
829   // -- C = D^-1/2 G^T
830   for (CeedInt i = 0; i < n; i++) {
831     for (CeedInt j = 0; j < n; j++) {
832       mat_G[i * n + j] *= vec_D[j];
833       mat_C[j * n + i] = mat_G[i * n + j];
834     }
835   }
836   // -- X = (D^-1/2 G^T) A
837   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
838   // -- C = (D^-1/2 G^T A) (G D^-1/2)
839   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
840 
841   // Compute Q^T C Q = lambda
842   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
843 
844   // Sort eigenvalues
845   for (CeedInt i = n - 1; i >= 0; i--) {
846     for (CeedInt j = 0; j < i; j++) {
847       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
848         CeedScalar temp;
849         temp          = lambda[j];
850         lambda[j]     = lambda[j + 1];
851         lambda[j + 1] = temp;
852         for (CeedInt k = 0; k < n; k++) {
853           temp                 = mat_C[k * n + j];
854           mat_C[k * n + j]     = mat_C[k * n + j + 1];
855           mat_C[k * n + j + 1] = temp;
856         }
857       }
858     }
859   }
860 
861   // Set X = (G D^1/2)^-T Q
862   //       = G D^-1/2 Q
863   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
864 
865   // Cleanup
866   CeedCall(CeedFree(&mat_C));
867   CeedCall(CeedFree(&mat_G));
868   CeedCall(CeedFree(&vec_D));
869   return CEED_ERROR_SUCCESS;
870 }
871 CeedPragmaOptimizeOn
872 
873 /// @}
874 
875 /// ----------------------------------------------------------------------------
876 /// CeedBasis Public API
877 /// ----------------------------------------------------------------------------
878 /// @addtogroup CeedBasisUser
879 /// @{
880 
881 /**
882   @brief Create a tensor-product basis for H^1 discretizations
883 
884   @param[in]  ceed        Ceed object where the CeedBasis will be created
885   @param[in]  dim         Topological dimension
886   @param[in]  num_comp    Number of field components (1 for scalar fields)
887   @param[in]  P_1d        Number of nodes in one dimension
888   @param[in]  Q_1d        Number of quadrature points in one dimension
889   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
890   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
891   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
892   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
893   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
894 
895   @return An error code: 0 - success, otherwise - failure
896 
897   @ref User
898 **/
899 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
900                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
901   if (!ceed->BasisCreateTensorH1) {
902     Ceed delegate;
903 
904     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
905     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
906     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
907     return CEED_ERROR_SUCCESS;
908   }
909 
910   CeedCheck(dim > 0, ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
911   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
912   CeedCheck(P_1d > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
913   CeedCheck(Q_1d > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
914 
915   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
916 
917   CeedCall(CeedCalloc(1, basis));
918   CeedCall(CeedReferenceCopy(ceed, &(*basis)->ceed));
919   (*basis)->ref_count       = 1;
920   (*basis)->is_tensor_basis = true;
921   (*basis)->dim             = dim;
922   (*basis)->topo            = topo;
923   (*basis)->num_comp        = num_comp;
924   (*basis)->P_1d            = P_1d;
925   (*basis)->Q_1d            = Q_1d;
926   (*basis)->P               = CeedIntPow(P_1d, dim);
927   (*basis)->Q               = CeedIntPow(Q_1d, dim);
928   (*basis)->fe_space        = CEED_FE_SPACE_H1;
929   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
930   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
931   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
932   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
933   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
934   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
935   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
936   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
937   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Create a tensor-product Lagrange basis
943 
944   @param[in]  ceed      Ceed object where the CeedBasis will be created
945   @param[in]  dim       Topological dimension of element
946   @param[in]  num_comp  Number of field components (1 for scalar fields)
947   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
948                           The polynomial degree of the resulting Q_k element is k=P-1.
949   @param[in]  Q         Number of quadrature points in one dimension.
950   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
951   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
952 
953   @return An error code: 0 - success, otherwise - failure
954 
955   @ref User
956 **/
957 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
958   // Allocate
959   int        ierr = CEED_ERROR_SUCCESS;
960   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
961 
962   CeedCheck(dim > 0, ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
963   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
964   CeedCheck(P > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
965   CeedCheck(Q > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
966 
967   // Get Nodes and Weights
968   CeedCall(CeedCalloc(P * Q, &interp_1d));
969   CeedCall(CeedCalloc(P * Q, &grad_1d));
970   CeedCall(CeedCalloc(P, &nodes));
971   CeedCall(CeedCalloc(Q, &q_ref_1d));
972   CeedCall(CeedCalloc(Q, &q_weight_1d));
973   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
974   switch (quad_mode) {
975     case CEED_GAUSS:
976       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
977       break;
978     case CEED_GAUSS_LOBATTO:
979       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
980       break;
981   }
982   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
983 
984   // Build B, D matrix
985   // Fornberg, 1998
986   for (CeedInt i = 0; i < Q; i++) {
987     c1                   = 1.0;
988     c3                   = nodes[0] - q_ref_1d[i];
989     interp_1d[i * P + 0] = 1.0;
990     for (CeedInt j = 1; j < P; j++) {
991       c2 = 1.0;
992       c4 = c3;
993       c3 = nodes[j] - q_ref_1d[i];
994       for (CeedInt k = 0; k < j; k++) {
995         dx = nodes[j] - nodes[k];
996         c2 *= dx;
997         if (k == j - 1) {
998           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
999           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
1000         }
1001         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
1002         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
1003       }
1004       c1 = c2;
1005     }
1006   }
1007   // Pass to CeedBasisCreateTensorH1
1008   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1009 cleanup:
1010   CeedCall(CeedFree(&interp_1d));
1011   CeedCall(CeedFree(&grad_1d));
1012   CeedCall(CeedFree(&nodes));
1013   CeedCall(CeedFree(&q_ref_1d));
1014   CeedCall(CeedFree(&q_weight_1d));
1015   return CEED_ERROR_SUCCESS;
1016 }
1017 
1018 /**
1019   @brief Create a non tensor-product basis for H^1 discretizations
1020 
1021   @param[in]  ceed      Ceed object where the CeedBasis will be created
1022   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
1023   @param[in]  num_comp  Number of field components (1 for scalar fields)
1024   @param[in]  num_nodes Total number of nodes
1025   @param[in]  num_qpts  Total number of quadrature points
1026   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1027   @param[in]  grad      Row-major (dim * num_qpts * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
1028   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1029   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1030   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1031 
1032   @return An error code: 0 - success, otherwise - failure
1033 
1034   @ref User
1035 **/
1036 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1037                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1038   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1039 
1040   if (!ceed->BasisCreateH1) {
1041     Ceed delegate;
1042 
1043     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1044     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
1045     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1046     return CEED_ERROR_SUCCESS;
1047   }
1048 
1049   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1050   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1051   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1052 
1053   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1054 
1055   CeedCall(CeedCalloc(1, basis));
1056   CeedCall(CeedReferenceCopy(ceed, &(*basis)->ceed));
1057   (*basis)->ref_count       = 1;
1058   (*basis)->is_tensor_basis = false;
1059   (*basis)->dim             = dim;
1060   (*basis)->topo            = topo;
1061   (*basis)->num_comp        = num_comp;
1062   (*basis)->P               = P;
1063   (*basis)->Q               = Q;
1064   (*basis)->fe_space        = CEED_FE_SPACE_H1;
1065   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1066   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1067   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1068   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1069   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1070   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1071   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1072   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1073   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1074   return CEED_ERROR_SUCCESS;
1075 }
1076 
1077 /**
1078   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1079 
1080   @param[in]  ceed      Ceed object where the CeedBasis will be created
1081   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1082   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1083   @param[in]  num_nodes Total number of nodes (dofs per element)
1084   @param[in]  num_qpts  Total number of quadrature points
1085   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1086   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of basis functions at quadrature points
1087   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1088   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1089   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1090 
1091   @return An error code: 0 - success, otherwise - failure
1092 
1093   @ref User
1094 **/
1095 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1096                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1097   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1098 
1099   if (!ceed->BasisCreateHdiv) {
1100     Ceed delegate;
1101 
1102     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1103     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1104     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1105     return CEED_ERROR_SUCCESS;
1106   }
1107 
1108   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1109   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1110   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1111 
1112   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1113 
1114   CeedCall(CeedCalloc(1, basis));
1115   CeedCall(CeedReferenceCopy(ceed, &(*basis)->ceed));
1116   (*basis)->ref_count       = 1;
1117   (*basis)->is_tensor_basis = false;
1118   (*basis)->dim             = dim;
1119   (*basis)->topo            = topo;
1120   (*basis)->num_comp        = num_comp;
1121   (*basis)->P               = P;
1122   (*basis)->Q               = Q;
1123   (*basis)->fe_space        = CEED_FE_SPACE_HDIV;
1124   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1125   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1126   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1127   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1128   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1129   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1130   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1131   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1132   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1133   return CEED_ERROR_SUCCESS;
1134 }
1135 
1136 /**
1137   @brief Create a non tensor-product basis for \f$H(\mathrm{curl})\f$ discretizations
1138 
1139   @param[in]  ceed      Ceed object where the CeedBasis will be created
1140   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1141   @param[in]  num_comp  Number of components (usually 1 for vectors in H(curl) bases)
1142   @param[in]  num_nodes Total number of nodes (dofs per element)
1143   @param[in]  num_qpts  Total number of quadrature points
1144   @param[in]  interp    Row-major (dim * num_qpts * num_nodes) matrix expressing the values of basis functions at quadrature points
1145   @param[in]  curl      Row-major (curl_comp * num_qpts * num_nodes, curl_comp = 1 if dim < 3 else dim) matrix expressing curl of basis functions at
1146 quadrature points
1147   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1148   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1149   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1150 
1151   @return An error code: 0 - success, otherwise - failure
1152 
1153   @ref User
1154 **/
1155 int CeedBasisCreateHcurl(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1156                          const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1157   CeedInt Q = num_qpts, P = num_nodes, dim = 0, curl_comp = 0;
1158 
1159   if (!ceed->BasisCreateHdiv) {
1160     Ceed delegate;
1161 
1162     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1163     CeedCheck(delegate, ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHcurl");
1164     CeedCall(CeedBasisCreateHcurl(delegate, topo, num_comp, num_nodes, num_qpts, interp, curl, q_ref, q_weight, basis));
1165     return CEED_ERROR_SUCCESS;
1166   }
1167 
1168   CeedCheck(num_comp > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1169   CeedCheck(num_nodes > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1170   CeedCheck(num_qpts > 0, ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1171 
1172   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1173   curl_comp = (dim < 3) ? 1 : dim;
1174 
1175   CeedCall(CeedCalloc(1, basis));
1176   CeedCall(CeedReferenceCopy(ceed, &(*basis)->ceed));
1177   (*basis)->ref_count       = 1;
1178   (*basis)->is_tensor_basis = false;
1179   (*basis)->dim             = dim;
1180   (*basis)->topo            = topo;
1181   (*basis)->num_comp        = num_comp;
1182   (*basis)->P               = P;
1183   (*basis)->Q               = Q;
1184   (*basis)->fe_space        = CEED_FE_SPACE_HCURL;
1185   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1186   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1187   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1188   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1189   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1190   CeedCall(CeedMalloc(curl_comp * Q * P, &(*basis)->curl));
1191   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1192   if (curl) memcpy((*basis)->curl, curl, curl_comp * Q * P * sizeof(curl[0]));
1193   CeedCall(ceed->BasisCreateHcurl(topo, dim, P, Q, interp, curl, q_ref, q_weight, *basis));
1194   return CEED_ERROR_SUCCESS;
1195 }
1196 
1197 /**
1198   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1199 
1200   Only `CEED_EVAL_INTERP` will be valid for the new basis, `basis_project`.
1201   For H^1 spaces, `CEED_EVAL_GRAD` will also be valid.
1202   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pseudoinverse `interp_to^+` is given by QR
1203 factorization.
1204   The gradient (for the H^1 case) is given by `grad_project = interp_to^+ * grad_from`.
1205 
1206   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
1207 
1208   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has.
1209         If `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1210 
1211   @param[in]  basis_from    CeedBasis to prolong from
1212   @param[in]  basis_to      CeedBasis to prolong to
1213   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
1214 
1215   @return An error code: 0 - success, otherwise - failure
1216 
1217   @ref User
1218 **/
1219 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1220   Ceed ceed;
1221   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1222 
1223   // Create projection matrix
1224   CeedScalar *interp_project, *grad_project;
1225   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1226 
1227   // Build basis
1228   bool        is_tensor;
1229   CeedInt     dim, num_comp;
1230   CeedScalar *q_ref, *q_weight;
1231   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
1232   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1233   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1234   if (is_tensor) {
1235     CeedInt P_1d_to, P_1d_from;
1236     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1237     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1238     CeedCall(CeedCalloc(P_1d_to, &q_ref));
1239     CeedCall(CeedCalloc(P_1d_to, &q_weight));
1240     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1241   } else {
1242     // Even if basis_to and basis_from are not H1, the resulting basis is H1 for interpolation to work
1243     CeedElemTopology topo;
1244     CeedCall(CeedBasisGetTopology(basis_to, &topo));
1245     CeedInt num_nodes_to, num_nodes_from;
1246     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1247     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1248     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
1249     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
1250     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1251   }
1252 
1253   // Cleanup
1254   CeedCall(CeedFree(&interp_project));
1255   CeedCall(CeedFree(&grad_project));
1256   CeedCall(CeedFree(&q_ref));
1257   CeedCall(CeedFree(&q_weight));
1258 
1259   return CEED_ERROR_SUCCESS;
1260 }
1261 
1262 /**
1263   @brief Copy the pointer to a CeedBasis.
1264 
1265   Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis.
1266         This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis.
1267 
1268   @param[in]     basis      CeedBasis to copy reference to
1269   @param[in,out] basis_copy Variable to store copied reference
1270 
1271   @return An error code: 0 - success, otherwise - failure
1272 
1273   @ref User
1274 **/
1275 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1276   if (basis != CEED_BASIS_COLLOCATED) CeedCall(CeedBasisReference(basis));
1277   CeedCall(CeedBasisDestroy(basis_copy));
1278   *basis_copy = basis;
1279   return CEED_ERROR_SUCCESS;
1280 }
1281 
1282 /**
1283   @brief View a CeedBasis
1284 
1285   @param[in] basis  CeedBasis to view
1286   @param[in] stream Stream to view to, e.g., stdout
1287 
1288   @return An error code: 0 - success, otherwise - failure
1289 
1290   @ref User
1291 **/
1292 int CeedBasisView(CeedBasis basis, FILE *stream) {
1293   CeedElemTopology topo     = basis->topo;
1294   CeedFESpace      fe_space = basis->fe_space;
1295   CeedInt          q_comp   = 0;
1296 
1297   // Print FE space and element topology of the basis
1298   fprintf(stream, "CeedBasis in a %s on a %s element\n", CeedFESpaces[fe_space], CeedElemTopologies[topo]);
1299   if (basis->is_tensor_basis) {
1300     fprintf(stream, "  P: %" CeedInt_FMT "\n  Q: %" CeedInt_FMT "\n", basis->P_1d, basis->Q_1d);
1301   } else {
1302     fprintf(stream, "  P: %" CeedInt_FMT "\n  Q: %" CeedInt_FMT "\n", basis->P, basis->Q);
1303   }
1304   fprintf(stream, "  dimension: %" CeedInt_FMT "\n  field components: %" CeedInt_FMT "\n", basis->dim, basis->num_comp);
1305   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1306   if (basis->is_tensor_basis) {  // tensor basis
1307     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1308     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1309     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1310     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1311   } else {  // non-tensor basis
1312     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1313     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1314     CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp));
1315     CeedCall(CeedScalarView("interp", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->interp, stream));
1316     if (basis->grad) {
1317       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp));
1318       CeedCall(CeedScalarView("grad", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->grad, stream));
1319     }
1320     if (basis->div) {
1321       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp));
1322       CeedCall(CeedScalarView("div", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->div, stream));
1323     }
1324     if (basis->curl) {
1325       CeedCall(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp));
1326       CeedCall(CeedScalarView("curl", "\t% 12.8f", q_comp * basis->Q, basis->P, basis->curl, stream));
1327     }
1328   }
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 /**
1333   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1334 
1335   @param[in]  basis      CeedBasis to evaluate
1336   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1337                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1338   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1339                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1340   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1341                           \ref CEED_EVAL_INTERP to use interpolated values,
1342                           \ref CEED_EVAL_GRAD to use gradients,
1343                           \ref CEED_EVAL_DIV to use divergence,
1344                           \ref CEED_EVAL_CURL to use curl,
1345                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1346   @param[in]  u        Input CeedVector
1347   @param[out] v        Output CeedVector
1348 
1349   @return An error code: 0 - success, otherwise - failure
1350 
1351   @ref User
1352 **/
1353 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1354   CeedSize u_length = 0, v_length;
1355   CeedInt  dim, num_comp, q_comp, num_nodes, num_qpts;
1356   CeedCall(CeedBasisGetDimension(basis, &dim));
1357   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1358   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
1359   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1360   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1361   CeedCall(CeedVectorGetLength(v, &v_length));
1362   if (u) CeedCall(CeedVectorGetLength(u, &u_length));
1363 
1364   CeedCheck(basis->Apply, basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1365 
1366   // Check compatibility of topological and geometrical dimensions
1367   CeedCheck((t_mode == CEED_TRANSPOSE && v_length % num_nodes == 0 && u_length % num_qpts == 0) ||
1368                 (t_mode == CEED_NOTRANSPOSE && u_length % num_nodes == 0 && v_length % num_qpts == 0),
1369             basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1370 
1371   // Check vector lengths to prevent out of bounds issues
1372   bool good_dims = true;
1373   switch (eval_mode) {
1374     case CEED_EVAL_NONE:
1375     case CEED_EVAL_INTERP:
1376     case CEED_EVAL_GRAD:
1377     case CEED_EVAL_DIV:
1378     case CEED_EVAL_CURL:
1379       good_dims =
1380           ((t_mode == CEED_TRANSPOSE && u_length >= num_elem * num_comp * num_qpts * q_comp && v_length >= num_elem * num_comp * num_nodes) ||
1381            (t_mode == CEED_NOTRANSPOSE && v_length >= num_elem * num_qpts * num_comp * q_comp && u_length >= num_elem * num_comp * num_nodes));
1382       break;
1383     case CEED_EVAL_WEIGHT:
1384       good_dims = v_length >= num_elem * num_qpts;
1385       break;
1386   }
1387   CeedCheck(good_dims, basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1388 
1389   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1390   return CEED_ERROR_SUCCESS;
1391 }
1392 
1393 /**
1394   @brief Apply basis evaluation from nodes to arbitrary points
1395 
1396   @param[in]  basis      CeedBasis to evaluate
1397   @param[in]  num_points The number of points to apply the basis evaluation to
1398   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to points;
1399                           \ref CEED_TRANSPOSE to apply the transpose, mapping from points to nodes
1400   @param[in]  eval_mode \ref CEED_EVAL_INTERP to use interpolated values,
1401                           \ref CEED_EVAL_GRAD to use gradients
1402   @param[in]  x_ref    CeedVector holding reference coordinates of each point
1403   @param[in]  u        Input CeedVector, of length `num_nodes * num_comp` for `CEED_NOTRANSPOSE`
1404   @param[out] v        Output CeedVector, of length `num_points * num_q_comp` for `CEED_NOTRANSPOSE` with `CEED_EVAL_INTERP`
1405 
1406   @return An error code: 0 - success, otherwise - failure
1407 
1408   @ref User
1409 **/
1410 int CeedBasisApplyAtPoints(CeedBasis basis, CeedInt num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u,
1411                            CeedVector v) {
1412   CeedSize x_length = 0, u_length = 0, v_length;
1413   CeedInt  dim, num_comp, num_q_comp, num_nodes, P_1d = 1, Q_1d = 1;
1414 
1415   CeedCall(CeedBasisGetDimension(basis, &dim));
1416   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
1417   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1418   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1419   CeedCall(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &num_q_comp));
1420   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1421   CeedCall(CeedVectorGetLength(x_ref, &x_length));
1422   CeedCall(CeedVectorGetLength(v, &v_length));
1423   CeedCall(CeedVectorGetLength(u, &u_length));
1424 
1425   // Check compatibility of topological and geometrical dimensions
1426   CeedCheck((t_mode == CEED_TRANSPOSE && v_length % num_nodes == 0) || (t_mode == CEED_NOTRANSPOSE && u_length % num_nodes == 0), basis->ceed,
1427             CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions and number of points");
1428 
1429   // Check compatibility coordinates vector
1430   CeedCheck(x_length >= num_points * dim, basis->ceed, CEED_ERROR_DIMENSION,
1431             "Length of reference coordinate vector incompatible with basis dimension and number of points");
1432 
1433   // Check vector lengths to prevent out of bounds issues
1434   bool good_dims = false;
1435   switch (eval_mode) {
1436     case CEED_EVAL_INTERP:
1437       good_dims = ((t_mode == CEED_TRANSPOSE && (u_length >= num_points * num_q_comp || v_length >= num_nodes * num_comp)) ||
1438                    (t_mode == CEED_NOTRANSPOSE && (v_length >= num_points * num_q_comp || u_length >= num_nodes * num_comp)));
1439       break;
1440     case CEED_EVAL_GRAD:
1441     case CEED_EVAL_NONE:
1442     case CEED_EVAL_WEIGHT:
1443     case CEED_EVAL_DIV:
1444     case CEED_EVAL_CURL:
1445       // LCOV_EXCL_START
1446       return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Evaluation at arbitrary points not supported for %s", CeedEvalModes[eval_mode]);
1447       // LCOV_EXCL_STOP
1448   }
1449   CeedCheck(good_dims, basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1450 
1451   // Backend method
1452   if (basis->ApplyAtPoints) {
1453     CeedCall(basis->ApplyAtPoints(basis, num_points, t_mode, eval_mode, x_ref, u, v));
1454     return CEED_ERROR_SUCCESS;
1455   }
1456 
1457   // Default implementation
1458   CeedCheck(basis->is_tensor_basis, basis->ceed, CEED_ERROR_UNSUPPORTED, "Evaluation at arbitrary points only supported for tensor product bases");
1459   if (!basis->basis_chebyshev) {
1460     // Build matrix mapping from quadrature point values to Chebyshev coefficients
1461     CeedScalar       *tau, *C, *I, *chebyshev_coeffs_1d;
1462     const CeedScalar *q_ref_1d;
1463 
1464     // Build coefficient matrix
1465     // -- Note: Clang-tidy needs this check because it does not understand the is_tensor_basis check above
1466     CeedCheck(P_1d > 0 && Q_1d > 0, basis->ceed, CEED_ERROR_INCOMPATIBLE, "Basis dimensions are malformed");
1467     CeedCall(CeedCalloc(Q_1d * Q_1d, &C));
1468     CeedCall(CeedBasisGetQRef(basis, &q_ref_1d));
1469     for (CeedInt i = 0; i < Q_1d; i++) {
1470       const CeedScalar x = q_ref_1d[i];
1471 
1472       C[i * Q_1d + 0] = 1.0;
1473       C[i * Q_1d + 1] = 2 * x;
1474       for (CeedInt j = 2; j < Q_1d; j++) C[i * Q_1d + j] = 2 * x * C[i * Q_1d + j - 1] - C[i * Q_1d + j - 2];
1475     }
1476 
1477     // Inverse of coefficient matrix
1478     CeedCall(CeedCalloc(Q_1d * Q_1d, &chebyshev_coeffs_1d));
1479     CeedCall(CeedCalloc(Q_1d * Q_1d, &I));
1480     CeedCall(CeedCalloc(Q_1d, &tau));
1481     // -- QR Factorization, C = Q R
1482     CeedCall(CeedQRFactorization(basis->ceed, C, tau, Q_1d, Q_1d));
1483     // -- chebyshev_coeffs_1d = R_inv Q^T
1484     for (CeedInt i = 0; i < Q_1d; i++) I[i * Q_1d + i] = 1.0;
1485     // ---- Apply R_inv, chebyshev_coeffs_1d = I R_inv
1486     for (CeedInt i = 0; i < Q_1d; i++) {  // Row i
1487       chebyshev_coeffs_1d[Q_1d * i] = I[Q_1d * i] / C[0];
1488       for (CeedInt j = 1; j < Q_1d; j++) {  // Column j
1489         chebyshev_coeffs_1d[j + Q_1d * i] = I[j + Q_1d * i];
1490         for (CeedInt k = 0; k < j; k++) chebyshev_coeffs_1d[j + Q_1d * i] -= C[j + Q_1d * k] * chebyshev_coeffs_1d[k + Q_1d * i];
1491         chebyshev_coeffs_1d[j + Q_1d * i] /= C[j + Q_1d * j];
1492       }
1493     }
1494     // ---- Apply Q^T, chebyshev_coeffs_1d = R_inv Q^T
1495     CeedCall(CeedHouseholderApplyQ(chebyshev_coeffs_1d, C, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, Q_1d, 1, Q_1d));
1496 
1497     // Build basis mapping from nodes to Chebyshev coefficients
1498     CeedScalar       *chebyshev_interp_1d, *chebyshev_grad_1d, *chebyshev_q_weight_1d;
1499     const CeedScalar *interp_1d;
1500 
1501     CeedCall(CeedCalloc(Q_1d * Q_1d, &chebyshev_interp_1d));
1502     CeedCall(CeedCalloc(Q_1d * Q_1d, &chebyshev_grad_1d));
1503     CeedCall(CeedCalloc(Q_1d, &chebyshev_q_weight_1d));
1504     CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
1505     CeedCall(CeedMatrixMatrixMultiply(basis->ceed, chebyshev_coeffs_1d, interp_1d, chebyshev_interp_1d, Q_1d, P_1d, Q_1d));
1506 
1507     CeedCall(CeedVectorCreate(basis->ceed, num_comp * CeedIntPow(Q_1d, dim), &basis->vec_chebyshev));
1508     CeedCall(CeedBasisCreateTensorH1(basis->ceed, dim, num_comp, Q_1d, Q_1d, chebyshev_interp_1d, chebyshev_grad_1d, q_ref_1d, chebyshev_q_weight_1d,
1509                                      &basis->basis_chebyshev));
1510 
1511     // Cleanup
1512     CeedCall(CeedFree(&C));
1513     CeedCall(CeedFree(&chebyshev_coeffs_1d));
1514     CeedCall(CeedFree(&I));
1515     CeedCall(CeedFree(&tau));
1516     CeedCall(CeedFree(&chebyshev_interp_1d));
1517     CeedCall(CeedFree(&chebyshev_grad_1d));
1518     CeedCall(CeedFree(&chebyshev_q_weight_1d));
1519   }
1520 
1521   // Create TensorContract object if needed, such as a basis from the GPU backends
1522   if (!basis->contract) {
1523     Ceed      ceed_ref;
1524     CeedBasis basis_ref;
1525 
1526     CeedCall(CeedInit("/cpu/self", &ceed_ref));
1527     // Only need matching tensor contraction dimensions, any type of basis will work
1528     CeedCall(CeedBasisCreateTensorH1Lagrange(ceed_ref, dim, num_comp, Q_1d, Q_1d, CEED_GAUSS, &basis_ref));
1529     CeedCall(CeedTensorContractReference(basis_ref->contract));
1530     basis->contract = basis_ref->contract;
1531     CeedCall(CeedBasisDestroy(&basis_ref));
1532     CeedCall(CeedDestroy(&ceed_ref));
1533   }
1534 
1535   // Basis evaluation
1536   switch (t_mode) {
1537     case CEED_NOTRANSPOSE: {
1538       // Nodes to arbitrary points
1539       CeedScalar       *v_array;
1540       const CeedScalar *chebyshev_coeffs, *x_array_read;
1541 
1542       // -- Interpolate to Chebyshev coefficients
1543       CeedCall(CeedBasisApply(basis->basis_chebyshev, 1, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, basis->vec_chebyshev));
1544 
1545       // -- Evaluate Chebyshev polynomials at arbitrary points
1546       CeedCall(CeedVectorGetArrayRead(basis->vec_chebyshev, CEED_MEM_HOST, &chebyshev_coeffs));
1547       CeedCall(CeedVectorGetArrayRead(x_ref, CEED_MEM_HOST, &x_array_read));
1548       CeedCall(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &v_array));
1549       {
1550         CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
1551 
1552         // ---- Values at point
1553         for (CeedInt p = 0; p < num_points; p++) {
1554           CeedInt pre = num_comp * CeedIntPow(Q_1d, dim - 1), post = 1;
1555 
1556           for (CeedInt d = dim - 1; d >= 0; d--) {
1557             // ------ Compute Chebyshev polynomial values
1558             {
1559               const CeedScalar x = x_array_read[p * dim + d];
1560 
1561               chebyshev_x[0] = 1.0;
1562               chebyshev_x[1] = 2 * x;
1563               for (CeedInt j = 2; j < Q_1d; j++) chebyshev_x[j] = 2 * x * chebyshev_x[j - 1] - chebyshev_x[j - 2];
1564             }
1565             // ------ Tensor contract
1566             CeedCall(CeedTensorContractApply(basis->contract, pre, Q_1d, post, 1, chebyshev_x, t_mode, false,
1567                                              d == (dim - 1) ? chebyshev_coeffs : tmp[d % 2], d == 0 ? &v_array[p * num_comp] : tmp[(d + 1) % 2]));
1568             pre /= Q_1d;
1569             post *= 1;
1570           }
1571         }
1572       }
1573       CeedCall(CeedVectorRestoreArrayRead(basis->vec_chebyshev, &chebyshev_coeffs));
1574       CeedCall(CeedVectorRestoreArrayRead(x_ref, &x_array_read));
1575       CeedCall(CeedVectorRestoreArray(v, &v_array));
1576       break;
1577     }
1578     case CEED_TRANSPOSE: {
1579       // Arbitrary points to nodes
1580       CeedScalar       *chebyshev_coeffs;
1581       const CeedScalar *u_array, *x_array_read;
1582 
1583       // -- Transpose of evaluaton of Chebyshev polynomials at arbitrary points
1584       CeedCall(CeedVectorGetArrayWrite(basis->vec_chebyshev, CEED_MEM_HOST, &chebyshev_coeffs));
1585       CeedCall(CeedVectorGetArrayRead(x_ref, CEED_MEM_HOST, &x_array_read));
1586       CeedCall(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &u_array));
1587       {
1588         CeedScalar tmp[2][num_comp * CeedIntPow(Q_1d, dim)], chebyshev_x[Q_1d];
1589 
1590         // ---- Values at point
1591         for (CeedInt p = 0; p < num_points; p++) {
1592           CeedInt pre = num_comp * 1, post = 1;
1593 
1594           for (CeedInt d = dim - 1; d >= 0; d--) {
1595             // ------ Compute Chebyshev polynomial values
1596             {
1597               const CeedScalar x = x_array_read[p * dim + d];
1598 
1599               chebyshev_x[0] = 1.0;
1600               chebyshev_x[1] = 2 * x;
1601               for (CeedInt j = 2; j < Q_1d; j++) chebyshev_x[j] = 2 * x * chebyshev_x[j - 1] - chebyshev_x[j - 2];
1602             }
1603             // ------ Tensor contract
1604             CeedCall(CeedTensorContractApply(basis->contract, pre, 1, post, Q_1d, chebyshev_x, t_mode, p > 0 && d == 0,
1605                                              d == (dim - 1) ? &u_array[p * num_comp] : tmp[d % 2], d == 0 ? chebyshev_coeffs : tmp[(d + 1) % 2]));
1606             pre /= 1;
1607             post *= Q_1d;
1608           }
1609         }
1610       }
1611       CeedCall(CeedVectorRestoreArray(basis->vec_chebyshev, &chebyshev_coeffs));
1612       CeedCall(CeedVectorRestoreArrayRead(x_ref, &x_array_read));
1613       CeedCall(CeedVectorRestoreArrayRead(u, &u_array));
1614 
1615       // -- Interpolate transpose from Chebyshev coefficients
1616       CeedCall(CeedBasisApply(basis->basis_chebyshev, 1, CEED_TRANSPOSE, CEED_EVAL_INTERP, basis->vec_chebyshev, v));
1617       break;
1618     }
1619   }
1620 
1621   return CEED_ERROR_SUCCESS;
1622 }
1623 
1624 /**
1625   @brief Get Ceed associated with a CeedBasis
1626 
1627   @param[in]  basis CeedBasis
1628   @param[out] ceed  Variable to store Ceed
1629 
1630   @return An error code: 0 - success, otherwise - failure
1631 
1632   @ref Advanced
1633 **/
1634 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1635   *ceed = basis->ceed;
1636   return CEED_ERROR_SUCCESS;
1637 }
1638 
1639 /**
1640   @brief Get dimension for given CeedBasis
1641 
1642   @param[in]  basis CeedBasis
1643   @param[out] dim   Variable to store dimension of basis
1644 
1645   @return An error code: 0 - success, otherwise - failure
1646 
1647   @ref Advanced
1648 **/
1649 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1650   *dim = basis->dim;
1651   return CEED_ERROR_SUCCESS;
1652 }
1653 
1654 /**
1655   @brief Get topology for given CeedBasis
1656 
1657   @param[in]  basis CeedBasis
1658   @param[out] topo  Variable to store topology of basis
1659 
1660   @return An error code: 0 - success, otherwise - failure
1661 
1662   @ref Advanced
1663 **/
1664 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1665   *topo = basis->topo;
1666   return CEED_ERROR_SUCCESS;
1667 }
1668 
1669 /**
1670   @brief Get number of components for given CeedBasis
1671 
1672   @param[in]  basis    CeedBasis
1673   @param[out] num_comp Variable to store number of components of basis
1674 
1675   @return An error code: 0 - success, otherwise - failure
1676 
1677   @ref Advanced
1678 **/
1679 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1680   *num_comp = basis->num_comp;
1681   return CEED_ERROR_SUCCESS;
1682 }
1683 
1684 /**
1685   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1686 
1687   @param[in]  basis CeedBasis
1688   @param[out] P     Variable to store number of nodes
1689 
1690   @return An error code: 0 - success, otherwise - failure
1691 
1692   @ref Utility
1693 **/
1694 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1695   *P = basis->P;
1696   return CEED_ERROR_SUCCESS;
1697 }
1698 
1699 /**
1700   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1701 
1702   @param[in]  basis CeedBasis
1703   @param[out] P_1d  Variable to store number of nodes
1704 
1705   @return An error code: 0 - success, otherwise - failure
1706 
1707   @ref Advanced
1708 **/
1709 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1710   CeedCheck(basis->is_tensor_basis, basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1711   *P_1d = basis->P_1d;
1712   return CEED_ERROR_SUCCESS;
1713 }
1714 
1715 /**
1716   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1717 
1718   @param[in]  basis CeedBasis
1719   @param[out] Q     Variable to store number of quadrature points
1720 
1721   @return An error code: 0 - success, otherwise - failure
1722 
1723   @ref Utility
1724 **/
1725 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1726   *Q = basis->Q;
1727   return CEED_ERROR_SUCCESS;
1728 }
1729 
1730 /**
1731   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1732 
1733   @param[in]  basis CeedBasis
1734   @param[out] Q_1d  Variable to store number of quadrature points
1735 
1736   @return An error code: 0 - success, otherwise - failure
1737 
1738   @ref Advanced
1739 **/
1740 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1741   CeedCheck(basis->is_tensor_basis, basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1742   *Q_1d = basis->Q_1d;
1743   return CEED_ERROR_SUCCESS;
1744 }
1745 
1746 /**
1747   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1748 
1749   @param[in]  basis CeedBasis
1750   @param[out] q_ref Variable to store reference coordinates of quadrature points
1751 
1752   @return An error code: 0 - success, otherwise - failure
1753 
1754   @ref Advanced
1755 **/
1756 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1757   *q_ref = basis->q_ref_1d;
1758   return CEED_ERROR_SUCCESS;
1759 }
1760 
1761 /**
1762   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1763 
1764   @param[in]  basis    CeedBasis
1765   @param[out] q_weight Variable to store quadrature weights
1766 
1767   @return An error code: 0 - success, otherwise - failure
1768 
1769   @ref Advanced
1770 **/
1771 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1772   *q_weight = basis->q_weight_1d;
1773   return CEED_ERROR_SUCCESS;
1774 }
1775 
1776 /**
1777   @brief Get interpolation matrix of a CeedBasis
1778 
1779   @param[in]  basis  CeedBasis
1780   @param[out] interp Variable to store interpolation matrix
1781 
1782   @return An error code: 0 - success, otherwise - failure
1783 
1784   @ref Advanced
1785 **/
1786 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1787   if (!basis->interp && basis->is_tensor_basis) {
1788     // Allocate
1789     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1790 
1791     // Initialize
1792     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1793 
1794     // Calculate
1795     for (CeedInt d = 0; d < basis->dim; d++) {
1796       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1797         for (CeedInt node = 0; node < basis->P; node++) {
1798           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1799           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1800           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1801         }
1802       }
1803     }
1804   }
1805   *interp = basis->interp;
1806   return CEED_ERROR_SUCCESS;
1807 }
1808 
1809 /**
1810   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1811 
1812   @param[in]  basis     CeedBasis
1813   @param[out] interp_1d Variable to store interpolation matrix
1814 
1815   @return An error code: 0 - success, otherwise - failure
1816 
1817   @ref Backend
1818 **/
1819 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1820   CeedCheck(basis->is_tensor_basis, basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1821   *interp_1d = basis->interp_1d;
1822   return CEED_ERROR_SUCCESS;
1823 }
1824 
1825 /**
1826   @brief Get gradient matrix of a CeedBasis
1827 
1828   @param[in]  basis CeedBasis
1829   @param[out] grad  Variable to store gradient matrix
1830 
1831   @return An error code: 0 - success, otherwise - failure
1832 
1833   @ref Advanced
1834 **/
1835 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1836   if (!basis->grad && basis->is_tensor_basis) {
1837     // Allocate
1838     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1839 
1840     // Initialize
1841     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1842 
1843     // Calculate
1844     for (CeedInt d = 0; d < basis->dim; d++) {
1845       for (CeedInt i = 0; i < basis->dim; i++) {
1846         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1847           for (CeedInt node = 0; node < basis->P; node++) {
1848             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1849             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1850             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1851             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1852           }
1853         }
1854       }
1855     }
1856   }
1857   *grad = basis->grad;
1858   return CEED_ERROR_SUCCESS;
1859 }
1860 
1861 /**
1862   @brief Get 1D gradient matrix of a tensor product CeedBasis
1863 
1864   @param[in]  basis   CeedBasis
1865   @param[out] grad_1d Variable to store gradient matrix
1866 
1867   @return An error code: 0 - success, otherwise - failure
1868 
1869   @ref Advanced
1870 **/
1871 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1872   CeedCheck(basis->is_tensor_basis, basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1873   *grad_1d = basis->grad_1d;
1874   return CEED_ERROR_SUCCESS;
1875 }
1876 
1877 /**
1878   @brief Get divergence matrix of a CeedBasis
1879 
1880   @param[in]  basis CeedBasis
1881   @param[out] div   Variable to store divergence matrix
1882 
1883   @return An error code: 0 - success, otherwise - failure
1884 
1885   @ref Advanced
1886 **/
1887 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1888   CeedCheck(basis->div, basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1889   *div = basis->div;
1890   return CEED_ERROR_SUCCESS;
1891 }
1892 
1893 /**
1894   @brief Get curl matrix of a CeedBasis
1895 
1896   @param[in]  basis CeedBasis
1897   @param[out] curl  Variable to store curl matrix
1898 
1899   @return An error code: 0 - success, otherwise - failure
1900 
1901   @ref Advanced
1902 **/
1903 int CeedBasisGetCurl(CeedBasis basis, const CeedScalar **curl) {
1904   CeedCheck(basis->curl, basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have curl matrix.");
1905   *curl = basis->curl;
1906   return CEED_ERROR_SUCCESS;
1907 }
1908 
1909 /**
1910   @brief Destroy a CeedBasis
1911 
1912   @param[in,out] basis CeedBasis to destroy
1913 
1914   @return An error code: 0 - success, otherwise - failure
1915 
1916   @ref User
1917 **/
1918 int CeedBasisDestroy(CeedBasis *basis) {
1919   if (!*basis || *basis == CEED_BASIS_COLLOCATED || --(*basis)->ref_count > 0) {
1920     *basis = NULL;
1921     return CEED_ERROR_SUCCESS;
1922   }
1923   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1924   CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1925   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1926   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1927   CeedCall(CeedFree(&(*basis)->interp));
1928   CeedCall(CeedFree(&(*basis)->interp_1d));
1929   CeedCall(CeedFree(&(*basis)->grad));
1930   CeedCall(CeedFree(&(*basis)->grad_1d));
1931   CeedCall(CeedFree(&(*basis)->div));
1932   CeedCall(CeedFree(&(*basis)->curl));
1933   CeedCall(CeedVectorDestroy(&(*basis)->vec_chebyshev));
1934   CeedCall(CeedBasisDestroy(&(*basis)->basis_chebyshev));
1935   CeedCall(CeedDestroy(&(*basis)->ceed));
1936   CeedCall(CeedFree(basis));
1937   return CEED_ERROR_SUCCESS;
1938 }
1939 
1940 /**
1941   @brief Construct a Gauss-Legendre quadrature
1942 
1943   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1944   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1945   @param[out] q_weight_1d Array of length Q to hold the weights
1946 
1947   @return An error code: 0 - success, otherwise - failure
1948 
1949   @ref Utility
1950 **/
1951 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1952   // Allocate
1953   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1954   // Build q_ref_1d, q_weight_1d
1955   for (CeedInt i = 0; i <= Q / 2; i++) {
1956     // Guess
1957     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1958     // Pn(xi)
1959     P0 = 1.0;
1960     P1 = xi;
1961     P2 = 0.0;
1962     for (CeedInt j = 2; j <= Q; j++) {
1963       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1964       P0 = P1;
1965       P1 = P2;
1966     }
1967     // First Newton Step
1968     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1969     xi  = xi - P2 / dP2;
1970     // Newton to convergence
1971     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1972       P0 = 1.0;
1973       P1 = xi;
1974       for (CeedInt j = 2; j <= Q; j++) {
1975         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1976         P0 = P1;
1977         P1 = P2;
1978       }
1979       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1980       xi  = xi - P2 / dP2;
1981     }
1982     // Save xi, wi
1983     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1984     q_weight_1d[i]         = wi;
1985     q_weight_1d[Q - 1 - i] = wi;
1986     q_ref_1d[i]            = -xi;
1987     q_ref_1d[Q - 1 - i]    = xi;
1988   }
1989   return CEED_ERROR_SUCCESS;
1990 }
1991 
1992 /**
1993   @brief Construct a Gauss-Legendre-Lobatto quadrature
1994 
1995   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1996   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1997   @param[out] q_weight_1d Array of length Q to hold the weights
1998 
1999   @return An error code: 0 - success, otherwise - failure
2000 
2001   @ref Utility
2002 **/
2003 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
2004   // Allocate
2005   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
2006   // Build q_ref_1d, q_weight_1d
2007   // Set endpoints
2008   CeedCheck(Q > 1, NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
2009   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
2010   if (q_weight_1d) {
2011     q_weight_1d[0]     = wi;
2012     q_weight_1d[Q - 1] = wi;
2013   }
2014   q_ref_1d[0]     = -1.0;
2015   q_ref_1d[Q - 1] = 1.0;
2016   // Interior
2017   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
2018     // Guess
2019     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
2020     // Pn(xi)
2021     P0 = 1.0;
2022     P1 = xi;
2023     P2 = 0.0;
2024     for (CeedInt j = 2; j < Q; j++) {
2025       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2026       P0 = P1;
2027       P1 = P2;
2028     }
2029     // First Newton step
2030     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2031     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2032     xi   = xi - dP2 / d2P2;
2033     // Newton to convergence
2034     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
2035       P0 = 1.0;
2036       P1 = xi;
2037       for (CeedInt j = 2; j < Q; j++) {
2038         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
2039         P0 = P1;
2040         P1 = P2;
2041       }
2042       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
2043       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
2044       xi   = xi - dP2 / d2P2;
2045     }
2046     // Save xi, wi
2047     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
2048     if (q_weight_1d) {
2049       q_weight_1d[i]         = wi;
2050       q_weight_1d[Q - 1 - i] = wi;
2051     }
2052     q_ref_1d[i]         = -xi;
2053     q_ref_1d[Q - 1 - i] = xi;
2054   }
2055   return CEED_ERROR_SUCCESS;
2056 }
2057 
2058 /// @}
2059