xref: /libCEED/interface/ceed-basis.c (revision 7b87cde0397b48cc601e25bb3ecac5ddabea754c)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40   Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col]
41 
42   @param[in,out] A   Matrix to apply Householder reflection to, in place
43   @param[in]     v   Householder vector
44   @param[in]     b   Scaling factor
45   @param[in]     m   Number of rows in A
46   @param[in]     n   Number of columns in A
47   @param[in]     row Row stride
48   @param[in]     col Col stride
49 
50   @return An error code: 0 - success, otherwise - failure
51 
52   @ref Developer
53 **/
54 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
55   for (CeedInt j = 0; j < n; j++) {
56     CeedScalar w = A[0 * row + j * col];
57     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
58     A[0 * row + j * col] -= b * w;
59     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
60   }
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 /**
65   @brief Compute Givens rotation
66 
67   Computes A = G A (or G^T A in transpose mode), where A is an mxn matrix indexed as A[i*n + j*m]
68 
69   @param[in,out] A      Row major matrix to apply Givens rotation to, in place
70   @param[in]     c      Cosine factor
71   @param[in]     s      Sine factor
72   @param[in]     t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, which has the effect of rotating columns of A clockwise;
73                           @ref CEED_TRANSPOSE for the opposite rotation
74   @param[in]     i      First row/column to apply rotation
75   @param[in]     k      Second row/column to apply rotation
76   @param[in]     m      Number of rows in A
77   @param[in]     n      Number of columns in A
78 
79   @return An error code: 0 - success, otherwise - failure
80 
81   @ref Developer
82 **/
83 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
84   CeedInt stride_j = 1, stride_ik = m, num_its = n;
85   if (t_mode == CEED_NOTRANSPOSE) {
86     stride_j  = n;
87     stride_ik = 1;
88     num_its   = m;
89   }
90 
91   // Apply rotation
92   for (CeedInt j = 0; j < num_its; j++) {
93     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
94     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
95     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
96   }
97   return CEED_ERROR_SUCCESS;
98 }
99 
100 /**
101   @brief View an array stored in a CeedBasis
102 
103   @param[in] name   Name of array
104   @param[in] fp_fmt Printing format
105   @param[in] m      Number of rows in array
106   @param[in] n      Number of columns in array
107   @param[in] a      Array to be viewed
108   @param[in] stream Stream to view to, e.g., stdout
109 
110   @return An error code: 0 - success, otherwise - failure
111 
112   @ref Developer
113 **/
114 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
115   for (CeedInt i = 0; i < m; i++) {
116     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
117     else fprintf(stream, "%12s:", name);
118     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
119     fputs("\n", stream);
120   }
121   return CEED_ERROR_SUCCESS;
122 }
123 
124 /**
125   @brief Create the interpolation and gradient matrices for projection from the nodes of `basis_from` to the nodes of `basis_to`.
126 
127   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR factorization.
128   The gradient is given by `grad_project = interp_to^+ * grad_from`.
129   Note: `basis_from` and `basis_to` must have compatible quadrature spaces.
130 
131   @param[in]  basis_from     CeedBasis to project from
132   @param[in]  basis_to       CeedBasis to project to
133   @param[out] interp_project Address of the variable where the newly created interpolation matrix will be stored.
134   @param[out] grad_project   Address of the variable where the newly created gradient matrix will be stored.
135 
136   @return An error code: 0 - success, otherwise - failure
137 
138   @ref Developer
139 **/
140 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
141   Ceed ceed;
142   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
143 
144   // Check for compatible quadrature spaces
145   CeedInt Q_to, Q_from;
146   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
147   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
148   if (Q_to != Q_from) {
149     // LCOV_EXCL_START
150     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
151     // LCOV_EXCL_STOP
152   }
153 
154   // Check for matching tensor or non-tensor
155   CeedInt P_to, P_from, Q = Q_to;
156   bool    is_tensor_to, is_tensor_from;
157   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
158   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
159   if (is_tensor_to && is_tensor_from) {
160     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
161     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
162     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
163   } else if (!is_tensor_to && !is_tensor_from) {
164     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
165     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
166   } else {
167     // LCOV_EXCL_START
168     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
169     // LCOV_EXCL_STOP
170   }
171 
172   // Get source matrices
173   CeedInt     dim;
174   CeedScalar *interp_to, *interp_from, *tau;
175   CeedCall(CeedBasisGetDimension(basis_to, &dim));
176   CeedCall(CeedMalloc(Q * P_from, &interp_from));
177   CeedCall(CeedMalloc(Q * P_to, &interp_to));
178   CeedCall(CeedCalloc(P_to * P_from, interp_project));
179   CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
180   CeedCall(CeedMalloc(Q, &tau));
181   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, *grad_from_source;
182   if (is_tensor_to) {
183     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
184     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
185     CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
186   } else {
187     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
188     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
189     CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
190   }
191 
192   // Build matrices
193   CeedInt     num_matrices = 1 + (is_tensor_to ? 1 : dim);
194   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
195   input_from[0]     = (CeedScalar *)interp_from_source;
196   output_project[0] = *interp_project;
197   for (CeedInt m = 1; m < num_matrices; m++) {
198     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
199     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
200   }
201   for (CeedInt m = 0; m < num_matrices; m++) {
202     // -- QR Factorization, interp_to = Q R
203     memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0]));
204     CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q, P_to));
205 
206     // -- Apply Qtranspose, interp_to = Qtranspose interp_from
207     memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0]));
208     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q, P_from, P_to, P_from, 1));
209 
210     // -- Apply Rinv, interp_project = Rinv interp_c
211     for (CeedInt j = 0; j < P_from; j++) {  // Column j
212       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
213       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
214         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
215         for (CeedInt k = i + 1; k < P_to; k++) {
216           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
217         }
218         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
219       }
220     }
221   }
222 
223   // Cleanup
224   CeedCall(CeedFree(&tau));
225   CeedCall(CeedFree(&interp_to));
226   CeedCall(CeedFree(&interp_from));
227 
228   return CEED_ERROR_SUCCESS;
229 }
230 
231 /// @}
232 
233 /// ----------------------------------------------------------------------------
234 /// Ceed Backend API
235 /// ----------------------------------------------------------------------------
236 /// @addtogroup CeedBasisBackend
237 /// @{
238 
239 /**
240   @brief Return collocated grad matrix
241 
242   @param[in]  basis         CeedBasis
243   @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of basis functions at quadrature points
244 
245   @return An error code: 0 - success, otherwise - failure
246 
247   @ref Backend
248 **/
249 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
250   int         i, j, k;
251   Ceed        ceed;
252   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
253   CeedScalar *interp_1d, *grad_1d, *tau;
254 
255   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
256   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
257   CeedCall(CeedMalloc(Q_1d, &tau));
258   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
259   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
260 
261   // QR Factorization, interp_1d = Q R
262   CeedCall(CeedBasisGetCeed(basis, &ceed));
263   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
264   // Note: This function is for backend use, so all errors are terminal and we do not need to clean up memory on failure.
265 
266   // Apply Rinv, collo_grad_1d = grad_1d Rinv
267   for (i = 0; i < Q_1d; i++) {  // Row i
268     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
269     for (j = 1; j < P_1d; j++) {  // Column j
270       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
271       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
272       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
273     }
274     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
275   }
276 
277   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
278   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
279 
280   CeedCall(CeedFree(&interp_1d));
281   CeedCall(CeedFree(&grad_1d));
282   CeedCall(CeedFree(&tau));
283   return CEED_ERROR_SUCCESS;
284 }
285 
286 /**
287   @brief Get tensor status for given CeedBasis
288 
289   @param[in]  basis     CeedBasis
290   @param[out] is_tensor Variable to store tensor status
291 
292   @return An error code: 0 - success, otherwise - failure
293 
294   @ref Backend
295 **/
296 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
297   *is_tensor = basis->tensor_basis;
298   return CEED_ERROR_SUCCESS;
299 }
300 
301 /**
302   @brief Get backend data of a CeedBasis
303 
304   @param[in]  basis CeedBasis
305   @param[out] data  Variable to store data
306 
307   @return An error code: 0 - success, otherwise - failure
308 
309   @ref Backend
310 **/
311 int CeedBasisGetData(CeedBasis basis, void *data) {
312   *(void **)data = basis->data;
313   return CEED_ERROR_SUCCESS;
314 }
315 
316 /**
317   @brief Set backend data of a CeedBasis
318 
319   @param[in,out] basis  CeedBasis
320   @param[in]     data   Data to set
321 
322   @return An error code: 0 - success, otherwise - failure
323 
324   @ref Backend
325 **/
326 int CeedBasisSetData(CeedBasis basis, void *data) {
327   basis->data = data;
328   return CEED_ERROR_SUCCESS;
329 }
330 
331 /**
332   @brief Increment the reference counter for a CeedBasis
333 
334   @param[in,out] basis Basis to increment the reference counter
335 
336   @return An error code: 0 - success, otherwise - failure
337 
338   @ref Backend
339 **/
340 int CeedBasisReference(CeedBasis basis) {
341   basis->ref_count++;
342   return CEED_ERROR_SUCCESS;
343 }
344 
345 /**
346   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
347 
348   @param[in]  basis     Basis to estimate FLOPs for
349   @param[in]  t_mode    Apply basis or transpose
350   @param[in]  eval_mode Basis evaluation mode
351   @param[out] flops     Address of variable to hold FLOPs estimate
352 
353   @ref Backend
354 **/
355 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
356   bool is_tensor;
357 
358   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
359   if (is_tensor) {
360     CeedInt dim, num_comp, P_1d, Q_1d;
361     CeedCall(CeedBasisGetDimension(basis, &dim));
362     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
363     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
364     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
365     if (t_mode == CEED_TRANSPOSE) {
366       P_1d = Q_1d;
367       Q_1d = P_1d;
368     }
369     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
370     for (CeedInt d = 0; d < dim; d++) {
371       tensor_flops += 2 * pre * P_1d * post * Q_1d;
372       pre /= P_1d;
373       post *= Q_1d;
374     }
375     switch (eval_mode) {
376       case CEED_EVAL_NONE:
377         *flops = 0;
378         break;
379       case CEED_EVAL_INTERP:
380         *flops = tensor_flops;
381         break;
382       case CEED_EVAL_GRAD:
383         *flops = tensor_flops * 2;
384         break;
385       case CEED_EVAL_DIV:
386         // LCOV_EXCL_START
387         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
388         break;
389       case CEED_EVAL_CURL:
390         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
391         break;
392       // LCOV_EXCL_STOP
393       case CEED_EVAL_WEIGHT:
394         *flops = dim * CeedIntPow(Q_1d, dim);
395         break;
396     }
397   } else {
398     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
399     CeedCall(CeedBasisGetDimension(basis, &dim));
400     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
401     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
402     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
403     CeedCall(CeedBasisGetNumQuadratureComponents(basis, &Q_comp));
404     switch (eval_mode) {
405       case CEED_EVAL_NONE:
406         *flops = 0;
407         break;
408       case CEED_EVAL_INTERP:
409         *flops = num_nodes * num_qpts * num_comp;
410         break;
411       case CEED_EVAL_GRAD:
412         *flops = num_nodes * num_qpts * num_comp * dim;
413         break;
414       case CEED_EVAL_DIV:
415         *flops = num_nodes * num_qpts;
416         break;
417       case CEED_EVAL_CURL:
418         *flops = num_nodes * num_qpts * dim;
419         break;
420       case CEED_EVAL_WEIGHT:
421         *flops = 0;
422         break;
423     }
424   }
425 
426   return CEED_ERROR_SUCCESS;
427 }
428 
429 /**
430   @brief Get dimension for given CeedElemTopology
431 
432   @param[in]  topo CeedElemTopology
433   @param[out] dim  Variable to store dimension of topology
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Backend
438 **/
439 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
440   *dim = (CeedInt)topo >> 16;
441   return CEED_ERROR_SUCCESS;
442 }
443 
444 /**
445   @brief Get CeedTensorContract of a CeedBasis
446 
447   @param[in]  basis     CeedBasis
448   @param[out] contract  Variable to store CeedTensorContract
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Backend
453 **/
454 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
455   *contract = basis->contract;
456   return CEED_ERROR_SUCCESS;
457 }
458 
459 /**
460   @brief Set CeedTensorContract of a CeedBasis
461 
462   @param[in,out] basis    CeedBasis
463   @param[in]     contract CeedTensorContract to set
464 
465   @return An error code: 0 - success, otherwise - failure
466 
467   @ref Backend
468 **/
469 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
470   basis->contract = contract;
471   CeedCall(CeedTensorContractReference(contract));
472   return CEED_ERROR_SUCCESS;
473 }
474 
475 /**
476   @brief Return a reference implementation of matrix multiplication C = A B.
477 
478   Note: This is a reference implementation for CPU CeedScalar pointers that is not intended for high performance.
479 
480   @param[in]  ceed  Ceed context for error handling
481   @param[in]  mat_A Row-major matrix A
482   @param[in]  mat_B Row-major matrix B
483   @param[out] mat_C Row-major output matrix C
484   @param[in]  m     Number of rows of C
485   @param[in]  n     Number of columns of C
486   @param[in]  kk    Number of columns of A/rows of B
487 
488   @return An error code: 0 - success, otherwise - failure
489 
490   @ref Utility
491 **/
492 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
493   for (CeedInt i = 0; i < m; i++) {
494     for (CeedInt j = 0; j < n; j++) {
495       CeedScalar sum = 0;
496       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
497       mat_C[j + i * n] = sum;
498     }
499   }
500   return CEED_ERROR_SUCCESS;
501 }
502 
503 /**
504   @brief Return QR Factorization of a matrix
505 
506   @param[in]     ceed Ceed context for error handling
507   @param[in,out] mat  Row-major matrix to be factorized in place
508   @param[in,out] tau  Vector of length m of scaling factors
509   @param[in]     m    Number of rows
510   @param[in]     n    Number of columns
511 
512   @return An error code: 0 - success, otherwise - failure
513 
514   @ref Utility
515 **/
516 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
517   CeedScalar v[m];
518 
519   // Check matrix shape
520   if (n > m) {
521     // LCOV_EXCL_START
522     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
523     // LCOV_EXCL_STOP
524   }
525 
526   for (CeedInt i = 0; i < n; i++) {
527     if (i >= m - 1) {  // last row of matrix, no reflection needed
528       tau[i] = 0.;
529       break;
530     }
531     // Calculate Householder vector, magnitude
532     CeedScalar sigma = 0.0;
533     v[i]             = mat[i + n * i];
534     for (CeedInt j = i + 1; j < m; j++) {
535       v[j] = mat[i + n * j];
536       sigma += v[j] * v[j];
537     }
538     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
539     CeedScalar R_ii = -copysign(norm, v[i]);
540     v[i] -= R_ii;
541     // norm of v[i:m] after modification above and scaling below
542     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
543     //   tau = 2 / (norm*norm)
544     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
545     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
546 
547     // Apply Householder reflector to lower right panel
548     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
549     // Save v
550     mat[i + n * i] = R_ii;
551     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
552   }
553   return CEED_ERROR_SUCCESS;
554 }
555 
556 /**
557   @brief Apply Householder Q matrix
558 
559   Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn.
560 
561   @param[in,out] mat_A  Matrix to apply Householder Q to, in place
562   @param[in]     mat_Q  Householder Q matrix
563   @param[in]     tau    Householder scaling factors
564   @param[in]     t_mode Transpose mode for application
565   @param[in]     m      Number of rows in A
566   @param[in]     n      Number of columns in A
567   @param[in]     k      Number of elementary reflectors in Q, k<m
568   @param[in]     row    Row stride in A
569   @param[in]     col    Col stride in A
570 
571   @return An error code: 0 - success, otherwise - failure
572 
573   @ref Developer
574 **/
575 int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n,
576                           CeedInt k, CeedInt row, CeedInt col) {
577   CeedScalar *v;
578   CeedCall(CeedMalloc(m, &v));
579   for (CeedInt ii = 0; ii < k; ii++) {
580     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
581     for (CeedInt j = i + 1; j < m; j++) v[j] = mat_Q[j * k + i];
582     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
583     CeedCall(CeedHouseholderReflect(&mat_A[i * row], &v[i], tau[i], m - i, n, row, col));
584   }
585   CeedCall(CeedFree(&v));
586   return CEED_ERROR_SUCCESS;
587 }
588 
589 /**
590   @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization
591 
592   @param[in]     ceed   Ceed context for error handling
593   @param[in,out] mat    Row-major matrix to be factorized in place
594   @param[out]    lambda Vector of length n of eigenvalues
595   @param[in]     n      Number of rows/columns
596 
597   @return An error code: 0 - success, otherwise - failure
598 
599   @ref Utility
600 **/
601 CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
602   // Check bounds for clang-tidy
603   if (n < 2) {
604     // LCOV_EXCL_START
605     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
606     // LCOV_EXCL_STOP
607   }
608 
609   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
610 
611   // Copy mat to mat_T and set mat to I
612   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
613   for (CeedInt i = 0; i < n; i++) {
614     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
615   }
616 
617   // Reduce to tridiagonal
618   for (CeedInt i = 0; i < n - 1; i++) {
619     // Calculate Householder vector, magnitude
620     CeedScalar sigma = 0.0;
621     v[i]             = mat_T[i + n * (i + 1)];
622     for (CeedInt j = i + 1; j < n - 1; j++) {
623       v[j] = mat_T[i + n * (j + 1)];
624       sigma += v[j] * v[j];
625     }
626     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
627     CeedScalar R_ii = -copysign(norm, v[i]);
628     v[i] -= R_ii;
629     // norm of v[i:m] after modification above and scaling below
630     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
631     //   tau = 2 / (norm*norm)
632     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
633     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
634 
635     // Update sub and super diagonal
636     for (CeedInt j = i + 2; j < n; j++) {
637       mat_T[i + n * j] = 0;
638       mat_T[j + n * i] = 0;
639     }
640     // Apply symmetric Householder reflector to lower right panel
641     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
642     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
643 
644     // Save v
645     mat_T[i + n * (i + 1)] = R_ii;
646     mat_T[(i + 1) + n * i] = R_ii;
647     for (CeedInt j = i + 1; j < n - 1; j++) {
648       mat_T[i + n * (j + 1)] = v[j];
649     }
650   }
651   // Backwards accumulation of Q
652   for (CeedInt i = n - 2; i >= 0; i--) {
653     if (tau[i] > 0.0) {
654       v[i] = 1;
655       for (CeedInt j = i + 1; j < n - 1; j++) {
656         v[j]                   = mat_T[i + n * (j + 1)];
657         mat_T[i + n * (j + 1)] = 0;
658       }
659       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
660     }
661   }
662 
663   // Reduce sub and super diagonal
664   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
665   CeedScalar tol = CEED_EPSILON;
666 
667   while (itr < max_itr) {
668     // Update p, q, size of reduced portions of diagonal
669     p = 0;
670     q = 0;
671     for (CeedInt i = n - 2; i >= 0; i--) {
672       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
673       else break;
674     }
675     for (CeedInt i = 0; i < n - q - 1; i++) {
676       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
677       else break;
678     }
679     if (q == n - 1) break;  // Finished reducing
680 
681     // Reduce tridiagonal portion
682     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
683     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
684     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
685     CeedScalar x  = mat_T[p + n * p] - mu;
686     CeedScalar z  = mat_T[p + n * (p + 1)];
687     for (CeedInt k = p; k < n - q - 1; k++) {
688       // Compute Givens rotation
689       CeedScalar c = 1, s = 0;
690       if (fabs(z) > tol) {
691         if (fabs(z) > fabs(x)) {
692           CeedScalar tau = -x / z;
693           s = 1 / sqrt(1 + tau * tau), c = s * tau;
694         } else {
695           CeedScalar tau = -z / x;
696           c = 1 / sqrt(1 + tau * tau), s = c * tau;
697         }
698       }
699 
700       // Apply Givens rotation to T
701       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
702       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
703 
704       // Apply Givens rotation to Q
705       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
706 
707       // Update x, z
708       if (k < n - q - 2) {
709         x = mat_T[k + n * (k + 1)];
710         z = mat_T[k + n * (k + 2)];
711       }
712     }
713     itr++;
714   }
715 
716   // Save eigenvalues
717   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
718 
719   // Check convergence
720   if (itr == max_itr && q < n - 1) {
721     // LCOV_EXCL_START
722     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
723     // LCOV_EXCL_STOP
724   }
725   return CEED_ERROR_SUCCESS;
726 }
727 CeedPragmaOptimizeOn;
728 
729 /**
730   @brief Return Simultaneous Diagonalization of two matrices.
731 
732   This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite.
733   We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I.
734   This is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
735 
736   @param[in]  ceed   Ceed context for error handling
737   @param[in]  mat_A  Row-major matrix to be factorized with eigenvalues
738   @param[in]  mat_B  Row-major matrix to be factorized to identity
739   @param[out] mat_X  Row-major orthogonal matrix
740   @param[out] lambda Vector of length n of generalized eigenvalues
741   @param[in]  n      Number of rows/columns
742 
743   @return An error code: 0 - success, otherwise - failure
744 
745   @ref Utility
746 **/
747 CeedPragmaOptimizeOff int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda,
748                                                           CeedInt n) {
749   CeedScalar *mat_C, *mat_G, *vec_D;
750   CeedCall(CeedCalloc(n * n, &mat_C));
751   CeedCall(CeedCalloc(n * n, &mat_G));
752   CeedCall(CeedCalloc(n, &vec_D));
753 
754   // Compute B = G D G^T
755   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
756   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
757 
758   // Sort eigenvalues
759   for (CeedInt i = n - 1; i >= 0; i--) {
760     for (CeedInt j = 0; j < i; j++) {
761       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
762         CeedScalar temp;
763         temp         = vec_D[j];
764         vec_D[j]     = vec_D[j + 1];
765         vec_D[j + 1] = temp;
766         for (CeedInt k = 0; k < n; k++) {
767           temp                 = mat_G[k * n + j];
768           mat_G[k * n + j]     = mat_G[k * n + j + 1];
769           mat_G[k * n + j + 1] = temp;
770         }
771       }
772     }
773   }
774 
775   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
776   //           = D^-1/2 G^T A G D^-1/2
777   // -- D = D^-1/2
778   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
779   // -- G = G D^-1/2
780   // -- C = D^-1/2 G^T
781   for (CeedInt i = 0; i < n; i++) {
782     for (CeedInt j = 0; j < n; j++) {
783       mat_G[i * n + j] *= vec_D[j];
784       mat_C[j * n + i] = mat_G[i * n + j];
785     }
786   }
787   // -- X = (D^-1/2 G^T) A
788   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
789   // -- C = (D^-1/2 G^T A) (G D^-1/2)
790   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
791 
792   // Compute Q^T C Q = lambda
793   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
794 
795   // Sort eigenvalues
796   for (CeedInt i = n - 1; i >= 0; i--) {
797     for (CeedInt j = 0; j < i; j++) {
798       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
799         CeedScalar temp;
800         temp          = lambda[j];
801         lambda[j]     = lambda[j + 1];
802         lambda[j + 1] = temp;
803         for (CeedInt k = 0; k < n; k++) {
804           temp                 = mat_C[k * n + j];
805           mat_C[k * n + j]     = mat_C[k * n + j + 1];
806           mat_C[k * n + j + 1] = temp;
807         }
808       }
809     }
810   }
811 
812   // Set X = (G D^1/2)^-T Q
813   //       = G D^-1/2 Q
814   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
815 
816   // Cleanup
817   CeedCall(CeedFree(&mat_C));
818   CeedCall(CeedFree(&mat_G));
819   CeedCall(CeedFree(&vec_D));
820   return CEED_ERROR_SUCCESS;
821 }
822 CeedPragmaOptimizeOn;
823 
824 /// @}
825 
826 /// ----------------------------------------------------------------------------
827 /// CeedBasis Public API
828 /// ----------------------------------------------------------------------------
829 /// @addtogroup CeedBasisUser
830 /// @{
831 
832 /**
833   @brief Create a tensor-product basis for H^1 discretizations
834 
835   @param[in]  ceed        Ceed object where the CeedBasis will be created
836   @param[in]  dim         Topological dimension
837   @param[in]  num_comp    Number of field components (1 for scalar fields)
838   @param[in]  P_1d        Number of nodes in one dimension
839   @param[in]  Q_1d        Number of quadrature points in one dimension
840   @param[in]  interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points
841   @param[in]  grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points
842   @param[in]  q_ref_1d    Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1]
843   @param[in]  q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element
844   @param[out] basis       Address of the variable where the newly created CeedBasis will be stored.
845 
846   @return An error code: 0 - success, otherwise - failure
847 
848   @ref User
849 **/
850 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
851                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
852   if (!ceed->BasisCreateTensorH1) {
853     Ceed delegate;
854     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
855 
856     if (!delegate) {
857       // LCOV_EXCL_START
858       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
859       // LCOV_EXCL_STOP
860     }
861 
862     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
863     return CEED_ERROR_SUCCESS;
864   }
865 
866   if (dim < 1) {
867     // LCOV_EXCL_START
868     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
869     // LCOV_EXCL_STOP
870   }
871 
872   if (num_comp < 1) {
873     // LCOV_EXCL_START
874     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
875     // LCOV_EXCL_STOP
876   }
877 
878   if (P_1d < 1) {
879     // LCOV_EXCL_START
880     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
881     // LCOV_EXCL_STOP
882   }
883 
884   if (Q_1d < 1) {
885     // LCOV_EXCL_START
886     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
887     // LCOV_EXCL_STOP
888   }
889 
890   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
891 
892   CeedCall(CeedCalloc(1, basis));
893   (*basis)->ceed = ceed;
894   CeedCall(CeedReference(ceed));
895   (*basis)->ref_count    = 1;
896   (*basis)->tensor_basis = 1;
897   (*basis)->dim          = dim;
898   (*basis)->topo         = topo;
899   (*basis)->num_comp     = num_comp;
900   (*basis)->P_1d         = P_1d;
901   (*basis)->Q_1d         = Q_1d;
902   (*basis)->P            = CeedIntPow(P_1d, dim);
903   (*basis)->Q            = CeedIntPow(Q_1d, dim);
904   (*basis)->Q_comp       = 1;
905   (*basis)->basis_space  = 1;  // 1 for H^1 space
906   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
907   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
908   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
909   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
910   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
911   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
912   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
913   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
914   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Create a tensor-product Lagrange basis
920 
921   @param[in]  ceed      Ceed object where the CeedBasis will be created
922   @param[in]  dim       Topological dimension of element
923   @param[in]  num_comp  Number of field components (1 for scalar fields)
924   @param[in]  P         Number of Gauss-Lobatto nodes in one dimension.
925                           The polynomial degree of the resulting Q_k element is k=P-1.
926   @param[in]  Q         Number of quadrature points in one dimension.
927   @param[in]  quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature)
928   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
929 
930   @return An error code: 0 - success, otherwise - failure
931 
932   @ref User
933 **/
934 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
935   // Allocate
936   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
937   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
938 
939   if (dim < 1) {
940     // LCOV_EXCL_START
941     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
942     // LCOV_EXCL_STOP
943   }
944 
945   if (num_comp < 1) {
946     // LCOV_EXCL_START
947     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
948     // LCOV_EXCL_STOP
949   }
950 
951   if (P < 1) {
952     // LCOV_EXCL_START
953     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
954     // LCOV_EXCL_STOP
955   }
956 
957   if (Q < 1) {
958     // LCOV_EXCL_START
959     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
960     // LCOV_EXCL_STOP
961   }
962 
963   // Get Nodes and Weights
964   CeedCall(CeedCalloc(P * Q, &interp_1d));
965   CeedCall(CeedCalloc(P * Q, &grad_1d));
966   CeedCall(CeedCalloc(P, &nodes));
967   CeedCall(CeedCalloc(Q, &q_ref_1d));
968   CeedCall(CeedCalloc(Q, &q_weight_1d));
969   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
970   switch (quad_mode) {
971     case CEED_GAUSS:
972       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
973       break;
974     case CEED_GAUSS_LOBATTO:
975       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
976       break;
977   }
978   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
979 
980   // Build B, D matrix
981   // Fornberg, 1998
982   for (i = 0; i < Q; i++) {
983     c1                   = 1.0;
984     c3                   = nodes[0] - q_ref_1d[i];
985     interp_1d[i * P + 0] = 1.0;
986     for (j = 1; j < P; j++) {
987       c2 = 1.0;
988       c4 = c3;
989       c3 = nodes[j] - q_ref_1d[i];
990       for (k = 0; k < j; k++) {
991         dx = nodes[j] - nodes[k];
992         c2 *= dx;
993         if (k == j - 1) {
994           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
995           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
996         }
997         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
998         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
999       }
1000       c1 = c2;
1001     }
1002   }
1003   // Pass to CeedBasisCreateTensorH1
1004   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
1005 cleanup:
1006   CeedCall(CeedFree(&interp_1d));
1007   CeedCall(CeedFree(&grad_1d));
1008   CeedCall(CeedFree(&nodes));
1009   CeedCall(CeedFree(&q_ref_1d));
1010   CeedCall(CeedFree(&q_weight_1d));
1011   return CEED_ERROR_SUCCESS;
1012 }
1013 
1014 /**
1015   @brief Create a non tensor-product basis for H^1 discretizations
1016 
1017   @param[in]  ceed      Ceed object where the CeedBasis will be created
1018   @param[in]  topo      Topology of element, e.g. hypercube, simplex, ect
1019   @param[in]  num_comp  Number of field components (1 for scalar fields)
1020   @param[in]  num_nodes Total number of nodes
1021   @param[in]  num_qpts  Total number of quadrature points
1022   @param[in]  interp    Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1023   @param[in]  grad      Row-major (num_qpts * dim * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points
1024   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1025   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1026   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1027 
1028   @return An error code: 0 - success, otherwise - failure
1029 
1030   @ref User
1031 **/
1032 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1033                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1034   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
1035 
1036   if (!ceed->BasisCreateH1) {
1037     Ceed delegate;
1038     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1039 
1040     if (!delegate) {
1041       // LCOV_EXCL_START
1042       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
1043       // LCOV_EXCL_STOP
1044     }
1045 
1046     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
1047     return CEED_ERROR_SUCCESS;
1048   }
1049 
1050   if (num_comp < 1) {
1051     // LCOV_EXCL_START
1052     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1053     // LCOV_EXCL_STOP
1054   }
1055 
1056   if (num_nodes < 1) {
1057     // LCOV_EXCL_START
1058     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1059     // LCOV_EXCL_STOP
1060   }
1061 
1062   if (num_qpts < 1) {
1063     // LCOV_EXCL_START
1064     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1065     // LCOV_EXCL_STOP
1066   }
1067 
1068   CeedCall(CeedCalloc(1, basis));
1069 
1070   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1071 
1072   (*basis)->ceed = ceed;
1073   CeedCall(CeedReference(ceed));
1074   (*basis)->ref_count    = 1;
1075   (*basis)->tensor_basis = 0;
1076   (*basis)->dim          = dim;
1077   (*basis)->topo         = topo;
1078   (*basis)->num_comp     = num_comp;
1079   (*basis)->P            = P;
1080   (*basis)->Q            = Q;
1081   (*basis)->Q_comp       = 1;
1082   (*basis)->basis_space  = 1;  // 1 for H^1 space
1083   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
1084   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
1085   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1086   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1087   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
1088   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
1089   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
1090   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
1091   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
1092   return CEED_ERROR_SUCCESS;
1093 }
1094 
1095 /**
1096   @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations
1097 
1098   @param[in]  ceed      Ceed object where the CeedBasis will be created
1099   @param[in]  topo      Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), dimension of which is used in some array sizes below
1100   @param[in]  num_comp  Number of components (usually 1 for vectors in H(div) bases)
1101   @param[in]  num_nodes Total number of nodes (dofs per element)
1102   @param[in]  num_qpts  Total number of quadrature points
1103   @param[in]  interp    Row-major (dim*num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points
1104   @param[in]  div       Row-major (num_qpts * num_nodes) matrix expressing divergence of nodal basis functions at quadrature points
1105   @param[in]  q_ref     Array of length num_qpts * dim holding the locations of quadrature points on the reference element
1106   @param[in]  q_weight  Array of length num_qpts holding the quadrature weights on the reference element
1107   @param[out] basis     Address of the variable where the newly created CeedBasis will be stored.
1108 
1109   @return An error code: 0 - success, otherwise - failure
1110 
1111   @ref User
1112 **/
1113 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
1114                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
1115   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
1116   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
1117   if (!ceed->BasisCreateHdiv) {
1118     Ceed delegate;
1119     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
1120 
1121     if (!delegate) {
1122       // LCOV_EXCL_START
1123       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
1124       // LCOV_EXCL_STOP
1125     }
1126 
1127     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
1128     return CEED_ERROR_SUCCESS;
1129   }
1130 
1131   if (num_comp < 1) {
1132     // LCOV_EXCL_START
1133     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
1134     // LCOV_EXCL_STOP
1135   }
1136 
1137   if (num_nodes < 1) {
1138     // LCOV_EXCL_START
1139     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
1140     // LCOV_EXCL_STOP
1141   }
1142 
1143   if (num_qpts < 1) {
1144     // LCOV_EXCL_START
1145     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
1146     // LCOV_EXCL_STOP
1147   }
1148 
1149   CeedCall(CeedCalloc(1, basis));
1150 
1151   (*basis)->ceed = ceed;
1152   CeedCall(CeedReference(ceed));
1153   (*basis)->ref_count    = 1;
1154   (*basis)->tensor_basis = 0;
1155   (*basis)->dim          = dim;
1156   (*basis)->topo         = topo;
1157   (*basis)->num_comp     = num_comp;
1158   (*basis)->P            = P;
1159   (*basis)->Q            = Q;
1160   (*basis)->Q_comp       = dim;
1161   (*basis)->basis_space  = 2;  // 2 for H(div) space
1162   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
1163   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
1164   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
1165   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
1166   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
1167   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
1168   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
1169   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
1170   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
1171   return CEED_ERROR_SUCCESS;
1172 }
1173 
1174 /**
1175   @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`.
1176 
1177   Only `CEED_EVAL_INTERP` and `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`.
1178   The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR
1179 factorization. The gradient is given by `grad_project = interp_to^+ * grad_from`. Note: `basis_from` and `basis_to` must have compatible quadrature
1180 spaces.
1181   Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If
1182 `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components.
1183 
1184   @param[in]  basis_from    CeedBasis to prolong from
1185   @param[in]  basis_to      CeedBasis to prolong to
1186   @param[out] basis_project Address of the variable where the newly created CeedBasis will be stored.
1187 
1188   @return An error code: 0 - success, otherwise - failure
1189 
1190   @ref User
1191 **/
1192 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
1193   Ceed ceed;
1194   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
1195 
1196   // Create projection matrix
1197   CeedScalar *interp_project, *grad_project;
1198   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
1199 
1200   // Build basis
1201   bool        is_tensor;
1202   CeedInt     dim, num_comp;
1203   CeedScalar *q_ref, *q_weight;
1204   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
1205   CeedCall(CeedBasisGetDimension(basis_to, &dim));
1206   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
1207   if (is_tensor) {
1208     CeedInt P_1d_to, P_1d_from;
1209     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
1210     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
1211     CeedCall(CeedCalloc(P_1d_to, &q_ref));
1212     CeedCall(CeedCalloc(P_1d_to, &q_weight));
1213     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1214   } else {
1215     CeedElemTopology topo;
1216     CeedCall(CeedBasisGetTopology(basis_to, &topo));
1217     CeedInt num_nodes_to, num_nodes_from;
1218     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
1219     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
1220     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
1221     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
1222     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
1223   }
1224 
1225   // Cleanup
1226   CeedCall(CeedFree(&interp_project));
1227   CeedCall(CeedFree(&grad_project));
1228   CeedCall(CeedFree(&q_ref));
1229   CeedCall(CeedFree(&q_weight));
1230 
1231   return CEED_ERROR_SUCCESS;
1232 }
1233 
1234 /**
1235   @brief Copy the pointer to a CeedBasis.
1236 
1237   Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis.
1238   This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis.
1239 
1240   @param[in]     basis      CeedBasis to copy reference to
1241   @param[in,out] basis_copy Variable to store copied reference
1242 
1243   @return An error code: 0 - success, otherwise - failure
1244 
1245   @ref User
1246 **/
1247 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1248   if (basis != CEED_BASIS_COLLOCATED) CeedCall(CeedBasisReference(basis));
1249   CeedCall(CeedBasisDestroy(basis_copy));
1250   *basis_copy = basis;
1251   return CEED_ERROR_SUCCESS;
1252 }
1253 
1254 /**
1255   @brief View a CeedBasis
1256 
1257   @param[in] basis  CeedBasis to view
1258   @param[in] stream Stream to view to, e.g., stdout
1259 
1260   @return An error code: 0 - success, otherwise - failure
1261 
1262   @ref User
1263 **/
1264 int CeedBasisView(CeedBasis basis, FILE *stream) {
1265   CeedFESpace      FE_space = basis->basis_space;
1266   CeedElemTopology topo     = basis->topo;
1267 
1268   // Print FE space and element topology of the basis
1269   if (basis->tensor_basis) {
1270     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
1271             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
1272   } else {
1273     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
1274             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
1275   }
1276   // Print quadrature data, interpolation/gradient/divergence/curl of the basis
1277   if (basis->tensor_basis) {  // tensor basis
1278     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1279     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1280     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1281     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1282   } else {  // non-tensor basis
1283     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1284     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1285     CeedCall(CeedScalarView("interp", "\t% 12.8f", basis->Q_comp * basis->Q, basis->P, basis->interp, stream));
1286     if (basis->grad) {
1287       CeedCall(CeedScalarView("grad", "\t% 12.8f", basis->dim * basis->Q, basis->P, basis->grad, stream));
1288     }
1289     if (basis->div) {
1290       CeedCall(CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P, basis->div, stream));
1291     }
1292   }
1293   return CEED_ERROR_SUCCESS;
1294 }
1295 
1296 /**
1297   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1298 
1299   @param[in]  basis      CeedBasis to evaluate
1300   @param[in]  num_elem   The number of elements to apply the basis evaluation to;
1301                            the backend will specify the ordering in CeedElemRestrictionCreateBlocked()
1302   @param[in]  t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature points;
1303                           \ref CEED_TRANSPOSE to apply the transpose, mapping from quadrature points to nodes
1304   @param[in]  eval_mode \ref CEED_EVAL_NONE to use values directly,
1305                           \ref CEED_EVAL_INTERP to use interpolated values,
1306                           \ref CEED_EVAL_GRAD to use gradients,
1307                           \ref CEED_EVAL_WEIGHT to use quadrature weights.
1308   @param[in]  u        Input CeedVector
1309   @param[out] v        Output CeedVector
1310 
1311   @return An error code: 0 - success, otherwise - failure
1312 
1313   @ref User
1314 **/
1315 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1316   CeedSize u_length = 0, v_length;
1317   CeedInt  dim, num_comp, num_nodes, num_qpts;
1318   CeedCall(CeedBasisGetDimension(basis, &dim));
1319   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1320   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1321   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1322   CeedCall(CeedVectorGetLength(v, &v_length));
1323   if (u) {
1324     CeedCall(CeedVectorGetLength(u, &u_length));
1325   }
1326 
1327   if (!basis->Apply) {
1328     // LCOV_EXCL_START
1329     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1330     // LCOV_EXCL_STOP
1331   }
1332 
1333   // Check compatibility of topological and geometrical dimensions
1334   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1335       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1336     // LCOV_EXCL_START
1337     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1338     // LCOV_EXCL_STOP
1339   }
1340 
1341   // Check vector lengths to prevent out of bounds issues
1342   bool bad_dims = false;
1343   switch (eval_mode) {
1344     case CEED_EVAL_NONE:
1345     case CEED_EVAL_INTERP:
1346       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1347                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1348       break;
1349     case CEED_EVAL_GRAD:
1350       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * dim || v_length < num_elem * num_comp * num_nodes)) ||
1351                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * dim || u_length < num_elem * num_comp * num_nodes)));
1352       break;
1353     case CEED_EVAL_WEIGHT:
1354       bad_dims = v_length < num_elem * num_qpts;
1355       break;
1356     // LCOV_EXCL_START
1357     case CEED_EVAL_DIV:
1358       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1359                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1360       break;
1361     case CEED_EVAL_CURL:
1362       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1363                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1364       break;
1365       // LCOV_EXCL_STOP
1366   }
1367   if (bad_dims) {
1368     // LCOV_EXCL_START
1369     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1370     // LCOV_EXCL_STOP
1371   }
1372 
1373   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1374   return CEED_ERROR_SUCCESS;
1375 }
1376 
1377 /**
1378   @brief Get Ceed associated with a CeedBasis
1379 
1380   @param[in]  basis CeedBasis
1381   @param[out] ceed  Variable to store Ceed
1382 
1383   @return An error code: 0 - success, otherwise - failure
1384 
1385   @ref Advanced
1386 **/
1387 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1388   *ceed = basis->ceed;
1389   return CEED_ERROR_SUCCESS;
1390 }
1391 
1392 /**
1393   @brief Get dimension for given CeedBasis
1394 
1395   @param[in]  basis CeedBasis
1396   @param[out] dim   Variable to store dimension of basis
1397 
1398   @return An error code: 0 - success, otherwise - failure
1399 
1400   @ref Advanced
1401 **/
1402 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1403   *dim = basis->dim;
1404   return CEED_ERROR_SUCCESS;
1405 }
1406 
1407 /**
1408   @brief Get topology for given CeedBasis
1409 
1410   @param[in]  basis CeedBasis
1411   @param[out] topo  Variable to store topology of basis
1412 
1413   @return An error code: 0 - success, otherwise - failure
1414 
1415   @ref Advanced
1416 **/
1417 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1418   *topo = basis->topo;
1419   return CEED_ERROR_SUCCESS;
1420 }
1421 
1422 /**
1423   @brief Get number of Q-vector components for given CeedBasis
1424 
1425   @param[in]  basis  CeedBasis
1426   @param[out] Q_comp Variable to store number of Q-vector components of basis
1427 
1428   @return An error code: 0 - success, otherwise - failure
1429 
1430   @ref Advanced
1431 **/
1432 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
1433   *Q_comp = basis->Q_comp;
1434   return CEED_ERROR_SUCCESS;
1435 }
1436 
1437 /**
1438   @brief Get number of components for given CeedBasis
1439 
1440   @param[in]  basis    CeedBasis
1441   @param[out] num_comp Variable to store number of components of basis
1442 
1443   @return An error code: 0 - success, otherwise - failure
1444 
1445   @ref Advanced
1446 **/
1447 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1448   *num_comp = basis->num_comp;
1449   return CEED_ERROR_SUCCESS;
1450 }
1451 
1452 /**
1453   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1454 
1455   @param[in]  basis CeedBasis
1456   @param[out] P     Variable to store number of nodes
1457 
1458   @return An error code: 0 - success, otherwise - failure
1459 
1460   @ref Utility
1461 **/
1462 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1463   *P = basis->P;
1464   return CEED_ERROR_SUCCESS;
1465 }
1466 
1467 /**
1468   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1469 
1470   @param[in]  basis CeedBasis
1471   @param[out] P_1d  Variable to store number of nodes
1472 
1473   @return An error code: 0 - success, otherwise - failure
1474 
1475   @ref Advanced
1476 **/
1477 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1478   if (!basis->tensor_basis) {
1479     // LCOV_EXCL_START
1480     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1481     // LCOV_EXCL_STOP
1482   }
1483 
1484   *P_1d = basis->P_1d;
1485   return CEED_ERROR_SUCCESS;
1486 }
1487 
1488 /**
1489   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1490 
1491   @param[in]  basis CeedBasis
1492   @param[out] Q     Variable to store number of quadrature points
1493 
1494   @return An error code: 0 - success, otherwise - failure
1495 
1496   @ref Utility
1497 **/
1498 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1499   *Q = basis->Q;
1500   return CEED_ERROR_SUCCESS;
1501 }
1502 
1503 /**
1504   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1505 
1506   @param[in]  basis CeedBasis
1507   @param[out] Q_1d  Variable to store number of quadrature points
1508 
1509   @return An error code: 0 - success, otherwise - failure
1510 
1511   @ref Advanced
1512 **/
1513 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1514   if (!basis->tensor_basis) {
1515     // LCOV_EXCL_START
1516     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1517     // LCOV_EXCL_STOP
1518   }
1519 
1520   *Q_1d = basis->Q_1d;
1521   return CEED_ERROR_SUCCESS;
1522 }
1523 
1524 /**
1525   @brief Get reference coordinates of quadrature points (in dim dimensions) of a CeedBasis
1526 
1527   @param[in]  basis CeedBasis
1528   @param[out] q_ref Variable to store reference coordinates of quadrature points
1529 
1530   @return An error code: 0 - success, otherwise - failure
1531 
1532   @ref Advanced
1533 **/
1534 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1535   *q_ref = basis->q_ref_1d;
1536   return CEED_ERROR_SUCCESS;
1537 }
1538 
1539 /**
1540   @brief Get quadrature weights of quadrature points (in dim dimensions) of a CeedBasis
1541 
1542   @param[in]  basis    CeedBasis
1543   @param[out] q_weight Variable to store quadrature weights
1544 
1545   @return An error code: 0 - success, otherwise - failure
1546 
1547   @ref Advanced
1548 **/
1549 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1550   *q_weight = basis->q_weight_1d;
1551   return CEED_ERROR_SUCCESS;
1552 }
1553 
1554 /**
1555   @brief Get interpolation matrix of a CeedBasis
1556 
1557   @param[in]  basis  CeedBasis
1558   @param[out] interp Variable to store interpolation matrix
1559 
1560   @return An error code: 0 - success, otherwise - failure
1561 
1562   @ref Advanced
1563 **/
1564 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1565   if (!basis->interp && basis->tensor_basis) {
1566     // Allocate
1567     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1568 
1569     // Initialize
1570     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1571 
1572     // Calculate
1573     for (CeedInt d = 0; d < basis->dim; d++) {
1574       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1575         for (CeedInt node = 0; node < basis->P; node++) {
1576           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1577           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1578           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1579         }
1580       }
1581     }
1582   }
1583   *interp = basis->interp;
1584   return CEED_ERROR_SUCCESS;
1585 }
1586 
1587 /**
1588   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1589 
1590   @param[in]  basis     CeedBasis
1591   @param[out] interp_1d Variable to store interpolation matrix
1592 
1593   @return An error code: 0 - success, otherwise - failure
1594 
1595   @ref Backend
1596 **/
1597 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1598   if (!basis->tensor_basis) {
1599     // LCOV_EXCL_START
1600     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1601     // LCOV_EXCL_STOP
1602   }
1603 
1604   *interp_1d = basis->interp_1d;
1605   return CEED_ERROR_SUCCESS;
1606 }
1607 
1608 /**
1609   @brief Get gradient matrix of a CeedBasis
1610 
1611   @param[in]  basis CeedBasis
1612   @param[out] grad  Variable to store gradient matrix
1613 
1614   @return An error code: 0 - success, otherwise - failure
1615 
1616   @ref Advanced
1617 **/
1618 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1619   if (!basis->grad && basis->tensor_basis) {
1620     // Allocate
1621     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1622 
1623     // Initialize
1624     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1625 
1626     // Calculate
1627     for (CeedInt d = 0; d < basis->dim; d++) {
1628       for (CeedInt i = 0; i < basis->dim; i++) {
1629         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1630           for (CeedInt node = 0; node < basis->P; node++) {
1631             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1632             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1633             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1634             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1635           }
1636         }
1637       }
1638     }
1639   }
1640   *grad = basis->grad;
1641   return CEED_ERROR_SUCCESS;
1642 }
1643 
1644 /**
1645   @brief Get 1D gradient matrix of a tensor product CeedBasis
1646 
1647   @param[in]  basis   CeedBasis
1648   @param[out] grad_1d Variable to store gradient matrix
1649 
1650   @return An error code: 0 - success, otherwise - failure
1651 
1652   @ref Advanced
1653 **/
1654 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1655   if (!basis->tensor_basis) {
1656     // LCOV_EXCL_START
1657     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1658     // LCOV_EXCL_STOP
1659   }
1660 
1661   *grad_1d = basis->grad_1d;
1662   return CEED_ERROR_SUCCESS;
1663 }
1664 
1665 /**
1666   @brief Get divergence matrix of a CeedBasis
1667 
1668   @param[in]  basis CeedBasis
1669   @param[out] div   Variable to store divergence matrix
1670 
1671   @return An error code: 0 - success, otherwise - failure
1672 
1673   @ref Advanced
1674 **/
1675 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1676   if (!basis->div) {
1677     // LCOV_EXCL_START
1678     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1679     // LCOV_EXCL_STOP
1680   }
1681 
1682   *div = basis->div;
1683   return CEED_ERROR_SUCCESS;
1684 }
1685 
1686 /**
1687   @brief Destroy a CeedBasis
1688 
1689   @param[in,out] basis CeedBasis to destroy
1690 
1691   @return An error code: 0 - success, otherwise - failure
1692 
1693   @ref User
1694 **/
1695 int CeedBasisDestroy(CeedBasis *basis) {
1696   if (!*basis || *basis == CEED_BASIS_COLLOCATED || --(*basis)->ref_count > 0) {
1697     *basis = NULL;
1698     return CEED_ERROR_SUCCESS;
1699   }
1700   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1701   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1702   CeedCall(CeedFree(&(*basis)->interp));
1703   CeedCall(CeedFree(&(*basis)->interp_1d));
1704   CeedCall(CeedFree(&(*basis)->grad));
1705   CeedCall(CeedFree(&(*basis)->div));
1706   CeedCall(CeedFree(&(*basis)->grad_1d));
1707   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1708   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1709   CeedCall(CeedDestroy(&(*basis)->ceed));
1710   CeedCall(CeedFree(basis));
1711   return CEED_ERROR_SUCCESS;
1712 }
1713 
1714 /**
1715   @brief Construct a Gauss-Legendre quadrature
1716 
1717   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-1 exactly)
1718   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1719   @param[out] q_weight_1d Array of length Q to hold the weights
1720 
1721   @return An error code: 0 - success, otherwise - failure
1722 
1723   @ref Utility
1724 **/
1725 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1726   // Allocate
1727   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1728   // Build q_ref_1d, q_weight_1d
1729   for (CeedInt i = 0; i <= Q / 2; i++) {
1730     // Guess
1731     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1732     // Pn(xi)
1733     P0 = 1.0;
1734     P1 = xi;
1735     P2 = 0.0;
1736     for (CeedInt j = 2; j <= Q; j++) {
1737       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1738       P0 = P1;
1739       P1 = P2;
1740     }
1741     // First Newton Step
1742     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1743     xi  = xi - P2 / dP2;
1744     // Newton to convergence
1745     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1746       P0 = 1.0;
1747       P1 = xi;
1748       for (CeedInt j = 2; j <= Q; j++) {
1749         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1750         P0 = P1;
1751         P1 = P2;
1752       }
1753       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1754       xi  = xi - P2 / dP2;
1755     }
1756     // Save xi, wi
1757     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1758     q_weight_1d[i]         = wi;
1759     q_weight_1d[Q - 1 - i] = wi;
1760     q_ref_1d[i]            = -xi;
1761     q_ref_1d[Q - 1 - i]    = xi;
1762   }
1763   return CEED_ERROR_SUCCESS;
1764 }
1765 
1766 /**
1767   @brief Construct a Gauss-Legendre-Lobatto quadrature
1768 
1769   @param[in]  Q           Number of quadrature points (integrates polynomials of degree 2*Q-3 exactly)
1770   @param[out] q_ref_1d    Array of length Q to hold the abscissa on [-1, 1]
1771   @param[out] q_weight_1d Array of length Q to hold the weights
1772 
1773   @return An error code: 0 - success, otherwise - failure
1774 
1775   @ref Utility
1776 **/
1777 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1778   // Allocate
1779   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1780   // Build q_ref_1d, q_weight_1d
1781   // Set endpoints
1782   if (Q < 2) {
1783     // LCOV_EXCL_START
1784     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1785     // LCOV_EXCL_STOP
1786   }
1787   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1788   if (q_weight_1d) {
1789     q_weight_1d[0]     = wi;
1790     q_weight_1d[Q - 1] = wi;
1791   }
1792   q_ref_1d[0]     = -1.0;
1793   q_ref_1d[Q - 1] = 1.0;
1794   // Interior
1795   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1796     // Guess
1797     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1798     // Pn(xi)
1799     P0 = 1.0;
1800     P1 = xi;
1801     P2 = 0.0;
1802     for (CeedInt j = 2; j < Q; j++) {
1803       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1804       P0 = P1;
1805       P1 = P2;
1806     }
1807     // First Newton step
1808     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1809     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1810     xi   = xi - dP2 / d2P2;
1811     // Newton to convergence
1812     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1813       P0 = 1.0;
1814       P1 = xi;
1815       for (CeedInt j = 2; j < Q; j++) {
1816         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1817         P0 = P1;
1818         P1 = P2;
1819       }
1820       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1821       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1822       xi   = xi - dP2 / d2P2;
1823     }
1824     // Save xi, wi
1825     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
1826     if (q_weight_1d) {
1827       q_weight_1d[i]         = wi;
1828       q_weight_1d[Q - 1 - i] = wi;
1829     }
1830     q_ref_1d[i]         = -xi;
1831     q_ref_1d[Q - 1 - i] = xi;
1832   }
1833   return CEED_ERROR_SUCCESS;
1834 }
1835 
1836 /// @}
1837