xref: /libCEED/interface/ceed-basis.c (revision 2b730f8b5a9c809740a0b3b302db43a719c636b1)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed/backend.h>
10 #include <ceed/ceed.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A
41     where A is an mxn matrix indexed as A[i*row + j*col]
42 
43   @param[in,out] A  Matrix to apply Householder reflection to, in place
44   @param v          Householder vector
45   @param b          Scaling factor
46   @param m          Number of rows in A
47   @param n          Number of columns in A
48   @param row        Row stride
49   @param col        Col stride
50 
51   @return An error code: 0 - success, otherwise - failure
52 
53   @ref Developer
54 **/
55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) {
56   for (CeedInt j = 0; j < n; j++) {
57     CeedScalar w = A[0 * row + j * col];
58     for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col];
59     A[0 * row + j * col] -= b * w;
60     for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i];
61   }
62   return CEED_ERROR_SUCCESS;
63 }
64 
65 /**
66   @brief Apply Householder Q matrix
67 
68     Compute A = Q A where Q is mxm and A is mxn.
69 
70   @param[in,out] A  Matrix to apply Householder Q to, in place
71   @param Q          Householder Q matrix
72   @param tau        Householder scaling factors
73   @param t_mode     Transpose mode for application
74   @param m          Number of rows in A
75   @param n          Number of columns in A
76   @param k          Number of elementary reflectors in Q, k<m
77   @param row        Row stride in A
78   @param col        Col stride in A
79 
80   @return An error code: 0 - success, otherwise - failure
81 
82   @ref Developer
83 **/
84 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n, CeedInt k,
85                           CeedInt row, CeedInt col) {
86   CeedScalar *v;
87   CeedCall(CeedMalloc(m, &v));
88   for (CeedInt ii = 0; ii < k; ii++) {
89     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii;
90     for (CeedInt j = i + 1; j < m; j++) v[j] = Q[j * k + i];
91     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
92     CeedCall(CeedHouseholderReflect(&A[i * row], &v[i], tau[i], m - i, n, row, col));
93   }
94   CeedCall(CeedFree(&v));
95   return CEED_ERROR_SUCCESS;
96 }
97 
98 /**
99   @brief Compute Givens rotation
100 
101     Computes A = G A (or G^T A in transpose mode)
102     where A is an mxn matrix indexed as A[i*n + j*m]
103 
104   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
105   @param c          Cosine factor
106   @param s          Sine factor
107   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
108                     which has the effect of rotating columns of A clockwise;
109                     @ref CEED_TRANSPOSE for the opposite rotation
110   @param i          First row/column to apply rotation
111   @param k          Second row/column to apply rotation
112   @param m          Number of rows in A
113   @param n          Number of columns in A
114 
115   @return An error code: 0 - success, otherwise - failure
116 
117   @ref Developer
118 **/
119 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) {
120   CeedInt stride_j = 1, stride_ik = m, num_its = n;
121   if (t_mode == CEED_NOTRANSPOSE) {
122     stride_j  = n;
123     stride_ik = 1;
124     num_its   = m;
125   }
126 
127   // Apply rotation
128   for (CeedInt j = 0; j < num_its; j++) {
129     CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j];
130     A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2;
131     A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2;
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 /**
137   @brief View an array stored in a CeedBasis
138 
139   @param[in] name      Name of array
140   @param[in] fp_fmt    Printing format
141   @param[in] m         Number of rows in array
142   @param[in] n         Number of columns in array
143   @param[in] a         Array to be viewed
144   @param[in] stream    Stream to view to, e.g., stdout
145 
146   @return An error code: 0 - success, otherwise - failure
147 
148   @ref Developer
149 **/
150 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) {
151   for (CeedInt i = 0; i < m; i++) {
152     if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
153     else fprintf(stream, "%12s:", name);
154     for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0);
155     fputs("\n", stream);
156   }
157   return CEED_ERROR_SUCCESS;
158 }
159 
160 /**
161   @brief Create the interpolation and gradient matrices for projection from
162            the nodes of `basis_from` to the nodes of `basis_to`.
163            The interpolation is given by `interp_project = interp_to^+ * interp_from`,
164            where the pesudoinverse `interp_to^+` is given by QR factorization.
165            The gradient is given by `grad_project = interp_to^+ * grad_from`.
166          Note: `basis_from` and `basis_to` must have compatible quadrature
167            spaces.
168 
169   @param[in] basis_from       CeedBasis to project from
170   @param[in] basis_to         CeedBasis to project to
171   @param[out] interp_project  Address of the variable where the newly created
172                                 interpolation matrix will be stored.
173   @param[out] grad_project    Address of the variable where the newly created
174                                 gradient matrix will be stored.
175 
176   @return An error code: 0 - success, otherwise - failure
177 
178   @ref Developer
179 **/
180 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
181   Ceed ceed;
182   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
183 
184   // Check for compatible quadrature spaces
185   CeedInt Q_to, Q_from;
186   CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to));
187   CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from));
188   if (Q_to != Q_from) {
189     // LCOV_EXCL_START
190     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
191     // LCOV_EXCL_STOP
192   }
193 
194   // Check for matching tensor or non-tensor
195   CeedInt P_to, P_from, Q = Q_to;
196   bool    is_tensor_to, is_tensor_from;
197   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to));
198   CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from));
199   if (is_tensor_to && is_tensor_from) {
200     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to));
201     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from));
202     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q));
203   } else if (!is_tensor_to && !is_tensor_from) {
204     CeedCall(CeedBasisGetNumNodes(basis_to, &P_to));
205     CeedCall(CeedBasisGetNumNodes(basis_from, &P_from));
206   } else {
207     // LCOV_EXCL_START
208     return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor");
209     // LCOV_EXCL_STOP
210   }
211 
212   // Get source matrices
213   CeedInt     dim;
214   CeedScalar *interp_to, *interp_from, *tau;
215   CeedCall(CeedBasisGetDimension(basis_to, &dim));
216   CeedCall(CeedMalloc(Q * P_from, &interp_from));
217   CeedCall(CeedMalloc(Q * P_to, &interp_to));
218   CeedCall(CeedCalloc(P_to * P_from, interp_project));
219   CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project));
220   CeedCall(CeedMalloc(Q, &tau));
221   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, *grad_from_source;
222   if (is_tensor_to) {
223     CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source));
224     CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source));
225     CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source));
226   } else {
227     CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source));
228     CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source));
229     CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source));
230   }
231 
232   // Build matrices
233   CeedInt     num_matrices = 1 + (is_tensor_to ? 1 : dim);
234   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
235   input_from[0]     = (CeedScalar *)interp_from_source;
236   output_project[0] = *interp_project;
237   for (CeedInt m = 1; m < num_matrices; m++) {
238     input_from[m]     = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
239     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
240   }
241   for (CeedInt m = 0; m < num_matrices; m++) {
242     // -- QR Factorization, interp_to = Q R
243     memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0]));
244     CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q, P_to));
245 
246     // -- Apply Qtranspose, interp_to = Qtranspose interp_from
247     memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0]));
248     CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q, P_from, P_to, P_from, 1));
249 
250     // -- Apply Rinv, interp_project = Rinv interp_c
251     for (CeedInt j = 0; j < P_from; j++) {  // Column j
252       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1];
253       for (CeedInt i = P_to - 2; i >= 0; i--) {  // Row i
254         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
255         for (CeedInt k = i + 1; k < P_to; k++) {
256           output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k];
257         }
258         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
259       }
260     }
261   }
262 
263   // Cleanup
264   CeedCall(CeedFree(&tau));
265   CeedCall(CeedFree(&interp_to));
266   CeedCall(CeedFree(&interp_from));
267 
268   return CEED_ERROR_SUCCESS;
269 }
270 
271 /// @}
272 
273 /// ----------------------------------------------------------------------------
274 /// Ceed Backend API
275 /// ----------------------------------------------------------------------------
276 /// @addtogroup CeedBasisBackend
277 /// @{
278 
279 /**
280   @brief Return collocated grad matrix
281 
282   @param basis               CeedBasis
283   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
284                                basis functions at quadrature points
285 
286   @return An error code: 0 - success, otherwise - failure
287 
288   @ref Backend
289 **/
290 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
291   int         i, j, k;
292   Ceed        ceed;
293   CeedInt     P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d;
294   CeedScalar *interp_1d, *grad_1d, *tau;
295 
296   CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d));
297   CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d));
298   CeedCall(CeedMalloc(Q_1d, &tau));
299   memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
300   memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]);
301 
302   // QR Factorization, interp_1d = Q R
303   CeedCall(CeedBasisGetCeed(basis, &ceed));
304   CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d));
305   // Note: This function is for backend use, so all errors are terminal
306   //   and we do not need to clean up memory on failure.
307 
308   // Apply Rinv, collo_grad_1d = grad_1d Rinv
309   for (i = 0; i < Q_1d; i++) {  // Row i
310     collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0];
311     for (j = 1; j < P_1d; j++) {  // Column j
312       collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i];
313       for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i];
314       collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j];
315     }
316     for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0;
317   }
318 
319   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
320   CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d));
321 
322   CeedCall(CeedFree(&interp_1d));
323   CeedCall(CeedFree(&grad_1d));
324   CeedCall(CeedFree(&tau));
325   return CEED_ERROR_SUCCESS;
326 }
327 
328 /**
329   @brief Get tensor status for given CeedBasis
330 
331   @param basis           CeedBasis
332   @param[out] is_tensor  Variable to store tensor status
333 
334   @return An error code: 0 - success, otherwise - failure
335 
336   @ref Backend
337 **/
338 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
339   *is_tensor = basis->tensor_basis;
340   return CEED_ERROR_SUCCESS;
341 }
342 
343 /**
344   @brief Get backend data of a CeedBasis
345 
346   @param basis      CeedBasis
347   @param[out] data  Variable to store data
348 
349   @return An error code: 0 - success, otherwise - failure
350 
351   @ref Backend
352 **/
353 int CeedBasisGetData(CeedBasis basis, void *data) {
354   *(void **)data = basis->data;
355   return CEED_ERROR_SUCCESS;
356 }
357 
358 /**
359   @brief Set backend data of a CeedBasis
360 
361   @param[out] basis  CeedBasis
362   @param data        Data to set
363 
364   @return An error code: 0 - success, otherwise - failure
365 
366   @ref Backend
367 **/
368 int CeedBasisSetData(CeedBasis basis, void *data) {
369   basis->data = data;
370   return CEED_ERROR_SUCCESS;
371 }
372 
373 /**
374   @brief Increment the reference counter for a CeedBasis
375 
376   @param basis  Basis to increment the reference counter
377 
378   @return An error code: 0 - success, otherwise - failure
379 
380   @ref Backend
381 **/
382 int CeedBasisReference(CeedBasis basis) {
383   basis->ref_count++;
384   return CEED_ERROR_SUCCESS;
385 }
386 
387 /**
388   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
389 
390   @param basis     Basis to estimate FLOPs for
391   @param t_mode    Apply basis or transpose
392   @param eval_mode Basis evaluation mode
393   @param flops     Address of variable to hold FLOPs estimate
394 
395   @ref Backend
396 **/
397 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) {
398   bool is_tensor;
399 
400   CeedCall(CeedBasisIsTensor(basis, &is_tensor));
401   if (is_tensor) {
402     CeedInt dim, num_comp, P_1d, Q_1d;
403     CeedCall(CeedBasisGetDimension(basis, &dim));
404     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
405     CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
406     CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
407     if (t_mode == CEED_TRANSPOSE) {
408       P_1d = Q_1d;
409       Q_1d = P_1d;
410     }
411     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1;
412     for (CeedInt d = 0; d < dim; d++) {
413       tensor_flops += 2 * pre * P_1d * post * Q_1d;
414       pre /= P_1d;
415       post *= Q_1d;
416     }
417     switch (eval_mode) {
418       case CEED_EVAL_NONE:
419         *flops = 0;
420         break;
421       case CEED_EVAL_INTERP:
422         *flops = tensor_flops;
423         break;
424       case CEED_EVAL_GRAD:
425         *flops = tensor_flops * 2;
426         break;
427       case CEED_EVAL_DIV:
428         // LCOV_EXCL_START
429         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported");
430         break;
431       case CEED_EVAL_CURL:
432         return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported");
433         break;
434       // LCOV_EXCL_STOP
435       case CEED_EVAL_WEIGHT:
436         *flops = dim * CeedIntPow(Q_1d, dim);
437         break;
438     }
439   } else {
440     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
441     CeedCall(CeedBasisGetDimension(basis, &dim));
442     CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
443     CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
444     CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
445     CeedCall(CeedBasisGetNumQuadratureComponents(basis, &Q_comp));
446     switch (eval_mode) {
447       case CEED_EVAL_NONE:
448         *flops = 0;
449         break;
450       case CEED_EVAL_INTERP:
451         *flops = num_nodes * num_qpts * num_comp;
452         break;
453       case CEED_EVAL_GRAD:
454         *flops = num_nodes * num_qpts * num_comp * dim;
455         break;
456       case CEED_EVAL_DIV:
457         *flops = num_nodes * num_qpts;
458         break;
459       case CEED_EVAL_CURL:
460         *flops = num_nodes * num_qpts * dim;
461         break;
462       case CEED_EVAL_WEIGHT:
463         *flops = 0;
464         break;
465     }
466   }
467 
468   return CEED_ERROR_SUCCESS;
469 }
470 
471 /**
472   @brief Get dimension for given CeedElemTopology
473 
474   @param topo      CeedElemTopology
475   @param[out] dim  Variable to store dimension of topology
476 
477   @return An error code: 0 - success, otherwise - failure
478 
479   @ref Backend
480 **/
481 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
482   *dim = (CeedInt)topo >> 16;
483   return CEED_ERROR_SUCCESS;
484 }
485 
486 /**
487   @brief Get CeedTensorContract of a CeedBasis
488 
489   @param basis          CeedBasis
490   @param[out] contract  Variable to store CeedTensorContract
491 
492   @return An error code: 0 - success, otherwise - failure
493 
494   @ref Backend
495 **/
496 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
497   *contract = basis->contract;
498   return CEED_ERROR_SUCCESS;
499 }
500 
501 /**
502   @brief Set CeedTensorContract of a CeedBasis
503 
504   @param[out] basis  CeedBasis
505   @param contract    CeedTensorContract to set
506 
507   @return An error code: 0 - success, otherwise - failure
508 
509   @ref Backend
510 **/
511 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
512   basis->contract = contract;
513   CeedCall(CeedTensorContractReference(contract));
514   return CEED_ERROR_SUCCESS;
515 }
516 
517 /**
518   @brief Return a reference implementation of matrix multiplication C = A B.
519            Note, this is a reference implementation for CPU CeedScalar pointers
520            that is not intended for high performance.
521 
522   @param ceed        A Ceed context for error handling
523   @param[in] mat_A   Row-major matrix A
524   @param[in] mat_B   Row-major matrix B
525   @param[out] mat_C  Row-major output matrix C
526   @param m           Number of rows of C
527   @param n           Number of columns of C
528   @param kk          Number of columns of A/rows of B
529 
530   @return An error code: 0 - success, otherwise - failure
531 
532   @ref Utility
533 **/
534 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) {
535   for (CeedInt i = 0; i < m; i++) {
536     for (CeedInt j = 0; j < n; j++) {
537       CeedScalar sum = 0;
538       for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n];
539       mat_C[j + i * n] = sum;
540     }
541   }
542   return CEED_ERROR_SUCCESS;
543 }
544 
545 /// @}
546 
547 /// ----------------------------------------------------------------------------
548 /// CeedBasis Public API
549 /// ----------------------------------------------------------------------------
550 /// @addtogroup CeedBasisUser
551 /// @{
552 
553 /**
554   @brief Create a tensor-product basis for H^1 discretizations
555 
556   @param ceed        A Ceed object where the CeedBasis will be created
557   @param dim         Topological dimension
558   @param num_comp    Number of field components (1 for scalar fields)
559   @param P_1d        Number of nodes in one dimension
560   @param Q_1d        Number of quadrature points in one dimension
561   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
562                        basis functions at quadrature points
563   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
564                        basis functions at quadrature points
565   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
566                        on the 1D reference element [-1, 1]
567   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
568                        reference element
569   @param[out] basis  Address of the variable where the newly created
570                        CeedBasis will be stored.
571 
572   @return An error code: 0 - success, otherwise - failure
573 
574   @ref User
575 **/
576 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d,
577                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) {
578   if (!ceed->BasisCreateTensorH1) {
579     Ceed delegate;
580     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
581 
582     if (!delegate) {
583       // LCOV_EXCL_START
584       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1");
585       // LCOV_EXCL_STOP
586     }
587 
588     CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
589     return CEED_ERROR_SUCCESS;
590   }
591 
592   if (dim < 1) {
593     // LCOV_EXCL_START
594     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
595     // LCOV_EXCL_STOP
596   }
597 
598   if (num_comp < 1) {
599     // LCOV_EXCL_START
600     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
601     // LCOV_EXCL_STOP
602   }
603 
604   if (P_1d < 1) {
605     // LCOV_EXCL_START
606     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
607     // LCOV_EXCL_STOP
608   }
609 
610   if (Q_1d < 1) {
611     // LCOV_EXCL_START
612     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
613     // LCOV_EXCL_STOP
614   }
615 
616   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX;
617 
618   CeedCall(CeedCalloc(1, basis));
619   (*basis)->ceed = ceed;
620   CeedCall(CeedReference(ceed));
621   (*basis)->ref_count    = 1;
622   (*basis)->tensor_basis = 1;
623   (*basis)->dim          = dim;
624   (*basis)->topo         = topo;
625   (*basis)->num_comp     = num_comp;
626   (*basis)->P_1d         = P_1d;
627   (*basis)->Q_1d         = Q_1d;
628   (*basis)->P            = CeedIntPow(P_1d, dim);
629   (*basis)->Q            = CeedIntPow(Q_1d, dim);
630   (*basis)->Q_comp       = 1;
631   (*basis)->basis_space  = 1;  // 1 for H^1 space
632   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d));
633   CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d));
634   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0]));
635   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0]));
636   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d));
637   CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d));
638   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0]));
639   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0]));
640   CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis));
641   return CEED_ERROR_SUCCESS;
642 }
643 
644 /**
645   @brief Create a tensor-product Lagrange basis
646 
647   @param ceed        A Ceed object where the CeedBasis will be created
648   @param dim         Topological dimension of element
649   @param num_comp      Number of field components (1 for scalar fields)
650   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
651                        polynomial degree of the resulting Q_k element is k=P-1.
652   @param Q           Number of quadrature points in one dimension.
653   @param quad_mode   Distribution of the Q quadrature points (affects order of
654                        accuracy for the quadrature)
655   @param[out] basis  Address of the variable where the newly created
656                        CeedBasis will be stored.
657 
658   @return An error code: 0 - success, otherwise - failure
659 
660   @ref User
661 **/
662 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) {
663   // Allocate
664   int        ierr = CEED_ERROR_SUCCESS, i, j, k;
665   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d;
666 
667   if (dim < 1) {
668     // LCOV_EXCL_START
669     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value");
670     // LCOV_EXCL_STOP
671   }
672 
673   if (num_comp < 1) {
674     // LCOV_EXCL_START
675     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
676     // LCOV_EXCL_STOP
677   }
678 
679   if (P < 1) {
680     // LCOV_EXCL_START
681     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
682     // LCOV_EXCL_STOP
683   }
684 
685   if (Q < 1) {
686     // LCOV_EXCL_START
687     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
688     // LCOV_EXCL_STOP
689   }
690 
691   // Get Nodes and Weights
692   CeedCall(CeedCalloc(P * Q, &interp_1d));
693   CeedCall(CeedCalloc(P * Q, &grad_1d));
694   CeedCall(CeedCalloc(P, &nodes));
695   CeedCall(CeedCalloc(Q, &q_ref_1d));
696   CeedCall(CeedCalloc(Q, &q_weight_1d));
697   if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup;
698   switch (quad_mode) {
699     case CEED_GAUSS:
700       ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
701       break;
702     case CEED_GAUSS_LOBATTO:
703       ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
704       break;
705   }
706   if (ierr != CEED_ERROR_SUCCESS) goto cleanup;
707 
708   // Build B, D matrix
709   // Fornberg, 1998
710   for (i = 0; i < Q; i++) {
711     c1                   = 1.0;
712     c3                   = nodes[0] - q_ref_1d[i];
713     interp_1d[i * P + 0] = 1.0;
714     for (j = 1; j < P; j++) {
715       c2 = 1.0;
716       c4 = c3;
717       c3 = nodes[j] - q_ref_1d[i];
718       for (k = 0; k < j; k++) {
719         dx = nodes[j] - nodes[k];
720         c2 *= dx;
721         if (k == j - 1) {
722           grad_1d[i * P + j]   = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2;
723           interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2;
724         }
725         grad_1d[i * P + k]   = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx;
726         interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx;
727       }
728       c1 = c2;
729     }
730   }
731   // Pass to CeedBasisCreateTensorH1
732   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis));
733 cleanup:
734   CeedCall(CeedFree(&interp_1d));
735   CeedCall(CeedFree(&grad_1d));
736   CeedCall(CeedFree(&nodes));
737   CeedCall(CeedFree(&q_ref_1d));
738   CeedCall(CeedFree(&q_weight_1d));
739   return CEED_ERROR_SUCCESS;
740 }
741 
742 /**
743   @brief Create a non tensor-product basis for H^1 discretizations
744 
745   @param ceed        A Ceed object where the CeedBasis will be created
746   @param topo        Topology of element, e.g. hypercube, simplex, ect
747   @param num_comp    Number of field components (1 for scalar fields)
748   @param num_nodes   Total number of nodes
749   @param num_qpts    Total number of quadrature points
750   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
751                        nodal basis functions at quadrature points
752   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
753                        derivatives of nodal basis functions at quadrature points
754   @param q_ref       Array of length num_qpts holding the locations of quadrature
755                        points on the reference element
756   @param q_weight    Array of length num_qpts holding the quadrature weights on the
757                        reference element
758   @param[out] basis  Address of the variable where the newly created
759                        CeedBasis will be stored.
760 
761   @return An error code: 0 - success, otherwise - failure
762 
763   @ref User
764 **/
765 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
766                       const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
767   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
768 
769   if (!ceed->BasisCreateH1) {
770     Ceed delegate;
771     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
772 
773     if (!delegate) {
774       // LCOV_EXCL_START
775       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1");
776       // LCOV_EXCL_STOP
777     }
778 
779     CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis));
780     return CEED_ERROR_SUCCESS;
781   }
782 
783   if (num_comp < 1) {
784     // LCOV_EXCL_START
785     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
786     // LCOV_EXCL_STOP
787   }
788 
789   if (num_nodes < 1) {
790     // LCOV_EXCL_START
791     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
792     // LCOV_EXCL_STOP
793   }
794 
795   if (num_qpts < 1) {
796     // LCOV_EXCL_START
797     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
798     // LCOV_EXCL_STOP
799   }
800 
801   CeedCall(CeedCalloc(1, basis));
802 
803   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
804 
805   (*basis)->ceed = ceed;
806   CeedCall(CeedReference(ceed));
807   (*basis)->ref_count    = 1;
808   (*basis)->tensor_basis = 0;
809   (*basis)->dim          = dim;
810   (*basis)->topo         = topo;
811   (*basis)->num_comp     = num_comp;
812   (*basis)->P            = P;
813   (*basis)->Q            = Q;
814   (*basis)->Q_comp       = 1;
815   (*basis)->basis_space  = 1;  // 1 for H^1 space
816   CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d));
817   CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d));
818   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
819   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
820   CeedCall(CeedCalloc(Q * P, &(*basis)->interp));
821   CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad));
822   if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0]));
823   if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0]));
824   CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis));
825   return CEED_ERROR_SUCCESS;
826 }
827 
828 /**
829   @brief Create a non tensor-product basis for H(div) discretizations
830 
831   @param ceed        A Ceed object where the CeedBasis will be created
832   @param topo        Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.),
833                      dimension of which is used in some array sizes below
834   @param num_comp    Number of components (usually 1 for vectors in H(div) bases)
835   @param num_nodes   Total number of nodes (dofs per element)
836   @param num_qpts    Total number of quadrature points
837   @param interp      Row-major (dim*num_qpts * num_nodes) matrix expressing the values of
838                        nodal basis functions at quadrature points
839   @param div        Row-major (num_qpts * num_nodes) matrix expressing
840                        divergence of nodal basis functions at quadrature points
841   @param q_ref       Array of length num_qpts holding the locations of quadrature
842                        points on the reference element
843   @param q_weight    Array of length num_qpts holding the quadrature weights on the
844                        reference element
845   @param[out] basis  Address of the variable where the newly created
846                        CeedBasis will be stored.
847 
848   @return An error code: 0 - success, otherwise - failure
849 
850   @ref User
851 **/
852 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
853                         const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) {
854   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
855   CeedCall(CeedBasisGetTopologyDimension(topo, &dim));
856   if (!ceed->BasisCreateHdiv) {
857     Ceed delegate;
858     CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis"));
859 
860     if (!delegate) {
861       // LCOV_EXCL_START
862       return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv");
863       // LCOV_EXCL_STOP
864     }
865 
866     CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis));
867     return CEED_ERROR_SUCCESS;
868   }
869 
870   if (num_comp < 1) {
871     // LCOV_EXCL_START
872     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component");
873     // LCOV_EXCL_STOP
874   }
875 
876   if (num_nodes < 1) {
877     // LCOV_EXCL_START
878     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node");
879     // LCOV_EXCL_STOP
880   }
881 
882   if (num_qpts < 1) {
883     // LCOV_EXCL_START
884     return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point");
885     // LCOV_EXCL_STOP
886   }
887 
888   CeedCall(CeedCalloc(1, basis));
889 
890   (*basis)->ceed = ceed;
891   CeedCall(CeedReference(ceed));
892   (*basis)->ref_count    = 1;
893   (*basis)->tensor_basis = 0;
894   (*basis)->dim          = dim;
895   (*basis)->topo         = topo;
896   (*basis)->num_comp     = num_comp;
897   (*basis)->P            = P;
898   (*basis)->Q            = Q;
899   (*basis)->Q_comp       = dim;
900   (*basis)->basis_space  = 2;  // 2 for H(div) space
901   CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d));
902   CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d));
903   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0]));
904   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0]));
905   CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp));
906   CeedCall(CeedMalloc(Q * P, &(*basis)->div));
907   if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0]));
908   if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0]));
909   CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis));
910   return CEED_ERROR_SUCCESS;
911 }
912 
913 /**
914   @brief Create a CeedBasis for projection from the nodes of `basis_from`
915            to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` and
916            `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`.
917            The interpolation is given by `interp_project = interp_to^+ * interp_from`,
918            where the pesudoinverse `interp_to^+` is given by QR factorization.
919            The gradient is given by `grad_project = interp_to^+ * grad_from`.
920          Note: `basis_from` and `basis_to` must have compatible quadrature
921            spaces.
922          Note: `basis_project` will have the same number of components as
923            `basis_from`, regardless of the number of components that
924            `basis_to` has. If `basis_from` has 3 components and `basis_to`
925            has 5 components, then `basis_project` will have 3 components.
926 
927   @param[in] basis_from      CeedBasis to prolong from
928   @param[in] basis_to        CeedBasis to prolong to
929   @param[out] basis_project  Address of the variable where the newly created
930                                CeedBasis will be stored.
931 
932   @return An error code: 0 - success, otherwise - failure
933 
934   @ref User
935 **/
936 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) {
937   Ceed ceed;
938   CeedCall(CeedBasisGetCeed(basis_to, &ceed));
939 
940   // Create projectior matrix
941   CeedScalar *interp_project, *grad_project;
942   CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project));
943 
944   // Build basis
945   bool        is_tensor;
946   CeedInt     dim, num_comp;
947   CeedScalar *q_ref, *q_weight;
948   CeedCall(CeedBasisIsTensor(basis_to, &is_tensor));
949   CeedCall(CeedBasisGetDimension(basis_to, &dim));
950   CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp));
951   if (is_tensor) {
952     CeedInt P_1d_to, P_1d_from;
953     CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from));
954     CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to));
955     CeedCall(CeedCalloc(P_1d_to, &q_ref));
956     CeedCall(CeedCalloc(P_1d_to, &q_weight));
957     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project));
958   } else {
959     CeedElemTopology topo;
960     CeedCall(CeedBasisGetTopology(basis_to, &topo));
961     CeedInt num_nodes_to, num_nodes_from;
962     CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from));
963     CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to));
964     CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref));
965     CeedCall(CeedCalloc(num_nodes_to, &q_weight));
966     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project));
967   }
968 
969   // Cleanup
970   CeedCall(CeedFree(&interp_project));
971   CeedCall(CeedFree(&grad_project));
972   CeedCall(CeedFree(&q_ref));
973   CeedCall(CeedFree(&q_weight));
974 
975   return CEED_ERROR_SUCCESS;
976 }
977 
978 /**
979   @brief Copy the pointer to a CeedBasis. Both pointers should
980            be destroyed with `CeedBasisDestroy()`;
981            Note: If `*basis_copy` is non-NULL, then it is assumed that
982            `*basis_copy` is a pointer to a CeedBasis. This CeedBasis
983            will be destroyed if `*basis_copy` is the only
984            reference to this CeedBasis.
985 
986   @param basis            CeedBasis to copy reference to
987   @param[out] basis_copy  Variable to store copied reference
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref User
992 **/
993 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
994   CeedCall(CeedBasisReference(basis));
995   CeedCall(CeedBasisDestroy(basis_copy));
996   *basis_copy = basis;
997   return CEED_ERROR_SUCCESS;
998 }
999 
1000 /**
1001   @brief View a CeedBasis
1002 
1003   @param basis   CeedBasis to view
1004   @param stream  Stream to view to, e.g., stdout
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 
1008   @ref User
1009 **/
1010 int CeedBasisView(CeedBasis basis, FILE *stream) {
1011   CeedFESpace      FE_space = basis->basis_space;
1012   CeedElemTopology topo     = basis->topo;
1013 
1014   // Print FE space and element topology of the basis
1015   if (basis->tensor_basis) {
1016     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
1017             CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d);
1018   } else {
1019     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space],
1020             CeedElemTopologies[topo], basis->dim, basis->P, basis->Q);
1021   }
1022   // Print quadrature data, interpolation/gradient/divergene/curl of the basis
1023   if (basis->tensor_basis) {  // tensor basis
1024     CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream));
1025     CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream));
1026     CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream));
1027     CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream));
1028   } else {  // non-tensor basis
1029     CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream));
1030     CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream));
1031     CeedCall(CeedScalarView("interp", "\t% 12.8f", basis->Q_comp * basis->Q, basis->P, basis->interp, stream));
1032     if (basis->grad) {
1033       CeedCall(CeedScalarView("grad", "\t% 12.8f", basis->dim * basis->Q, basis->P, basis->grad, stream));
1034     }
1035     if (basis->div) {
1036       CeedCall(CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P, basis->div, stream));
1037     }
1038   }
1039   return CEED_ERROR_SUCCESS;
1040 }
1041 
1042 /**
1043   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1044 
1045   @param basis     CeedBasis to evaluate
1046   @param num_elem  The number of elements to apply the basis evaluation to;
1047                      the backend will specify the ordering in
1048                      CeedElemRestrictionCreateBlocked()
1049   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
1050                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
1051                      from quadrature points to nodes
1052   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
1053                      \ref CEED_EVAL_INTERP to use interpolated values,
1054                      \ref CEED_EVAL_GRAD to use gradients,
1055                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
1056   @param[in] u     Input CeedVector
1057   @param[out] v    Output CeedVector
1058 
1059   @return An error code: 0 - success, otherwise - failure
1060 
1061   @ref User
1062 **/
1063 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1064   CeedSize u_length = 0, v_length;
1065   CeedInt  dim, num_comp, num_nodes, num_qpts;
1066   CeedCall(CeedBasisGetDimension(basis, &dim));
1067   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1068   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
1069   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1070   CeedCall(CeedVectorGetLength(v, &v_length));
1071   if (u) {
1072     CeedCall(CeedVectorGetLength(u, &u_length));
1073   }
1074 
1075   if (!basis->Apply) {
1076     // LCOV_EXCL_START
1077     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply");
1078     // LCOV_EXCL_STOP
1079   }
1080 
1081   // Check compatibility of topological and geometrical dimensions
1082   if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) ||
1083       (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) {
1084     // LCOV_EXCL_START
1085     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions");
1086     // LCOV_EXCL_STOP
1087   }
1088 
1089   // Check vector lengths to prevent out of bounds issues
1090   bool bad_dims = false;
1091   switch (eval_mode) {
1092     case CEED_EVAL_NONE:
1093     case CEED_EVAL_INTERP:
1094       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1095                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1096       break;
1097     case CEED_EVAL_GRAD:
1098       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * dim || v_length < num_elem * num_comp * num_nodes)) ||
1099                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * dim || u_length < num_elem * num_comp * num_nodes)));
1100       break;
1101     case CEED_EVAL_WEIGHT:
1102       bad_dims = v_length < num_elem * num_qpts;
1103       break;
1104     // LCOV_EXCL_START
1105     case CEED_EVAL_DIV:
1106       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1107                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1108       break;
1109     case CEED_EVAL_CURL:
1110       bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) ||
1111                   (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes)));
1112       break;
1113       // LCOV_EXCL_STOP
1114   }
1115   if (bad_dims) {
1116     // LCOV_EXCL_START
1117     return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode");
1118     // LCOV_EXCL_STOP
1119   }
1120 
1121   CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v));
1122   return CEED_ERROR_SUCCESS;
1123 }
1124 
1125 /**
1126   @brief Get Ceed associated with a CeedBasis
1127 
1128   @param basis      CeedBasis
1129   @param[out] ceed  Variable to store Ceed
1130 
1131   @return An error code: 0 - success, otherwise - failure
1132 
1133   @ref Advanced
1134 **/
1135 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1136   *ceed = basis->ceed;
1137   return CEED_ERROR_SUCCESS;
1138 }
1139 
1140 /**
1141   @brief Get dimension for given CeedBasis
1142 
1143   @param basis     CeedBasis
1144   @param[out] dim  Variable to store dimension of basis
1145 
1146   @return An error code: 0 - success, otherwise - failure
1147 
1148   @ref Advanced
1149 **/
1150 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1151   *dim = basis->dim;
1152   return CEED_ERROR_SUCCESS;
1153 }
1154 
1155 /**
1156   @brief Get topology for given CeedBasis
1157 
1158   @param basis      CeedBasis
1159   @param[out] topo  Variable to store topology of basis
1160 
1161   @return An error code: 0 - success, otherwise - failure
1162 
1163   @ref Advanced
1164 **/
1165 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1166   *topo = basis->topo;
1167   return CEED_ERROR_SUCCESS;
1168 }
1169 
1170 /**
1171   @brief Get number of Q-vector components for given CeedBasis
1172 
1173   @param basis          CeedBasis
1174   @param[out] Q_comp  Variable to store number of Q-vector components of basis
1175 
1176   @return An error code: 0 - success, otherwise - failure
1177 
1178   @ref Advanced
1179 **/
1180 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
1181   *Q_comp = basis->Q_comp;
1182   return CEED_ERROR_SUCCESS;
1183 }
1184 
1185 /**
1186   @brief Get number of components for given CeedBasis
1187 
1188   @param basis          CeedBasis
1189   @param[out] num_comp  Variable to store number of components of basis
1190 
1191   @return An error code: 0 - success, otherwise - failure
1192 
1193   @ref Advanced
1194 **/
1195 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1196   *num_comp = basis->num_comp;
1197   return CEED_ERROR_SUCCESS;
1198 }
1199 
1200 /**
1201   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1202 
1203   @param basis   CeedBasis
1204   @param[out] P  Variable to store number of nodes
1205 
1206   @return An error code: 0 - success, otherwise - failure
1207 
1208   @ref Utility
1209 **/
1210 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1211   *P = basis->P;
1212   return CEED_ERROR_SUCCESS;
1213 }
1214 
1215 /**
1216   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1217 
1218   @param basis     CeedBasis
1219   @param[out] P_1d  Variable to store number of nodes
1220 
1221   @return An error code: 0 - success, otherwise - failure
1222 
1223   @ref Advanced
1224 **/
1225 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1226   if (!basis->tensor_basis) {
1227     // LCOV_EXCL_START
1228     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis");
1229     // LCOV_EXCL_STOP
1230   }
1231 
1232   *P_1d = basis->P_1d;
1233   return CEED_ERROR_SUCCESS;
1234 }
1235 
1236 /**
1237   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1238 
1239   @param basis   CeedBasis
1240   @param[out] Q  Variable to store number of quadrature points
1241 
1242   @return An error code: 0 - success, otherwise - failure
1243 
1244   @ref Utility
1245 **/
1246 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1247   *Q = basis->Q;
1248   return CEED_ERROR_SUCCESS;
1249 }
1250 
1251 /**
1252   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1253 
1254   @param basis      CeedBasis
1255   @param[out] Q_1d  Variable to store number of quadrature points
1256 
1257   @return An error code: 0 - success, otherwise - failure
1258 
1259   @ref Advanced
1260 **/
1261 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1262   if (!basis->tensor_basis) {
1263     // LCOV_EXCL_START
1264     return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis");
1265     // LCOV_EXCL_STOP
1266   }
1267 
1268   *Q_1d = basis->Q_1d;
1269   return CEED_ERROR_SUCCESS;
1270 }
1271 
1272 /**
1273   @brief Get reference coordinates of quadrature points (in dim dimensions)
1274          of a CeedBasis
1275 
1276   @param basis       CeedBasis
1277   @param[out] q_ref  Variable to store reference coordinates of quadrature points
1278 
1279   @return An error code: 0 - success, otherwise - failure
1280 
1281   @ref Advanced
1282 **/
1283 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1284   *q_ref = basis->q_ref_1d;
1285   return CEED_ERROR_SUCCESS;
1286 }
1287 
1288 /**
1289   @brief Get quadrature weights of quadrature points (in dim dimensions)
1290          of a CeedBasis
1291 
1292   @param basis          CeedBasis
1293   @param[out] q_weight  Variable to store quadrature weights
1294 
1295   @return An error code: 0 - success, otherwise - failure
1296 
1297   @ref Advanced
1298 **/
1299 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1300   *q_weight = basis->q_weight_1d;
1301   return CEED_ERROR_SUCCESS;
1302 }
1303 
1304 /**
1305   @brief Get interpolation matrix of a CeedBasis
1306 
1307   @param basis        CeedBasis
1308   @param[out] interp  Variable to store interpolation matrix
1309 
1310   @return An error code: 0 - success, otherwise - failure
1311 
1312   @ref Advanced
1313 **/
1314 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1315   if (!basis->interp && basis->tensor_basis) {
1316     // Allocate
1317     CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp));
1318 
1319     // Initialize
1320     for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0;
1321 
1322     // Calculate
1323     for (CeedInt d = 0; d < basis->dim; d++) {
1324       for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1325         for (CeedInt node = 0; node < basis->P; node++) {
1326           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1327           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1328           basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1329         }
1330       }
1331     }
1332   }
1333   *interp = basis->interp;
1334   return CEED_ERROR_SUCCESS;
1335 }
1336 
1337 /**
1338   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1339 
1340   @param basis           CeedBasis
1341   @param[out] interp_1d  Variable to store interpolation matrix
1342 
1343   @return An error code: 0 - success, otherwise - failure
1344 
1345   @ref Backend
1346 **/
1347 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1348   if (!basis->tensor_basis) {
1349     // LCOV_EXCL_START
1350     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1351     // LCOV_EXCL_STOP
1352   }
1353 
1354   *interp_1d = basis->interp_1d;
1355   return CEED_ERROR_SUCCESS;
1356 }
1357 
1358 /**
1359   @brief Get gradient matrix of a CeedBasis
1360 
1361   @param basis      CeedBasis
1362   @param[out] grad  Variable to store gradient matrix
1363 
1364   @return An error code: 0 - success, otherwise - failure
1365 
1366   @ref Advanced
1367 **/
1368 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1369   if (!basis->grad && basis->tensor_basis) {
1370     // Allocate
1371     CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad));
1372 
1373     // Initialize
1374     for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0;
1375 
1376     // Calculate
1377     for (CeedInt d = 0; d < basis->dim; d++) {
1378       for (CeedInt i = 0; i < basis->dim; i++) {
1379         for (CeedInt qpt = 0; qpt < basis->Q; qpt++) {
1380           for (CeedInt node = 0; node < basis->P; node++) {
1381             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1382             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1383             if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p];
1384             else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p];
1385           }
1386         }
1387       }
1388     }
1389   }
1390   *grad = basis->grad;
1391   return CEED_ERROR_SUCCESS;
1392 }
1393 
1394 /**
1395   @brief Get 1D gradient matrix of a tensor product CeedBasis
1396 
1397   @param basis         CeedBasis
1398   @param[out] grad_1d  Variable to store gradient matrix
1399 
1400   @return An error code: 0 - success, otherwise - failure
1401 
1402   @ref Advanced
1403 **/
1404 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1405   if (!basis->tensor_basis) {
1406     // LCOV_EXCL_START
1407     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis.");
1408     // LCOV_EXCL_STOP
1409   }
1410 
1411   *grad_1d = basis->grad_1d;
1412   return CEED_ERROR_SUCCESS;
1413 }
1414 
1415 /**
1416   @brief Get divergence matrix of a CeedBasis
1417 
1418   @param basis     CeedBasis
1419   @param[out] div  Variable to store divergence matrix
1420 
1421   @return An error code: 0 - success, otherwise - failure
1422 
1423   @ref Advanced
1424 **/
1425 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1426   if (!basis->div) {
1427     // LCOV_EXCL_START
1428     return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix.");
1429     // LCOV_EXCL_STOP
1430   }
1431 
1432   *div = basis->div;
1433   return CEED_ERROR_SUCCESS;
1434 }
1435 
1436 /**
1437   @brief Destroy a CeedBasis
1438 
1439   @param basis CeedBasis to destroy
1440 
1441   @return An error code: 0 - success, otherwise - failure
1442 
1443   @ref User
1444 **/
1445 int CeedBasisDestroy(CeedBasis *basis) {
1446   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1447   if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis));
1448   if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract));
1449   CeedCall(CeedFree(&(*basis)->interp));
1450   CeedCall(CeedFree(&(*basis)->interp_1d));
1451   CeedCall(CeedFree(&(*basis)->grad));
1452   CeedCall(CeedFree(&(*basis)->div));
1453   CeedCall(CeedFree(&(*basis)->grad_1d));
1454   CeedCall(CeedFree(&(*basis)->q_ref_1d));
1455   CeedCall(CeedFree(&(*basis)->q_weight_1d));
1456   CeedCall(CeedDestroy(&(*basis)->ceed));
1457   CeedCall(CeedFree(basis));
1458   return CEED_ERROR_SUCCESS;
1459 }
1460 
1461 /**
1462   @brief Construct a Gauss-Legendre quadrature
1463 
1464   @param Q                 Number of quadrature points (integrates polynomials of
1465                              degree 2*Q-1 exactly)
1466   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1467   @param[out] q_weight_1d  Array of length Q to hold the weights
1468 
1469   @return An error code: 0 - success, otherwise - failure
1470 
1471   @ref Utility
1472 **/
1473 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1474   // Allocate
1475   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0);
1476   // Build q_ref_1d, q_weight_1d
1477   for (CeedInt i = 0; i <= Q / 2; i++) {
1478     // Guess
1479     xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q)));
1480     // Pn(xi)
1481     P0 = 1.0;
1482     P1 = xi;
1483     P2 = 0.0;
1484     for (CeedInt j = 2; j <= Q; j++) {
1485       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1486       P0 = P1;
1487       P1 = P2;
1488     }
1489     // First Newton Step
1490     dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1491     xi  = xi - P2 / dP2;
1492     // Newton to convergence
1493     for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) {
1494       P0 = 1.0;
1495       P1 = xi;
1496       for (CeedInt j = 2; j <= Q; j++) {
1497         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1498         P0 = P1;
1499         P1 = P2;
1500       }
1501       dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1502       xi  = xi - P2 / dP2;
1503     }
1504     // Save xi, wi
1505     wi                     = 2.0 / ((1.0 - xi * xi) * dP2 * dP2);
1506     q_weight_1d[i]         = wi;
1507     q_weight_1d[Q - 1 - i] = wi;
1508     q_ref_1d[i]            = -xi;
1509     q_ref_1d[Q - 1 - i]    = xi;
1510   }
1511   return CEED_ERROR_SUCCESS;
1512 }
1513 
1514 /**
1515   @brief Construct a Gauss-Legendre-Lobatto quadrature
1516 
1517   @param Q                 Number of quadrature points (integrates polynomials of
1518                              degree 2*Q-3 exactly)
1519   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1520   @param[out] q_weight_1d  Array of length Q to hold the weights
1521 
1522   @return An error code: 0 - success, otherwise - failure
1523 
1524   @ref Utility
1525 **/
1526 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) {
1527   // Allocate
1528   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0);
1529   // Build q_ref_1d, q_weight_1d
1530   // Set endpoints
1531   if (Q < 2) {
1532     // LCOV_EXCL_START
1533     return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1534     // LCOV_EXCL_STOP
1535   }
1536   wi = 2.0 / ((CeedScalar)(Q * (Q - 1)));
1537   if (q_weight_1d) {
1538     q_weight_1d[0]     = wi;
1539     q_weight_1d[Q - 1] = wi;
1540   }
1541   q_ref_1d[0]     = -1.0;
1542   q_ref_1d[Q - 1] = 1.0;
1543   // Interior
1544   for (CeedInt i = 1; i <= (Q - 1) / 2; i++) {
1545     // Guess
1546     xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1));
1547     // Pn(xi)
1548     P0 = 1.0;
1549     P1 = xi;
1550     P2 = 0.0;
1551     for (CeedInt j = 2; j < Q; j++) {
1552       P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1553       P0 = P1;
1554       P1 = P2;
1555     }
1556     // First Newton step
1557     dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1558     d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1559     xi   = xi - dP2 / d2P2;
1560     // Newton to convergence
1561     for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) {
1562       P0 = 1.0;
1563       P1 = xi;
1564       for (CeedInt j = 2; j < Q; j++) {
1565         P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j));
1566         P0 = P1;
1567         P1 = P2;
1568       }
1569       dP2  = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0);
1570       d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi);
1571       xi   = xi - dP2 / d2P2;
1572     }
1573     // Save xi, wi
1574     wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2);
1575     if (q_weight_1d) {
1576       q_weight_1d[i]         = wi;
1577       q_weight_1d[Q - 1 - i] = wi;
1578     }
1579     q_ref_1d[i]         = -xi;
1580     q_ref_1d[Q - 1 - i] = xi;
1581   }
1582   return CEED_ERROR_SUCCESS;
1583 }
1584 
1585 /**
1586   @brief Return QR Factorization of a matrix
1587 
1588   @param ceed         A Ceed context for error handling
1589   @param[in,out] mat  Row-major matrix to be factorized in place
1590   @param[in,out] tau  Vector of length m of scaling factors
1591   @param m            Number of rows
1592   @param n            Number of columns
1593 
1594   @return An error code: 0 - success, otherwise - failure
1595 
1596   @ref Utility
1597 **/
1598 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) {
1599   CeedScalar v[m];
1600 
1601   // Check matrix shape
1602   if (n > m) {
1603     // LCOV_EXCL_START
1604     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m");
1605     // LCOV_EXCL_STOP
1606   }
1607 
1608   for (CeedInt i = 0; i < n; i++) {
1609     if (i >= m - 1) {  // last row of matrix, no reflection needed
1610       tau[i] = 0.;
1611       break;
1612     }
1613     // Calculate Householder vector, magnitude
1614     CeedScalar sigma = 0.0;
1615     v[i]             = mat[i + n * i];
1616     for (CeedInt j = i + 1; j < m; j++) {
1617       v[j] = mat[i + n * j];
1618       sigma += v[j] * v[j];
1619     }
1620     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:m]
1621     CeedScalar R_ii = -copysign(norm, v[i]);
1622     v[i] -= R_ii;
1623     // norm of v[i:m] after modification above and scaling below
1624     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1625     //   tau = 2 / (norm*norm)
1626     tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1627     for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i];
1628 
1629     // Apply Householder reflector to lower right panel
1630     CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1);
1631     // Save v
1632     mat[i + n * i] = R_ii;
1633     for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j];
1634   }
1635   return CEED_ERROR_SUCCESS;
1636 }
1637 
1638 /**
1639   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1640            symmetric QR factorization
1641 
1642   @param ceed         A Ceed context for error handling
1643   @param[in,out] mat  Row-major matrix to be factorized in place
1644   @param[out] lambda  Vector of length n of eigenvalues
1645   @param n            Number of rows/columns
1646 
1647   @return An error code: 0 - success, otherwise - failure
1648 
1649   @ref Utility
1650 **/
1651 CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) {
1652   // Check bounds for clang-tidy
1653   if (n < 2) {
1654     // LCOV_EXCL_START
1655     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars");
1656     // LCOV_EXCL_STOP
1657   }
1658 
1659   CeedScalar v[n - 1], tau[n - 1], mat_T[n * n];
1660 
1661   // Copy mat to mat_T and set mat to I
1662   memcpy(mat_T, mat, n * n * sizeof(mat[0]));
1663   for (CeedInt i = 0; i < n; i++) {
1664     for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0;
1665   }
1666 
1667   // Reduce to tridiagonal
1668   for (CeedInt i = 0; i < n - 1; i++) {
1669     // Calculate Householder vector, magnitude
1670     CeedScalar sigma = 0.0;
1671     v[i]             = mat_T[i + n * (i + 1)];
1672     for (CeedInt j = i + 1; j < n - 1; j++) {
1673       v[j] = mat_T[i + n * (j + 1)];
1674       sigma += v[j] * v[j];
1675     }
1676     CeedScalar norm = sqrt(v[i] * v[i] + sigma);  // norm of v[i:n-1]
1677     CeedScalar R_ii = -copysign(norm, v[i]);
1678     v[i] -= R_ii;
1679     // norm of v[i:m] after modification above and scaling below
1680     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1681     //   tau = 2 / (norm*norm)
1682     tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma);
1683     for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i];
1684 
1685     // Update sub and super diagonal
1686     for (CeedInt j = i + 2; j < n; j++) {
1687       mat_T[i + n * j] = 0;
1688       mat_T[j + n * i] = 0;
1689     }
1690     // Apply symmetric Householder reflector to lower right panel
1691     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1692     CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n);
1693 
1694     // Save v
1695     mat_T[i + n * (i + 1)] = R_ii;
1696     mat_T[(i + 1) + n * i] = R_ii;
1697     for (CeedInt j = i + 1; j < n - 1; j++) {
1698       mat_T[i + n * (j + 1)] = v[j];
1699     }
1700   }
1701   // Backwards accumulation of Q
1702   for (CeedInt i = n - 2; i >= 0; i--) {
1703     if (tau[i] > 0.0) {
1704       v[i] = 1;
1705       for (CeedInt j = i + 1; j < n - 1; j++) {
1706         v[j]                   = mat_T[i + n * (j + 1)];
1707         mat_T[i + n * (j + 1)] = 0;
1708       }
1709       CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1);
1710     }
1711   }
1712 
1713   // Reduce sub and super diagonal
1714   CeedInt    p = 0, q = 0, itr = 0, max_itr = n * n * n * n;
1715   CeedScalar tol = CEED_EPSILON;
1716 
1717   while (itr < max_itr) {
1718     // Update p, q, size of reduced portions of diagonal
1719     p = 0;
1720     q = 0;
1721     for (CeedInt i = n - 2; i >= 0; i--) {
1722       if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1;
1723       else break;
1724     }
1725     for (CeedInt i = 0; i < n - q - 1; i++) {
1726       if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1;
1727       else break;
1728     }
1729     if (q == n - 1) break;  // Finished reducing
1730 
1731     // Reduce tridiagonal portion
1732     CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)];
1733     CeedScalar d  = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2;
1734     CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d));
1735     CeedScalar x  = mat_T[p + n * p] - mu;
1736     CeedScalar z  = mat_T[p + n * (p + 1)];
1737     for (CeedInt k = p; k < n - q - 1; k++) {
1738       // Compute Givens rotation
1739       CeedScalar c = 1, s = 0;
1740       if (fabs(z) > tol) {
1741         if (fabs(z) > fabs(x)) {
1742           CeedScalar tau = -x / z;
1743           s = 1 / sqrt(1 + tau * tau), c = s * tau;
1744         } else {
1745           CeedScalar tau = -z / x;
1746           c = 1 / sqrt(1 + tau * tau), s = c * tau;
1747         }
1748       }
1749 
1750       // Apply Givens rotation to T
1751       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1752       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n);
1753 
1754       // Apply Givens rotation to Q
1755       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n);
1756 
1757       // Update x, z
1758       if (k < n - q - 2) {
1759         x = mat_T[k + n * (k + 1)];
1760         z = mat_T[k + n * (k + 2)];
1761       }
1762     }
1763     itr++;
1764   }
1765 
1766   // Save eigenvalues
1767   for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i];
1768 
1769   // Check convergence
1770   if (itr == max_itr && q < n - 1) {
1771     // LCOV_EXCL_START
1772     return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge");
1773     // LCOV_EXCL_STOP
1774   }
1775   return CEED_ERROR_SUCCESS;
1776 }
1777 CeedPragmaOptimizeOn
1778 
1779     /**
1780       @brief Return Simultaneous Diagonalization of two matrices. This solves the
1781                generalized eigenvalue problem A x = lambda B x, where A and B
1782                are symmetric and B is positive definite. We generate the matrix X
1783                and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1784                is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1785 
1786       @param ceed         A Ceed context for error handling
1787       @param[in] mat_A    Row-major matrix to be factorized with eigenvalues
1788       @param[in] mat_B    Row-major matrix to be factorized to identity
1789       @param[out] mat_X   Row-major orthogonal matrix
1790       @param[out] lambda  Vector of length n of generalized eigenvalues
1791       @param n            Number of rows/columns
1792 
1793       @return An error code: 0 - success, otherwise - failure
1794 
1795       @ref Utility
1796     **/
1797     CeedPragmaOptimizeOff int
1798     CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) {
1799   CeedScalar *mat_C, *mat_G, *vec_D;
1800   CeedCall(CeedCalloc(n * n, &mat_C));
1801   CeedCall(CeedCalloc(n * n, &mat_G));
1802   CeedCall(CeedCalloc(n, &vec_D));
1803 
1804   // Compute B = G D G^T
1805   memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0]));
1806   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n));
1807 
1808   // Sort eigenvalues
1809   for (CeedInt i = n - 1; i >= 0; i--) {
1810     for (CeedInt j = 0; j < i; j++) {
1811       if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) {
1812         CeedScalar temp;
1813         temp         = vec_D[j];
1814         vec_D[j]     = vec_D[j + 1];
1815         vec_D[j + 1] = temp;
1816         for (CeedInt k = 0; k < n; k++) {
1817           temp                 = mat_G[k * n + j];
1818           mat_G[k * n + j]     = mat_G[k * n + j + 1];
1819           mat_G[k * n + j + 1] = temp;
1820         }
1821       }
1822     }
1823   }
1824 
1825   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1826   //           = D^-1/2 G^T A G D^-1/2
1827   // -- D = D^-1/2
1828   for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]);
1829   // -- G = G D^-1/2
1830   // -- C = D^-1/2 G^T
1831   for (CeedInt i = 0; i < n; i++) {
1832     for (CeedInt j = 0; j < n; j++) {
1833       mat_G[i * n + j] *= vec_D[j];
1834       mat_C[j * n + i] = mat_G[i * n + j];
1835     }
1836   }
1837   // -- X = (D^-1/2 G^T) A
1838   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n));
1839   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1840   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n));
1841 
1842   // Compute Q^T C Q = lambda
1843   CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n));
1844 
1845   // Sort eigenvalues
1846   for (CeedInt i = n - 1; i >= 0; i--) {
1847     for (CeedInt j = 0; j < i; j++) {
1848       if (fabs(lambda[j]) > fabs(lambda[j + 1])) {
1849         CeedScalar temp;
1850         temp          = lambda[j];
1851         lambda[j]     = lambda[j + 1];
1852         lambda[j + 1] = temp;
1853         for (CeedInt k = 0; k < n; k++) {
1854           temp                 = mat_C[k * n + j];
1855           mat_C[k * n + j]     = mat_C[k * n + j + 1];
1856           mat_C[k * n + j + 1] = temp;
1857         }
1858       }
1859     }
1860   }
1861 
1862   // Set X = (G D^1/2)^-T Q
1863   //       = G D^-1/2 Q
1864   CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n));
1865 
1866   // Cleanup
1867   CeedCall(CeedFree(&mat_C));
1868   CeedCall(CeedFree(&mat_G));
1869   CeedCall(CeedFree(&vec_D));
1870   return CEED_ERROR_SUCCESS;
1871 }
1872 CeedPragmaOptimizeOn
1873 
1874     /// @}
1875