xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-basis.c (revision a49029fe2b683c1941ca0e371db6091ee3c727b1)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A
41     where A is an mxn matrix indexed as A[i*row + j*col]
42 
43   @param[in,out] A  Matrix to apply Householder reflection to, in place
44   @param v          Householder vector
45   @param b          Scaling factor
46   @param m          Number of rows in A
47   @param n          Number of columns in A
48   @param row        Row stride
49   @param col        Col stride
50 
51   @return An error code: 0 - success, otherwise - failure
52 
53   @ref Developer
54 **/
55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v,
56                                   CeedScalar b, CeedInt m, CeedInt n,
57                                   CeedInt row, CeedInt col) {
58   for (CeedInt j=0; j<n; j++) {
59     CeedScalar w = A[0*row + j*col];
60     for (CeedInt i=1; i<m; i++)
61       w += v[i] * A[i*row + j*col];
62     A[0*row + j*col] -= b * w;
63     for (CeedInt i=1; i<m; i++)
64       A[i*row + j*col] -= b * w * v[i];
65   }
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 /**
70   @brief Apply Householder Q matrix
71 
72     Compute A = Q A where Q is mxm and A is mxn.
73 
74   @param[in,out] A  Matrix to apply Householder Q to, in place
75   @param Q          Householder Q matrix
76   @param tau        Householder scaling factors
77   @param t_mode     Transpose mode for application
78   @param m          Number of rows in A
79   @param n          Number of columns in A
80   @param k          Number of elementary reflectors in Q, k<m
81   @param row        Row stride in A
82   @param col        Col stride in A
83 
84   @return An error code: 0 - success, otherwise - failure
85 
86   @ref Developer
87 **/
88 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q,
89                           const CeedScalar *tau, CeedTransposeMode t_mode,
90                           CeedInt m, CeedInt n, CeedInt k,
91                           CeedInt row, CeedInt col) {
92   int ierr;
93   CeedScalar *v;
94   ierr = CeedMalloc(m, &v); CeedChk(ierr);
95   for (CeedInt ii=0; ii<k; ii++) {
96     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii;
97     for (CeedInt j=i+1; j<m; j++)
98       v[j] = Q[j*k+i];
99     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
100     ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
101     CeedChk(ierr);
102   }
103   ierr = CeedFree(&v); CeedChk(ierr);
104   return CEED_ERROR_SUCCESS;
105 }
106 
107 /**
108   @brief Compute Givens rotation
109 
110     Computes A = G A (or G^T A in transpose mode)
111     where A is an mxn matrix indexed as A[i*n + j*m]
112 
113   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
114   @param c          Cosine factor
115   @param s          Sine factor
116   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
117                     which has the effect of rotating columns of A clockwise;
118                     @ref CEED_TRANSPOSE for the opposite rotation
119   @param i          First row/column to apply rotation
120   @param k          Second row/column to apply rotation
121   @param m          Number of rows in A
122   @param n          Number of columns in A
123 
124   @return An error code: 0 - success, otherwise - failure
125 
126   @ref Developer
127 **/
128 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
129                               CeedTransposeMode t_mode, CeedInt i, CeedInt k,
130                               CeedInt m, CeedInt n) {
131   CeedInt stride_j = 1, stride_ik = m, num_its = n;
132   if (t_mode == CEED_NOTRANSPOSE) {
133     stride_j = n; stride_ik = 1; num_its = m;
134   }
135 
136   // Apply rotation
137   for (CeedInt j=0; j<num_its; j++) {
138     CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j];
139     A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2;
140     A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2;
141   }
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 /**
146   @brief View an array stored in a CeedBasis
147 
148   @param[in] name      Name of array
149   @param[in] fp_fmt    Printing format
150   @param[in] m         Number of rows in array
151   @param[in] n         Number of columns in array
152   @param[in] a         Array to be viewed
153   @param[in] stream    Stream to view to, e.g., stdout
154 
155   @return An error code: 0 - success, otherwise - failure
156 
157   @ref Developer
158 **/
159 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
160                           CeedInt n, const CeedScalar *a, FILE *stream) {
161   for (int i=0; i<m; i++) {
162     if (m > 1)
163       fprintf(stream, "%12s[%d]:", name, i);
164     else
165       fprintf(stream, "%12s:", name);
166     for (int j=0; j<n; j++)
167       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
168     fputs("\n", stream);
169   }
170   return CEED_ERROR_SUCCESS;
171 }
172 
173 /// @}
174 
175 /// ----------------------------------------------------------------------------
176 /// Ceed Backend API
177 /// ----------------------------------------------------------------------------
178 /// @addtogroup CeedBasisBackend
179 /// @{
180 
181 /**
182   @brief Return collocated grad matrix
183 
184   @param basis               CeedBasis
185   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
186                                basis functions at quadrature points
187 
188   @return An error code: 0 - success, otherwise - failure
189 
190   @ref Backend
191 **/
192 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
193   int i, j, k;
194   Ceed ceed;
195   CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d;
196   CeedScalar *interp_1d, *grad_1d, *tau;
197 
198   ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr);
199   ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr);
200   ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr);
201   memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
202   memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
203 
204   // QR Factorization, interp_1d = Q R
205   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
206   ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr);
207   // Note: This function is for backend use, so all errors are terminal
208   //   and we do not need to clean up memory on failure.
209 
210   // Apply Rinv, collo_grad_1d = grad_1d Rinv
211   for (i=0; i<Q_1d; i++) { // Row i
212     collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0];
213     for (j=1; j<P_1d; j++) { // Column j
214       collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i];
215       for (k=0; k<j; k++)
216         collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i];
217       collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j];
218     }
219     for (j=P_1d; j<Q_1d; j++)
220       collo_grad_1d[j+Q_1d*i] = 0;
221   }
222 
223   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
224   ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE,
225                                Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr);
226 
227   ierr = CeedFree(&interp_1d); CeedChk(ierr);
228   ierr = CeedFree(&grad_1d); CeedChk(ierr);
229   ierr = CeedFree(&tau); CeedChk(ierr);
230   return CEED_ERROR_SUCCESS;
231 }
232 
233 /**
234   @brief Get tensor status for given CeedBasis
235 
236   @param basis           CeedBasis
237   @param[out] is_tensor  Variable to store tensor status
238 
239   @return An error code: 0 - success, otherwise - failure
240 
241   @ref Backend
242 **/
243 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
244   *is_tensor = basis->tensor_basis;
245   return CEED_ERROR_SUCCESS;
246 }
247 
248 /**
249   @brief Get backend data of a CeedBasis
250 
251   @param basis      CeedBasis
252   @param[out] data  Variable to store data
253 
254   @return An error code: 0 - success, otherwise - failure
255 
256   @ref Backend
257 **/
258 int CeedBasisGetData(CeedBasis basis, void *data) {
259   *(void **)data = basis->data;
260   return CEED_ERROR_SUCCESS;
261 }
262 
263 /**
264   @brief Set backend data of a CeedBasis
265 
266   @param[out] basis  CeedBasis
267   @param data        Data to set
268 
269   @return An error code: 0 - success, otherwise - failure
270 
271   @ref Backend
272 **/
273 int CeedBasisSetData(CeedBasis basis, void *data) {
274   basis->data = data;
275   return CEED_ERROR_SUCCESS;
276 }
277 
278 /**
279   @brief Increment the reference counter for a CeedBasis
280 
281   @param basis  Basis to increment the reference counter
282 
283   @return An error code: 0 - success, otherwise - failure
284 
285   @ref Backend
286 **/
287 int CeedBasisReference(CeedBasis basis) {
288   basis->ref_count++;
289   return CEED_ERROR_SUCCESS;
290 }
291 
292 /**
293   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
294 
295   @param basis     Basis to estimate FLOPs for
296   @param t_mode    Apply basis or transpose
297   @param eval_mode Basis evaluation mode
298   @param flops     Address of variable to hold FLOPs estimate
299 
300   @ref Backend
301 **/
302 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode,
303                               CeedEvalMode eval_mode, CeedSize *flops) {
304   int ierr;
305   bool is_tensor;
306 
307   ierr = CeedBasisIsTensor(basis, &is_tensor); CeedChk(ierr);
308   if (is_tensor) {
309     CeedInt dim, num_comp, P_1d, Q_1d;
310     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
311     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
312     ierr = CeedBasisGetNumNodes1D(basis, &P_1d);  CeedChk(ierr);
313     ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d);  CeedChk(ierr);
314     if (t_mode == CEED_TRANSPOSE) {
315       P_1d = Q_1d; Q_1d = P_1d;
316     }
317     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim-1), post = 1;
318     for (CeedInt d = 0; d < dim; d++) {
319       tensor_flops += 2 * pre * P_1d * post * Q_1d;
320       pre /= P_1d;
321       post *= Q_1d;
322     }
323     switch (eval_mode) {
324     case CEED_EVAL_NONE:   *flops = 0; break;
325     case CEED_EVAL_INTERP: *flops = tensor_flops; break;
326     case CEED_EVAL_GRAD:   *flops = tensor_flops * 2; break;
327     case CEED_EVAL_DIV:
328       // LCOV_EXCL_START
329       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
330                        "Tensor CEED_EVAL_DIV not supported"); break;
331     case CEED_EVAL_CURL:
332       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
333                        "Tensor CEED_EVAL_CURL not supported"); break;
334     // LCOV_EXCL_STOP
335     case CEED_EVAL_WEIGHT: *flops = dim * CeedIntPow(Q_1d, dim); break;
336     }
337   } else {
338     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
339     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
340     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
341     ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
342     ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
343     ierr = CeedBasisGetNumQuadratureComponents(basis, &Q_comp); CeedChk(ierr);
344     switch (eval_mode) {
345     case CEED_EVAL_NONE:   *flops = 0; break;
346     case CEED_EVAL_INTERP: *flops = num_nodes * num_qpts * num_comp; break;
347     case CEED_EVAL_GRAD:   *flops = num_nodes * num_qpts * num_comp * dim; break;
348     case CEED_EVAL_DIV:    *flops = num_nodes * num_qpts; break;
349     case CEED_EVAL_CURL:   *flops = num_nodes * num_qpts * dim; break;
350     case CEED_EVAL_WEIGHT: *flops = 0; break;
351     }
352   }
353 
354   return CEED_ERROR_SUCCESS;
355 }
356 
357 /**
358   @brief Get dimension for given CeedElemTopology
359 
360   @param topo      CeedElemTopology
361   @param[out] dim  Variable to store dimension of topology
362 
363   @return An error code: 0 - success, otherwise - failure
364 
365   @ref Backend
366 **/
367 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
368   *dim = (CeedInt) topo >> 16;
369   return CEED_ERROR_SUCCESS;
370 }
371 
372 /**
373   @brief Get CeedTensorContract of a CeedBasis
374 
375   @param basis          CeedBasis
376   @param[out] contract  Variable to store CeedTensorContract
377 
378   @return An error code: 0 - success, otherwise - failure
379 
380   @ref Backend
381 **/
382 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
383   *contract = basis->contract;
384   return CEED_ERROR_SUCCESS;
385 }
386 
387 /**
388   @brief Set CeedTensorContract of a CeedBasis
389 
390   @param[out] basis  CeedBasis
391   @param contract    CeedTensorContract to set
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Backend
396 **/
397 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
398   int ierr;
399   basis->contract = contract;
400   ierr = CeedTensorContractReference(contract); CeedChk(ierr);
401   return CEED_ERROR_SUCCESS;
402 }
403 
404 /**
405   @brief Return a reference implementation of matrix multiplication C = A B.
406            Note, this is a reference implementation for CPU CeedScalar pointers
407            that is not intended for high performance.
408 
409   @param ceed        A Ceed context for error handling
410   @param[in] mat_A   Row-major matrix A
411   @param[in] mat_B   Row-major matrix B
412   @param[out] mat_C  Row-major output matrix C
413   @param m           Number of rows of C
414   @param n           Number of columns of C
415   @param kk          Number of columns of A/rows of B
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Utility
420 **/
421 int CeedMatrixMultiply(Ceed ceed, const CeedScalar *mat_A,
422                        const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m,
423                        CeedInt n, CeedInt kk) {
424   for (CeedInt i=0; i<m; i++)
425     for (CeedInt j=0; j<n; j++) {
426       CeedScalar sum = 0;
427       for (CeedInt k=0; k<kk; k++)
428         sum += mat_A[k+i*kk]*mat_B[j+k*n];
429       mat_C[j+i*n] = sum;
430     }
431   return CEED_ERROR_SUCCESS;
432 }
433 
434 /// @}
435 
436 /// ----------------------------------------------------------------------------
437 /// CeedBasis Public API
438 /// ----------------------------------------------------------------------------
439 /// @addtogroup CeedBasisUser
440 /// @{
441 
442 /**
443   @brief Create a tensor-product basis for H^1 discretizations
444 
445   @param ceed        A Ceed object where the CeedBasis will be created
446   @param dim         Topological dimension
447   @param num_comp    Number of field components (1 for scalar fields)
448   @param P_1d        Number of nodes in one dimension
449   @param Q_1d        Number of quadrature points in one dimension
450   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
451                        basis functions at quadrature points
452   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
453                        basis functions at quadrature points
454   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
455                        on the 1D reference element [-1, 1]
456   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
457                        reference element
458   @param[out] basis  Address of the variable where the newly created
459                        CeedBasis will be stored.
460 
461   @return An error code: 0 - success, otherwise - failure
462 
463   @ref User
464 **/
465 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
466                             CeedInt P_1d, CeedInt Q_1d,
467                             const CeedScalar *interp_1d,
468                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d,
469                             const CeedScalar *q_weight_1d, CeedBasis *basis) {
470   int ierr;
471 
472   if (!ceed->BasisCreateTensorH1) {
473     Ceed delegate;
474     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
475 
476     if (!delegate)
477       // LCOV_EXCL_START
478       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
479                        "Backend does not support BasisCreateTensorH1");
480     // LCOV_EXCL_STOP
481 
482     ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d,
483                                    Q_1d, interp_1d, grad_1d, q_ref_1d,
484                                    q_weight_1d, basis); CeedChk(ierr);
485     return CEED_ERROR_SUCCESS;
486   }
487 
488   if (dim<1)
489     // LCOV_EXCL_START
490     return CeedError(ceed, CEED_ERROR_DIMENSION,
491                      "Basis dimension must be a positive value");
492   // LCOV_EXCL_STOP
493   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
494                           : dim == 2 ? CEED_TOPOLOGY_QUAD
495                           : CEED_TOPOLOGY_HEX;
496 
497   ierr = CeedCalloc(1, basis); CeedChk(ierr);
498   (*basis)->ceed = ceed;
499   ierr = CeedReference(ceed); CeedChk(ierr);
500   (*basis)->ref_count = 1;
501   (*basis)->tensor_basis = 1;
502   (*basis)->dim = dim;
503   (*basis)->topo = topo;
504   (*basis)->num_comp = num_comp;
505   (*basis)->P_1d = P_1d;
506   (*basis)->Q_1d = Q_1d;
507   (*basis)->P = CeedIntPow(P_1d, dim);
508   (*basis)->Q = CeedIntPow(Q_1d, dim);
509   (*basis)->Q_comp = 1;
510   (*basis)->basis_space = 1; // 1 for H^1 space
511   ierr = CeedCalloc(Q_1d, &(*basis)->q_ref_1d); CeedChk(ierr);
512   ierr = CeedCalloc(Q_1d, &(*basis)->q_weight_1d); CeedChk(ierr);
513   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0]));
514   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d,
515                             Q_1d*sizeof(q_weight_1d[0]));
516   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->interp_1d); CeedChk(ierr);
517   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->grad_1d); CeedChk(ierr);
518   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d,
519                           Q_1d*P_1d*sizeof(interp_1d[0]));
520   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0]));
521   ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d,
522                                    q_weight_1d, *basis); CeedChk(ierr);
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Create a tensor-product Lagrange basis
528 
529   @param ceed        A Ceed object where the CeedBasis will be created
530   @param dim         Topological dimension of element
531   @param num_comp      Number of field components (1 for scalar fields)
532   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
533                        polynomial degree of the resulting Q_k element is k=P-1.
534   @param Q           Number of quadrature points in one dimension.
535   @param quad_mode   Distribution of the Q quadrature points (affects order of
536                        accuracy for the quadrature)
537   @param[out] basis  Address of the variable where the newly created
538                        CeedBasis will be stored.
539 
540   @return An error code: 0 - success, otherwise - failure
541 
542   @ref User
543 **/
544 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
545                                     CeedInt P, CeedInt Q, CeedQuadMode quad_mode,
546                                     CeedBasis *basis) {
547   // Allocate
548   int ierr, ierr2, i, j, k;
549   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
550              *q_weight_1d;
551 
552   if (dim<1)
553     // LCOV_EXCL_START
554     return CeedError(ceed, CEED_ERROR_DIMENSION,
555                      "Basis dimension must be a positive value");
556   // LCOV_EXCL_STOP
557 
558   // Get Nodes and Weights
559   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
560   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
561   ierr = CeedCalloc(P, &nodes); CeedChk(ierr);
562   ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr);
563   ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr);
564   ierr = CeedLobattoQuadrature(P, nodes, NULL);
565   if (ierr) { goto cleanup; } CeedChk(ierr);
566   switch (quad_mode) {
567   case CEED_GAUSS:
568     ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
569     break;
570   case CEED_GAUSS_LOBATTO:
571     ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
572     break;
573   }
574   if (ierr) { goto cleanup; } CeedChk(ierr);
575 
576   // Build B, D matrix
577   // Fornberg, 1998
578   for (i = 0; i  < Q; i++) {
579     c1 = 1.0;
580     c3 = nodes[0] - q_ref_1d[i];
581     interp_1d[i*P+0] = 1.0;
582     for (j = 1; j < P; j++) {
583       c2 = 1.0;
584       c4 = c3;
585       c3 = nodes[j] - q_ref_1d[i];
586       for (k = 0; k < j; k++) {
587         dx = nodes[j] - nodes[k];
588         c2 *= dx;
589         if (k == j - 1) {
590           grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2;
591           interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2;
592         }
593         grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx;
594         interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx;
595       }
596       c1 = c2;
597     }
598   }
599   // Pass to CeedBasisCreateTensorH1
600   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d,
601                                  q_ref_1d, q_weight_1d, basis); CeedChk(ierr);
602 cleanup:
603   ierr2 = CeedFree(&interp_1d); CeedChk(ierr2);
604   ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);
605   ierr2 = CeedFree(&nodes); CeedChk(ierr2);
606   ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2);
607   ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2);
608   CeedChk(ierr);
609   return CEED_ERROR_SUCCESS;
610 }
611 
612 /**
613   @brief Create a non tensor-product basis for H^1 discretizations
614 
615   @param ceed        A Ceed object where the CeedBasis will be created
616   @param topo        Topology of element, e.g. hypercube, simplex, ect
617   @param num_comp    Number of field components (1 for scalar fields)
618   @param num_nodes   Total number of nodes
619   @param num_qpts    Total number of quadrature points
620   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
621                        nodal basis functions at quadrature points
622   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
623                        derivatives of nodal basis functions at quadrature points
624   @param q_ref       Array of length num_qpts holding the locations of quadrature
625                        points on the reference element
626   @param q_weight    Array of length num_qpts holding the quadrature weights on the
627                        reference element
628   @param[out] basis  Address of the variable where the newly created
629                        CeedBasis will be stored.
630 
631   @return An error code: 0 - success, otherwise - failure
632 
633   @ref User
634 **/
635 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
636                       CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
637                       const CeedScalar *grad, const CeedScalar *q_ref,
638                       const CeedScalar *q_weight, CeedBasis *basis) {
639   int ierr;
640   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
641 
642   if (!ceed->BasisCreateH1) {
643     Ceed delegate;
644     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
645 
646     if (!delegate)
647       // LCOV_EXCL_START
648       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
649                        "Backend does not support BasisCreateH1");
650     // LCOV_EXCL_STOP
651 
652     ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes,
653                              num_qpts, interp, grad, q_ref,
654                              q_weight, basis); CeedChk(ierr);
655     return CEED_ERROR_SUCCESS;
656   }
657 
658   ierr = CeedCalloc(1, basis); CeedChk(ierr);
659 
660   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
661 
662   (*basis)->ceed = ceed;
663   ierr = CeedReference(ceed); CeedChk(ierr);
664   (*basis)->ref_count = 1;
665   (*basis)->tensor_basis = 0;
666   (*basis)->dim = dim;
667   (*basis)->topo = topo;
668   (*basis)->num_comp = num_comp;
669   (*basis)->P = P;
670   (*basis)->Q = Q;
671   (*basis)->Q_comp = 1;
672   (*basis)->basis_space = 1; // 1 for H^1 space
673   ierr = CeedCalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
674   ierr = CeedCalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
675   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
676   if(q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
677   ierr = CeedCalloc(Q*P, &(*basis)->interp); CeedChk(ierr);
678   ierr = CeedCalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr);
679   if(interp) memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0]));
680   if(grad) memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0]));
681   ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref,
682                              q_weight, *basis); CeedChk(ierr);
683   return CEED_ERROR_SUCCESS;
684 }
685 
686 /**
687   @brief Create a non tensor-product basis for H(div) discretizations
688 
689   @param ceed        A Ceed object where the CeedBasis will be created
690   @param topo        Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.),
691                      dimension of which is used in some array sizes below
692   @param num_comp    Number of components (usually 1 for vectors in H(div) bases)
693   @param num_nodes   Total number of nodes (dofs per element)
694   @param num_qpts    Total number of quadrature points
695   @param interp      Row-major (dim*num_qpts * num_nodes) matrix expressing the values of
696                        nodal basis functions at quadrature points
697   @param div        Row-major (num_qpts * num_nodes) matrix expressing
698                        divergence of nodal basis functions at quadrature points
699   @param q_ref       Array of length num_qpts holding the locations of quadrature
700                        points on the reference element
701   @param q_weight    Array of length num_qpts holding the quadrature weights on the
702                        reference element
703   @param[out] basis  Address of the variable where the newly created
704                        CeedBasis will be stored.
705 
706   @return An error code: 0 - success, otherwise - failure
707 
708   @ref User
709 **/
710 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
711                         CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
712                         const CeedScalar *div, const CeedScalar *q_ref,
713                         const CeedScalar *q_weight, CeedBasis *basis) {
714   int ierr;
715   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
716   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
717   if (!ceed->BasisCreateHdiv) {
718     Ceed delegate;
719     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
720 
721     if (!delegate)
722       // LCOV_EXCL_START
723       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
724                        "Backend does not implement BasisCreateHdiv");
725     // LCOV_EXCL_STOP
726 
727     ierr = CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes,
728                                num_qpts, interp, div, q_ref,
729                                q_weight, basis); CeedChk(ierr);
730     return CEED_ERROR_SUCCESS;
731   }
732 
733   ierr = CeedCalloc(1, basis); CeedChk(ierr);
734 
735   (*basis)->ceed = ceed;
736   ierr = CeedReference(ceed); CeedChk(ierr);
737   (*basis)->ref_count = 1;
738   (*basis)->tensor_basis = 0;
739   (*basis)->dim = dim;
740   (*basis)->topo = topo;
741   (*basis)->num_comp = num_comp;
742   (*basis)->P = P;
743   (*basis)->Q = Q;
744   (*basis)->Q_comp = dim;
745   (*basis)->basis_space = 2; // 2 for H(div) space
746   ierr = CeedMalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
747   ierr = CeedMalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
748   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
749   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
750   ierr = CeedMalloc(dim*Q*P, &(*basis)->interp); CeedChk(ierr);
751   ierr = CeedMalloc(Q*P, &(*basis)->div); CeedChk(ierr);
752   if (interp) memcpy((*basis)->interp, interp, dim*Q*P*sizeof(interp[0]));
753   if (div) memcpy((*basis)->div, div, Q*P*sizeof(div[0]));
754   ierr = ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref,
755                                q_weight, *basis); CeedChk(ierr);
756   return CEED_ERROR_SUCCESS;
757 }
758 
759 /**
760   @brief Copy the pointer to a CeedBasis. Both pointers should
761            be destroyed with `CeedBasisDestroy()`;
762            Note: If `*basis_copy` is non-NULL, then it is assumed that
763            `*basis_copy` is a pointer to a CeedBasis. This CeedBasis
764            will be destroyed if `*basis_copy` is the only
765            reference to this CeedBasis.
766 
767   @param basis            CeedBasis to copy reference to
768   @param[out] basis_copy  Variable to store copied reference
769 
770   @return An error code: 0 - success, otherwise - failure
771 
772   @ref User
773 **/
774 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
775   int ierr;
776 
777   ierr = CeedBasisReference(basis); CeedChk(ierr);
778   ierr = CeedBasisDestroy(basis_copy); CeedChk(ierr);
779   *basis_copy = basis;
780   return CEED_ERROR_SUCCESS;
781 }
782 
783 /**
784   @brief View a CeedBasis
785 
786   @param basis   CeedBasis to view
787   @param stream  Stream to view to, e.g., stdout
788 
789   @return An error code: 0 - success, otherwise - failure
790 
791   @ref User
792 **/
793 int CeedBasisView(CeedBasis basis, FILE *stream) {
794   int ierr;
795   CeedFESpace FE_space = basis->basis_space;
796   CeedElemTopology topo = basis->topo;
797   // Print FE space and element topology of the basis
798   if (basis->tensor_basis) {
799     fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n",
800             CeedFESpaces[FE_space], CeedElemTopologies[topo],
801             basis->dim, basis->P_1d, basis->Q_1d);
802   } else {
803     fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n",
804             CeedFESpaces[FE_space], CeedElemTopologies[topo],
805             basis->dim, basis->P, basis->Q);
806   }
807   // Print quadrature data, interpolation/gradient/divergene/curl of the basis
808   if (basis->tensor_basis) { // tensor basis
809     ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d,
810                           stream); CeedChk(ierr);
811     ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d,
812                           basis->q_weight_1d, stream); CeedChk(ierr);
813     ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
814                           basis->interp_1d, stream); CeedChk(ierr);
815     ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
816                           basis->grad_1d, stream); CeedChk(ierr);
817   } else { // non-tensor basis
818     ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim,
819                           basis->q_ref_1d,
820                           stream); CeedChk(ierr);
821     ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d,
822                           stream); CeedChk(ierr);
823     ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q_comp*basis->Q, basis->P,
824                           basis->interp, stream); CeedChk(ierr);
825     if (basis->grad) {
826       ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P,
827                             basis->grad, stream); CeedChk(ierr);
828     }
829     if (basis->div) {
830       ierr = CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P,
831                             basis->div, stream); CeedChk(ierr);
832     }
833   }
834   return CEED_ERROR_SUCCESS;
835 }
836 
837 /**
838   @brief Apply basis evaluation from nodes to quadrature points or vice versa
839 
840   @param basis     CeedBasis to evaluate
841   @param num_elem  The number of elements to apply the basis evaluation to;
842                      the backend will specify the ordering in
843                      CeedElemRestrictionCreateBlocked()
844   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
845                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
846                      from quadrature points to nodes
847   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
848                      \ref CEED_EVAL_INTERP to use interpolated values,
849                      \ref CEED_EVAL_GRAD to use gradients,
850                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
851   @param[in] u     Input CeedVector
852   @param[out] v    Output CeedVector
853 
854   @return An error code: 0 - success, otherwise - failure
855 
856   @ref User
857 **/
858 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
859                    CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
860   int ierr;
861   CeedSize u_length = 0, v_length;
862   CeedInt dim, num_comp, num_nodes, num_qpts;
863   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
864   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
865   ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
866   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
867   ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr);
868   if (u) {
869     ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr);
870   }
871 
872   if (!basis->Apply)
873     // LCOV_EXCL_START
874     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED,
875                      "Backend does not support BasisApply");
876   // LCOV_EXCL_STOP
877 
878   // Check compatibility of topological and geometrical dimensions
879   if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 ||
880                                     u_length%num_qpts != 0)) ||
881       (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 ||
882                                       v_length%num_qpts != 0)))
883     // LCOV_EXCL_START
884     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
885                      "Length of input/output vectors "
886                      "incompatible with basis dimensions");
887   // LCOV_EXCL_STOP
888 
889   // Check vector lengths to prevent out of bounds issues
890   bool bad_dims = false;
891   switch (eval_mode) {
892   case CEED_EVAL_NONE:
893   case CEED_EVAL_INTERP: bad_dims =
894       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
895                                      v_length < num_elem*num_comp*num_nodes)) ||
896        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
897                                        u_length < num_elem*num_comp*num_nodes)));
898     break;
899   case CEED_EVAL_GRAD: bad_dims =
900       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim ||
901                                      v_length < num_elem*num_comp*num_nodes)) ||
902        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim ||
903                                        u_length < num_elem*num_comp*num_nodes)));
904     break;
905   case CEED_EVAL_WEIGHT:
906     bad_dims = v_length < num_elem*num_qpts;
907     break;
908   // LCOV_EXCL_START
909   case CEED_EVAL_DIV: bad_dims =
910       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
911                                      v_length < num_elem*num_comp*num_nodes)) ||
912        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
913                                        u_length < num_elem*num_comp*num_nodes)));
914     break;
915   case CEED_EVAL_CURL: bad_dims =
916       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
917                                      v_length < num_elem*num_comp*num_nodes)) ||
918        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
919                                        u_length < num_elem*num_comp*num_nodes)));
920     break;
921     // LCOV_EXCL_STOP
922   }
923   if (bad_dims)
924     // LCOV_EXCL_START
925     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
926                      "Input/output vectors too short for basis and evaluation mode");
927   // LCOV_EXCL_STOP
928 
929   ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr);
930   return CEED_ERROR_SUCCESS;
931 }
932 
933 /**
934   @brief Get Ceed associated with a CeedBasis
935 
936   @param basis      CeedBasis
937   @param[out] ceed  Variable to store Ceed
938 
939   @return An error code: 0 - success, otherwise - failure
940 
941   @ref Advanced
942 **/
943 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
944   *ceed = basis->ceed;
945   return CEED_ERROR_SUCCESS;
946 }
947 
948 /**
949   @brief Get dimension for given CeedBasis
950 
951   @param basis     CeedBasis
952   @param[out] dim  Variable to store dimension of basis
953 
954   @return An error code: 0 - success, otherwise - failure
955 
956   @ref Advanced
957 **/
958 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
959   *dim = basis->dim;
960   return CEED_ERROR_SUCCESS;
961 }
962 
963 /**
964   @brief Get topology for given CeedBasis
965 
966   @param basis      CeedBasis
967   @param[out] topo  Variable to store topology of basis
968 
969   @return An error code: 0 - success, otherwise - failure
970 
971   @ref Advanced
972 **/
973 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
974   *topo = basis->topo;
975   return CEED_ERROR_SUCCESS;
976 }
977 
978 /**
979   @brief Get number of Q-vector components for given CeedBasis
980 
981   @param basis          CeedBasis
982   @param[out] Q_comp  Variable to store number of Q-vector components of basis
983 
984   @return An error code: 0 - success, otherwise - failure
985 
986   @ref Advanced
987 **/
988 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
989   *Q_comp = basis->Q_comp;
990   return CEED_ERROR_SUCCESS;
991 }
992 
993 /**
994   @brief Get number of components for given CeedBasis
995 
996   @param basis          CeedBasis
997   @param[out] num_comp  Variable to store number of components of basis
998 
999   @return An error code: 0 - success, otherwise - failure
1000 
1001   @ref Advanced
1002 **/
1003 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1004   *num_comp = basis->num_comp;
1005   return CEED_ERROR_SUCCESS;
1006 }
1007 
1008 /**
1009   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1010 
1011   @param basis   CeedBasis
1012   @param[out] P  Variable to store number of nodes
1013 
1014   @return An error code: 0 - success, otherwise - failure
1015 
1016   @ref Utility
1017 **/
1018 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1019   *P = basis->P;
1020   return CEED_ERROR_SUCCESS;
1021 }
1022 
1023 /**
1024   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1025 
1026   @param basis     CeedBasis
1027   @param[out] P_1d  Variable to store number of nodes
1028 
1029   @return An error code: 0 - success, otherwise - failure
1030 
1031   @ref Advanced
1032 **/
1033 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1034   if (!basis->tensor_basis)
1035     // LCOV_EXCL_START
1036     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1037                      "Cannot supply P_1d for non-tensor basis");
1038   // LCOV_EXCL_STOP
1039 
1040   *P_1d = basis->P_1d;
1041   return CEED_ERROR_SUCCESS;
1042 }
1043 
1044 /**
1045   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1046 
1047   @param basis   CeedBasis
1048   @param[out] Q  Variable to store number of quadrature points
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref Utility
1053 **/
1054 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1055   *Q = basis->Q;
1056   return CEED_ERROR_SUCCESS;
1057 }
1058 
1059 /**
1060   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1061 
1062   @param basis      CeedBasis
1063   @param[out] Q_1d  Variable to store number of quadrature points
1064 
1065   @return An error code: 0 - success, otherwise - failure
1066 
1067   @ref Advanced
1068 **/
1069 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1070   if (!basis->tensor_basis)
1071     // LCOV_EXCL_START
1072     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1073                      "Cannot supply Q_1d for non-tensor basis");
1074   // LCOV_EXCL_STOP
1075 
1076   *Q_1d = basis->Q_1d;
1077   return CEED_ERROR_SUCCESS;
1078 }
1079 
1080 /**
1081   @brief Get reference coordinates of quadrature points (in dim dimensions)
1082          of a CeedBasis
1083 
1084   @param basis       CeedBasis
1085   @param[out] q_ref  Variable to store reference coordinates of quadrature points
1086 
1087   @return An error code: 0 - success, otherwise - failure
1088 
1089   @ref Advanced
1090 **/
1091 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1092   *q_ref = basis->q_ref_1d;
1093   return CEED_ERROR_SUCCESS;
1094 }
1095 
1096 /**
1097   @brief Get quadrature weights of quadrature points (in dim dimensions)
1098          of a CeedBasis
1099 
1100   @param basis          CeedBasis
1101   @param[out] q_weight  Variable to store quadrature weights
1102 
1103   @return An error code: 0 - success, otherwise - failure
1104 
1105   @ref Advanced
1106 **/
1107 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1108   *q_weight = basis->q_weight_1d;
1109   return CEED_ERROR_SUCCESS;
1110 }
1111 
1112 /**
1113   @brief Get interpolation matrix of a CeedBasis
1114 
1115   @param basis        CeedBasis
1116   @param[out] interp  Variable to store interpolation matrix
1117 
1118   @return An error code: 0 - success, otherwise - failure
1119 
1120   @ref Advanced
1121 **/
1122 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1123   if (!basis->interp && basis->tensor_basis) {
1124     // Allocate
1125     int ierr;
1126     ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr);
1127 
1128     // Initialize
1129     for (CeedInt i=0; i<basis->Q*basis->P; i++)
1130       basis->interp[i] = 1.0;
1131 
1132     // Calculate
1133     for (CeedInt d=0; d<basis->dim; d++)
1134       for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1135         for (CeedInt node=0; node<basis->P; node++) {
1136           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1137           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1138           basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p];
1139         }
1140   }
1141   *interp = basis->interp;
1142   return CEED_ERROR_SUCCESS;
1143 }
1144 
1145 /**
1146   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1147 
1148   @param basis           CeedBasis
1149   @param[out] interp_1d  Variable to store interpolation matrix
1150 
1151   @return An error code: 0 - success, otherwise - failure
1152 
1153   @ref Backend
1154 **/
1155 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1156   if (!basis->tensor_basis)
1157     // LCOV_EXCL_START
1158     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1159                      "CeedBasis is not a tensor product basis.");
1160   // LCOV_EXCL_STOP
1161 
1162   *interp_1d = basis->interp_1d;
1163   return CEED_ERROR_SUCCESS;
1164 }
1165 
1166 /**
1167   @brief Get gradient matrix of a CeedBasis
1168 
1169   @param basis      CeedBasis
1170   @param[out] grad  Variable to store gradient matrix
1171 
1172   @return An error code: 0 - success, otherwise - failure
1173 
1174   @ref Advanced
1175 **/
1176 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1177   if (!basis->grad && basis->tensor_basis) {
1178     // Allocate
1179     int ierr;
1180     ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad);
1181     CeedChk(ierr);
1182 
1183     // Initialize
1184     for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++)
1185       basis->grad[i] = 1.0;
1186 
1187     // Calculate
1188     for (CeedInt d=0; d<basis->dim; d++)
1189       for (CeedInt i=0; i<basis->dim; i++)
1190         for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1191           for (CeedInt node=0; node<basis->P; node++) {
1192             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1193             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1194             if (i == d)
1195               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1196                 basis->grad_1d[q*basis->P_1d+p];
1197             else
1198               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1199                 basis->interp_1d[q*basis->P_1d+p];
1200           }
1201   }
1202   *grad = basis->grad;
1203   return CEED_ERROR_SUCCESS;
1204 }
1205 
1206 /**
1207   @brief Get 1D gradient matrix of a tensor product CeedBasis
1208 
1209   @param basis         CeedBasis
1210   @param[out] grad_1d  Variable to store gradient matrix
1211 
1212   @return An error code: 0 - success, otherwise - failure
1213 
1214   @ref Advanced
1215 **/
1216 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1217   if (!basis->tensor_basis)
1218     // LCOV_EXCL_START
1219     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1220                      "CeedBasis is not a tensor product basis.");
1221   // LCOV_EXCL_STOP
1222 
1223   *grad_1d = basis->grad_1d;
1224   return CEED_ERROR_SUCCESS;
1225 }
1226 
1227 /**
1228   @brief Get divergence matrix of a CeedBasis
1229 
1230   @param basis     CeedBasis
1231   @param[out] div  Variable to store divergence matrix
1232 
1233   @return An error code: 0 - success, otherwise - failure
1234 
1235   @ref Advanced
1236 **/
1237 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1238   if (!basis->div)
1239     // LCOV_EXCL_START
1240     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1241                      "CeedBasis does not have divergence matrix.");
1242   // LCOV_EXCL_STOP
1243 
1244   *div = basis->div;
1245   return CEED_ERROR_SUCCESS;
1246 }
1247 
1248 /**
1249   @brief Destroy a CeedBasis
1250 
1251   @param basis CeedBasis to destroy
1252 
1253   @return An error code: 0 - success, otherwise - failure
1254 
1255   @ref User
1256 **/
1257 int CeedBasisDestroy(CeedBasis *basis) {
1258   int ierr;
1259 
1260   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1261   if ((*basis)->Destroy) {
1262     ierr = (*basis)->Destroy(*basis); CeedChk(ierr);
1263   }
1264   if ((*basis)->contract) {
1265     ierr = CeedTensorContractDestroy(&(*basis)->contract); CeedChk(ierr);
1266   }
1267   ierr = CeedFree(&(*basis)->interp); CeedChk(ierr);
1268   ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr);
1269   ierr = CeedFree(&(*basis)->grad); CeedChk(ierr);
1270   ierr = CeedFree(&(*basis)->div); CeedChk(ierr);
1271   ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr);
1272   ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr);
1273   ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr);
1274   ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr);
1275   ierr = CeedFree(basis); CeedChk(ierr);
1276   return CEED_ERROR_SUCCESS;
1277 }
1278 
1279 /**
1280   @brief Construct a Gauss-Legendre quadrature
1281 
1282   @param Q                 Number of quadrature points (integrates polynomials of
1283                              degree 2*Q-1 exactly)
1284   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1285   @param[out] q_weight_1d  Array of length Q to hold the weights
1286 
1287   @return An error code: 0 - success, otherwise - failure
1288 
1289   @ref Utility
1290 **/
1291 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1292                         CeedScalar *q_weight_1d) {
1293   // Allocate
1294   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
1295   // Build q_ref_1d, q_weight_1d
1296   for (int i = 0; i <= Q/2; i++) {
1297     // Guess
1298     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
1299     // Pn(xi)
1300     P0 = 1.0;
1301     P1 = xi;
1302     P2 = 0.0;
1303     for (int j = 2; j <= Q; j++) {
1304       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1305       P0 = P1;
1306       P1 = P2;
1307     }
1308     // First Newton Step
1309     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1310     xi = xi-P2/dP2;
1311     // Newton to convergence
1312     for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
1313       P0 = 1.0;
1314       P1 = xi;
1315       for (int j = 2; j <= Q; j++) {
1316         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1317         P0 = P1;
1318         P1 = P2;
1319       }
1320       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1321       xi = xi-P2/dP2;
1322     }
1323     // Save xi, wi
1324     wi = 2.0/((1.0-xi*xi)*dP2*dP2);
1325     q_weight_1d[i] = wi;
1326     q_weight_1d[Q-1-i] = wi;
1327     q_ref_1d[i] = -xi;
1328     q_ref_1d[Q-1-i]= xi;
1329   }
1330   return CEED_ERROR_SUCCESS;
1331 }
1332 
1333 /**
1334   @brief Construct a Gauss-Legendre-Lobatto quadrature
1335 
1336   @param Q                 Number of quadrature points (integrates polynomials of
1337                              degree 2*Q-3 exactly)
1338   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1339   @param[out] q_weight_1d  Array of length Q to hold the weights
1340 
1341   @return An error code: 0 - success, otherwise - failure
1342 
1343   @ref Utility
1344 **/
1345 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1346                           CeedScalar *q_weight_1d) {
1347   // Allocate
1348   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0);
1349   // Build q_ref_1d, q_weight_1d
1350   // Set endpoints
1351   if (Q < 2)
1352     // LCOV_EXCL_START
1353     return CeedError(NULL, CEED_ERROR_DIMENSION,
1354                      "Cannot create Lobatto quadrature with Q=%d < 2 points", Q);
1355   // LCOV_EXCL_STOP
1356   wi = 2.0/((CeedScalar)(Q*(Q-1)));
1357   if (q_weight_1d) {
1358     q_weight_1d[0] = wi;
1359     q_weight_1d[Q-1] = wi;
1360   }
1361   q_ref_1d[0] = -1.0;
1362   q_ref_1d[Q-1] = 1.0;
1363   // Interior
1364   for (int i = 1; i <= (Q-1)/2; i++) {
1365     // Guess
1366     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
1367     // Pn(xi)
1368     P0 = 1.0;
1369     P1 = xi;
1370     P2 = 0.0;
1371     for (int j = 2; j < Q; j++) {
1372       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1373       P0 = P1;
1374       P1 = P2;
1375     }
1376     // First Newton step
1377     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1378     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1379     xi = xi-dP2/d2P2;
1380     // Newton to convergence
1381     for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
1382       P0 = 1.0;
1383       P1 = xi;
1384       for (int j = 2; j < Q; j++) {
1385         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1386         P0 = P1;
1387         P1 = P2;
1388       }
1389       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1390       d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1391       xi = xi-dP2/d2P2;
1392     }
1393     // Save xi, wi
1394     wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2);
1395     if (q_weight_1d) {
1396       q_weight_1d[i] = wi;
1397       q_weight_1d[Q-1-i] = wi;
1398     }
1399     q_ref_1d[i] = -xi;
1400     q_ref_1d[Q-1-i]= xi;
1401   }
1402   return CEED_ERROR_SUCCESS;
1403 }
1404 
1405 /**
1406   @brief Return QR Factorization of a matrix
1407 
1408   @param ceed         A Ceed context for error handling
1409   @param[in,out] mat  Row-major matrix to be factorized in place
1410   @param[in,out] tau  Vector of length m of scaling factors
1411   @param m            Number of rows
1412   @param n            Number of columns
1413 
1414   @return An error code: 0 - success, otherwise - failure
1415 
1416   @ref Utility
1417 **/
1418 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau,
1419                         CeedInt m, CeedInt n) {
1420   CeedScalar v[m];
1421 
1422   // Check m >= n
1423   if (n > m)
1424     // LCOV_EXCL_START
1425     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1426                      "Cannot compute QR factorization with n > m");
1427   // LCOV_EXCL_STOP
1428 
1429   for (CeedInt i=0; i<n; i++) {
1430     if (i >= m-1) { // last row of matrix, no reflection needed
1431       tau[i] = 0.;
1432       break;
1433     }
1434     // Calculate Householder vector, magnitude
1435     CeedScalar sigma = 0.0;
1436     v[i] = mat[i+n*i];
1437     for (CeedInt j=i+1; j<m; j++) {
1438       v[j] = mat[i+n*j];
1439       sigma += v[j] * v[j];
1440     }
1441     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m]
1442     CeedScalar R_ii = -copysign(norm, v[i]);
1443     v[i] -= R_ii;
1444     // norm of v[i:m] after modification above and scaling below
1445     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1446     //   tau = 2 / (norm*norm)
1447     tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1448     for (CeedInt j=i+1; j<m; j++)
1449       v[j] /= v[i];
1450 
1451     // Apply Householder reflector to lower right panel
1452     CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1);
1453     // Save v
1454     mat[i+n*i] = R_ii;
1455     for (CeedInt j=i+1; j<m; j++)
1456       mat[i+n*j] = v[j];
1457   }
1458   return CEED_ERROR_SUCCESS;
1459 }
1460 
1461 /**
1462   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1463            symmetric QR factorization
1464 
1465   @param ceed         A Ceed context for error handling
1466   @param[in,out] mat  Row-major matrix to be factorized in place
1467   @param[out] lambda  Vector of length n of eigenvalues
1468   @param n            Number of rows/columns
1469 
1470   @return An error code: 0 - success, otherwise - failure
1471 
1472   @ref Utility
1473 **/
1474 CeedPragmaOptimizeOff
1475 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat,
1476                                     CeedScalar *lambda, CeedInt n) {
1477   // Check bounds for clang-tidy
1478   if (n<2)
1479     // LCOV_EXCL_START
1480     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1481                      "Cannot compute symmetric Schur decomposition of scalars");
1482   // LCOV_EXCL_STOP
1483 
1484   CeedScalar v[n-1], tau[n-1], mat_T[n*n];
1485 
1486   // Copy mat to mat_T and set mat to I
1487   memcpy(mat_T, mat, n*n*sizeof(mat[0]));
1488   for (CeedInt i=0; i<n; i++)
1489     for (CeedInt j=0; j<n; j++)
1490       mat[j+n*i] = (i==j) ? 1 : 0;
1491 
1492   // Reduce to tridiagonal
1493   for (CeedInt i=0; i<n-1; i++) {
1494     // Calculate Householder vector, magnitude
1495     CeedScalar sigma = 0.0;
1496     v[i] = mat_T[i+n*(i+1)];
1497     for (CeedInt j=i+1; j<n-1; j++) {
1498       v[j] = mat_T[i+n*(j+1)];
1499       sigma += v[j] * v[j];
1500     }
1501     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1]
1502     CeedScalar R_ii = -copysign(norm, v[i]);
1503     v[i] -= R_ii;
1504     // norm of v[i:m] after modification above and scaling below
1505     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1506     //   tau = 2 / (norm*norm)
1507     tau[i] = i == n - 2 ? 2 : 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1508     for (CeedInt j=i+1; j<n-1; j++)
1509       v[j] /= v[i];
1510 
1511     // Update sub and super diagonal
1512     for (CeedInt j=i+2; j<n; j++) {
1513       mat_T[i+n*j] = 0; mat_T[j+n*i] = 0;
1514     }
1515     // Apply symmetric Householder reflector to lower right panel
1516     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1517                            n-(i+1), n-(i+1), n, 1);
1518     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1519                            n-(i+1), n-(i+1), 1, n);
1520 
1521     // Save v
1522     mat_T[i+n*(i+1)] = R_ii;
1523     mat_T[(i+1)+n*i] = R_ii;
1524     for (CeedInt j=i+1; j<n-1; j++) {
1525       mat_T[i+n*(j+1)] = v[j];
1526     }
1527   }
1528   // Backwards accumulation of Q
1529   for (CeedInt i=n-2; i>=0; i--) {
1530     if (tau[i] > 0.0) {
1531       v[i] = 1;
1532       for (CeedInt j=i+1; j<n-1; j++) {
1533         v[j] = mat_T[i+n*(j+1)];
1534         mat_T[i+n*(j+1)] = 0;
1535       }
1536       CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i],
1537                              n-(i+1), n-(i+1), n, 1);
1538     }
1539   }
1540 
1541   // Reduce sub and super diagonal
1542   CeedInt p = 0, q = 0, itr = 0, max_itr = n*n*n*n;
1543   CeedScalar tol = CEED_EPSILON;
1544 
1545   while (itr < max_itr) {
1546     // Update p, q, size of reduced portions of diagonal
1547     p = 0; q = 0;
1548     for (CeedInt i=n-2; i>=0; i--) {
1549       if (fabs(mat_T[i+n*(i+1)]) < tol)
1550         q += 1;
1551       else
1552         break;
1553     }
1554     for (CeedInt i=0; i<n-q-1; i++) {
1555       if (fabs(mat_T[i+n*(i+1)]) < tol)
1556         p += 1;
1557       else
1558         break;
1559     }
1560     if (q == n-1) break; // Finished reducing
1561 
1562     // Reduce tridiagonal portion
1563     CeedScalar t_nn = mat_T[(n-1-q)+n*(n-1-q)],
1564                t_nnm1 = mat_T[(n-2-q)+n*(n-1-q)];
1565     CeedScalar d = (mat_T[(n-2-q)+n*(n-2-q)] - t_nn)/2;
1566     CeedScalar mu = t_nn - t_nnm1*t_nnm1 /
1567                     (d + copysign(sqrt(d*d + t_nnm1*t_nnm1), d));
1568     CeedScalar x = mat_T[p+n*p] - mu;
1569     CeedScalar z = mat_T[p+n*(p+1)];
1570     for (CeedInt k=p; k<n-q-1; k++) {
1571       // Compute Givens rotation
1572       CeedScalar c = 1, s = 0;
1573       if (fabs(z) > tol) {
1574         if (fabs(z) > fabs(x)) {
1575           CeedScalar tau = -x/z;
1576           s = 1/sqrt(1+tau*tau), c = s*tau;
1577         } else {
1578           CeedScalar tau = -z/x;
1579           c = 1/sqrt(1+tau*tau), s = c*tau;
1580         }
1581       }
1582 
1583       // Apply Givens rotation to T
1584       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1585       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k+1, n, n);
1586 
1587       // Apply Givens rotation to Q
1588       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1589 
1590       // Update x, z
1591       if (k < n-q-2) {
1592         x = mat_T[k+n*(k+1)];
1593         z = mat_T[k+n*(k+2)];
1594       }
1595     }
1596     itr++;
1597   }
1598 
1599   // Save eigenvalues
1600   for (CeedInt i=0; i<n; i++)
1601     lambda[i] = mat_T[i+n*i];
1602 
1603   // Check convergence
1604   if (itr == max_itr && q < n-1)
1605     // LCOV_EXCL_START
1606     return CeedError(ceed, CEED_ERROR_MINOR,
1607                      "Symmetric QR failed to converge");
1608   // LCOV_EXCL_STOP
1609   return CEED_ERROR_SUCCESS;
1610 }
1611 CeedPragmaOptimizeOn
1612 
1613 /**
1614   @brief Return Simultaneous Diagonalization of two matrices. This solves the
1615            generalized eigenvalue problem A x = lambda B x, where A and B
1616            are symmetric and B is positive definite. We generate the matrix X
1617            and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1618            is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1619 
1620   @param ceed         A Ceed context for error handling
1621   @param[in] mat_A    Row-major matrix to be factorized with eigenvalues
1622   @param[in] mat_B    Row-major matrix to be factorized to identity
1623   @param[out] mat_X   Row-major orthogonal matrix
1624   @param[out] lambda  Vector of length n of generalized eigenvalues
1625   @param n            Number of rows/columns
1626 
1627   @return An error code: 0 - success, otherwise - failure
1628 
1629   @ref Utility
1630 **/
1631 CeedPragmaOptimizeOff
1632 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A,
1633                                     CeedScalar *mat_B, CeedScalar *mat_X,
1634                                     CeedScalar *lambda, CeedInt n) {
1635   int ierr;
1636   CeedScalar *mat_C, *mat_G, *vec_D;
1637   ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr);
1638   ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr);
1639   ierr = CeedCalloc(n, &vec_D); CeedChk(ierr);
1640 
1641   // Compute B = G D G^T
1642   memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0]));
1643   ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n); CeedChk(ierr);
1644 
1645   // Sort eigenvalues
1646   for (CeedInt i=n-1; i>=0; i--)
1647     for (CeedInt j=0; j<i; j++) {
1648       if (fabs(vec_D[j]) > fabs(vec_D[j+1])) {
1649         CeedScalar temp;
1650         temp = vec_D[j]; vec_D[j] = vec_D[j+1]; vec_D[j+1] = temp;
1651         for (CeedInt k=0; k<n; k++) {
1652           temp = mat_G[k*n+j]; mat_G[k*n+j] = mat_G[k*n+j+1]; mat_G[k*n+j+1] = temp;
1653         }
1654       }
1655     }
1656 
1657   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1658   //           = D^-1/2 G^T A G D^-1/2
1659   // -- D = D^-1/2
1660   for (CeedInt i=0; i<n; i++)
1661     vec_D[i] = 1./sqrt(vec_D[i]);
1662   // -- G = G D^-1/2
1663   // -- C = D^-1/2 G^T
1664   for (CeedInt i=0; i<n; i++)
1665     for (CeedInt j=0; j<n; j++) {
1666       mat_G[i*n+j] *= vec_D[j];
1667       mat_C[j*n+i]  = mat_G[i*n+j];
1668     }
1669   // -- X = (D^-1/2 G^T) A
1670   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_C,
1671                             (const CeedScalar *)mat_A, mat_X, n, n, n);
1672   CeedChk(ierr);
1673   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1674   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_X,
1675                             (const CeedScalar *)mat_G, mat_C, n, n, n);
1676   CeedChk(ierr);
1677 
1678   // Compute Q^T C Q = lambda
1679   ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr);
1680 
1681   // Sort eigenvalues
1682   for (CeedInt i=n-1; i>=0; i--)
1683     for (CeedInt j=0; j<i; j++) {
1684       if (fabs(lambda[j]) > fabs(lambda[j+1])) {
1685         CeedScalar temp;
1686         temp = lambda[j]; lambda[j] = lambda[j+1]; lambda[j+1] = temp;
1687         for (CeedInt k=0; k<n; k++) {
1688           temp = mat_C[k*n+j]; mat_C[k*n+j] = mat_C[k*n+j+1]; mat_C[k*n+j+1] = temp;
1689         }
1690       }
1691     }
1692 
1693   // Set X = (G D^1/2)^-T Q
1694   //       = G D^-1/2 Q
1695   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_G,
1696                             (const CeedScalar *)mat_C, mat_X, n, n, n);
1697   CeedChk(ierr);
1698 
1699   // Cleanup
1700   ierr = CeedFree(&mat_C); CeedChk(ierr);
1701   ierr = CeedFree(&mat_G); CeedChk(ierr);
1702   ierr = CeedFree(&vec_D); CeedChk(ierr);
1703   return CEED_ERROR_SUCCESS;
1704 }
1705 CeedPragmaOptimizeOn
1706 
1707 /// @}
1708