xref: /libCEED/interface/ceed-basis.c (revision 784646086dc2cd22411d95107c6b787565214f18)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedBasis interfaces
27 
28 /// @cond DOXYGEN_SKIP
29 static struct CeedBasis_private ceed_basis_collocated;
30 /// @endcond
31 
32 /// @addtogroup CeedBasisUser
33 /// @{
34 
35 /// Indicate that the quadrature points are collocated with the nodes
36 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
37 
38 /// @}
39 
40 /// ----------------------------------------------------------------------------
41 /// CeedBasis Library Internal Functions
42 /// ----------------------------------------------------------------------------
43 /// @addtogroup CeedBasisDeveloper
44 /// @{
45 
46 /**
47   @brief Compute Householder reflection
48 
49     Computes A = (I - b v v^T) A
50     where A is an mxn matrix indexed as A[i*row + j*col]
51 
52   @param[in,out] A  Matrix to apply Householder reflection to, in place
53   @param v          Householder vector
54   @param b          Scaling factor
55   @param m          Number of rows in A
56   @param n          Number of columns in A
57   @param row        Row stride
58   @param col        Col stride
59 
60   @return An error code: 0 - success, otherwise - failure
61 
62   @ref Developer
63 **/
64 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v,
65                                   CeedScalar b, CeedInt m, CeedInt n,
66                                   CeedInt row, CeedInt col) {
67   for (CeedInt j=0; j<n; j++) {
68     CeedScalar w = A[0*row + j*col];
69     for (CeedInt i=1; i<m; i++)
70       w += v[i] * A[i*row + j*col];
71     A[0*row + j*col] -= b * w;
72     for (CeedInt i=1; i<m; i++)
73       A[i*row + j*col] -= b * w * v[i];
74   }
75   return CEED_ERROR_SUCCESS;
76 }
77 
78 /**
79   @brief Apply Householder Q matrix
80 
81     Compute A = Q A where Q is mxm and A is mxn.
82 
83   @param[in,out] A  Matrix to apply Householder Q to, in place
84   @param Q          Householder Q matrix
85   @param tau        Householder scaling factors
86   @param t_mode    Transpose mode for application
87   @param m          Number of rows in A
88   @param n          Number of columns in A
89   @param k          Number of elementary reflectors in Q, k<m
90   @param row        Row stride in A
91   @param col        Col stride in A
92 
93   @return An error code: 0 - success, otherwise - failure
94 
95   @ref Developer
96 **/
97 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q,
98                           const CeedScalar *tau, CeedTransposeMode t_mode,
99                           CeedInt m, CeedInt n, CeedInt k,
100                           CeedInt row, CeedInt col) {
101   int ierr;
102   CeedScalar *v;
103   ierr = CeedMalloc(m, &v); CeedChk(ierr);
104   for (CeedInt ii=0; ii<k; ii++) {
105     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii;
106     for (CeedInt j=i+1; j<m; j++)
107       v[j] = Q[j*k+i];
108     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
109     ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
110     CeedChk(ierr);
111   }
112   ierr = CeedFree(&v); CeedChk(ierr);
113   return CEED_ERROR_SUCCESS;
114 }
115 
116 /**
117   @brief Compute Givens rotation
118 
119     Computes A = G A (or G^T A in transpose mode)
120     where A is an mxn matrix indexed as A[i*n + j*m]
121 
122   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
123   @param c          Cosine factor
124   @param s          Sine factor
125   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
126                     which has the effect of rotating columns of A clockwise;
127                     @ref CEED_TRANSPOSE for the opposite rotation
128   @param i          First row/column to apply rotation
129   @param k          Second row/column to apply rotation
130   @param m          Number of rows in A
131   @param n          Number of columns in A
132 
133   @return An error code: 0 - success, otherwise - failure
134 
135   @ref Developer
136 **/
137 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
138                               CeedTransposeMode t_mode, CeedInt i, CeedInt k,
139                               CeedInt m, CeedInt n) {
140   CeedInt stride_j = 1, stride_ik = m, num_its = n;
141   if (t_mode == CEED_NOTRANSPOSE) {
142     stride_j = n; stride_ik = 1; num_its = m;
143   }
144 
145   // Apply rotation
146   for (CeedInt j=0; j<num_its; j++) {
147     CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j];
148     A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2;
149     A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2;
150   }
151   return CEED_ERROR_SUCCESS;
152 }
153 
154 /**
155   @brief View an array stored in a CeedBasis
156 
157   @param[in] name      Name of array
158   @param[in] fp_fmt    Printing format
159   @param[in] m         Number of rows in array
160   @param[in] n         Number of columns in array
161   @param[in] a         Array to be viewed
162   @param[in] stream    Stream to view to, e.g., stdout
163 
164   @return An error code: 0 - success, otherwise - failure
165 
166   @ref Developer
167 **/
168 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
169                           CeedInt n, const CeedScalar *a, FILE *stream) {
170   for (int i=0; i<m; i++) {
171     if (m > 1)
172       fprintf(stream, "%12s[%d]:", name, i);
173     else
174       fprintf(stream, "%12s:", name);
175     for (int j=0; j<n; j++)
176       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
177     fputs("\n", stream);
178   }
179   return CEED_ERROR_SUCCESS;
180 }
181 
182 /// @}
183 
184 /// ----------------------------------------------------------------------------
185 /// Ceed Backend API
186 /// ----------------------------------------------------------------------------
187 /// @addtogroup CeedBasisBackend
188 /// @{
189 
190 /**
191   @brief Return collocated grad matrix
192 
193   @param basis               CeedBasis
194   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
195                                basis functions at quadrature points
196 
197   @return An error code: 0 - success, otherwise - failure
198 
199   @ref Backend
200 **/
201 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
202   int i, j, k;
203   Ceed ceed;
204   CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d;
205   CeedScalar *interp_1d, *grad_1d, *tau;
206 
207   ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr);
208   ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr);
209   ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr);
210   memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
211   memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
212 
213   // QR Factorization, interp_1d = Q R
214   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
215   ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr);
216   // Note: This function is for backend use, so all errors are terminal
217   //   and we do not need to clean up memory on failure.
218 
219   // Apply Rinv, collo_grad_1d = grad_1d Rinv
220   for (i=0; i<Q_1d; i++) { // Row i
221     collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0];
222     for (j=1; j<P_1d; j++) { // Column j
223       collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i];
224       for (k=0; k<j; k++)
225         collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i];
226       collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j];
227     }
228     for (j=P_1d; j<Q_1d; j++)
229       collo_grad_1d[j+Q_1d*i] = 0;
230   }
231 
232   // Apply Qtranspose, collograd = collo_grad Q_transpose
233   ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE,
234                                Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr);
235 
236   ierr = CeedFree(&interp_1d); CeedChk(ierr);
237   ierr = CeedFree(&grad_1d); CeedChk(ierr);
238   ierr = CeedFree(&tau); CeedChk(ierr);
239   return CEED_ERROR_SUCCESS;
240 }
241 
242 /**
243   @brief Get Ceed associated with a CeedBasis
244 
245   @param basis      CeedBasis
246   @param[out] ceed  Variable to store Ceed
247 
248   @return An error code: 0 - success, otherwise - failure
249 
250   @ref Backend
251 **/
252 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
253   *ceed = basis->ceed;
254   return CEED_ERROR_SUCCESS;
255 }
256 
257 /**
258   @brief Get tensor status for given CeedBasis
259 
260   @param basis           CeedBasis
261   @param[out] is_tensor  Variable to store tensor status
262 
263   @return An error code: 0 - success, otherwise - failure
264 
265   @ref Backend
266 **/
267 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
268   *is_tensor = basis->tensor_basis;
269   return CEED_ERROR_SUCCESS;
270 }
271 
272 /**
273   @brief Get backend data of a CeedBasis
274 
275   @param basis      CeedBasis
276   @param[out] data  Variable to store data
277 
278   @return An error code: 0 - success, otherwise - failure
279 
280   @ref Backend
281 **/
282 int CeedBasisGetData(CeedBasis basis, void *data) {
283   *(void **)data = basis->data;
284   return CEED_ERROR_SUCCESS;
285 }
286 
287 /**
288   @brief Set backend data of a CeedBasis
289 
290   @param[out] basis  CeedBasis
291   @param data        Data to set
292 
293   @return An error code: 0 - success, otherwise - failure
294 
295   @ref Backend
296 **/
297 int CeedBasisSetData(CeedBasis basis, void *data) {
298   basis->data = data;
299   return CEED_ERROR_SUCCESS;
300 }
301 
302 /**
303   @brief Get dimension for given CeedElemTopology
304 
305   @param topo      CeedElemTopology
306   @param[out] dim  Variable to store dimension of topology
307 
308   @return An error code: 0 - success, otherwise - failure
309 
310   @ref Backend
311 **/
312 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
313   *dim = (CeedInt) topo >> 16;
314   return CEED_ERROR_SUCCESS;
315 }
316 
317 /**
318   @brief Get CeedTensorContract of a CeedBasis
319 
320   @param basis          CeedBasis
321   @param[out] contract  Variable to store CeedTensorContract
322 
323   @return An error code: 0 - success, otherwise - failure
324 
325   @ref Backend
326 **/
327 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
328   *contract = basis->contract;
329   return CEED_ERROR_SUCCESS;
330 }
331 
332 /**
333   @brief Set CeedTensorContract of a CeedBasis
334 
335   @param[out] basis  CeedBasis
336   @param contract    CeedTensorContract to set
337 
338   @return An error code: 0 - success, otherwise - failure
339 
340   @ref Backend
341 **/
342 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
343   basis->contract = *contract;
344   return CEED_ERROR_SUCCESS;
345 }
346 
347 /**
348   @brief Return a reference implementation of matrix multiplication C = A B.
349            Note, this is a reference implementation for CPU CeedScalar pointers
350            that is not intended for high performance.
351 
352   @param ceed        A Ceed context for error handling
353   @param[in] mat_A   Row-major matrix A
354   @param[in] mat_B   Row-major matrix B
355   @param[out] mat_C  Row-major output matrix C
356   @param m           Number of rows of C
357   @param n           Number of columns of C
358   @param kk          Number of columns of A/rows of B
359 
360   @return An error code: 0 - success, otherwise - failure
361 
362   @ref Utility
363 **/
364 int CeedMatrixMultiply(Ceed ceed, const CeedScalar *mat_A,
365                        const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m,
366                        CeedInt n, CeedInt kk) {
367   for (CeedInt i=0; i<m; i++)
368     for (CeedInt j=0; j<n; j++) {
369       CeedScalar sum = 0;
370       for (CeedInt k=0; k<kk; k++)
371         sum += mat_A[k+i*kk]*mat_B[j+k*n];
372       mat_C[j+i*n] = sum;
373     }
374   return CEED_ERROR_SUCCESS;
375 }
376 
377 /// @}
378 
379 /// ----------------------------------------------------------------------------
380 /// CeedBasis Public API
381 /// ----------------------------------------------------------------------------
382 /// @addtogroup CeedBasisUser
383 /// @{
384 
385 /**
386   @brief Create a tensor-product basis for H^1 discretizations
387 
388   @param ceed        A Ceed object where the CeedBasis will be created
389   @param dim         Topological dimension
390   @param num_comp    Number of field components (1 for scalar fields)
391   @param P_1d        Number of nodes in one dimension
392   @param Q_1d        Number of quadrature points in one dimension
393   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
394                        basis functions at quadrature points
395   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
396                        basis functions at quadrature points
397   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
398                        on the 1D reference element [-1, 1]
399   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
400                        reference element
401   @param[out] basis  Address of the variable where the newly created
402                        CeedBasis will be stored.
403 
404   @return An error code: 0 - success, otherwise - failure
405 
406   @ref User
407 **/
408 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
409                             CeedInt P_1d, CeedInt Q_1d,
410                             const CeedScalar *interp_1d,
411                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d,
412                             const CeedScalar *q_weight_1d, CeedBasis *basis) {
413   int ierr;
414 
415   if (!ceed->BasisCreateTensorH1) {
416     Ceed delegate;
417     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
418 
419     if (!delegate)
420       // LCOV_EXCL_START
421       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
422                        "Backend does not support BasisCreateTensorH1");
423     // LCOV_EXCL_STOP
424 
425     ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d,
426                                    Q_1d, interp_1d, grad_1d, q_ref_1d,
427                                    q_weight_1d, basis); CeedChk(ierr);
428     return CEED_ERROR_SUCCESS;
429   }
430 
431   if (dim<1)
432     // LCOV_EXCL_START
433     return CeedError(ceed, CEED_ERROR_DIMENSION,
434                      "Basis dimension must be a positive value");
435   // LCOV_EXCL_STOP
436   CeedElemTopology topo = dim == 1 ? CEED_LINE
437                           : dim == 2 ? CEED_QUAD
438                           : CEED_HEX;
439 
440   ierr = CeedCalloc(1, basis); CeedChk(ierr);
441   (*basis)->ceed = ceed;
442   ceed->ref_count++;
443   (*basis)->ref_count = 1;
444   (*basis)->tensor_basis = 1;
445   (*basis)->dim = dim;
446   (*basis)->topo = topo;
447   (*basis)->num_comp = num_comp;
448   (*basis)->P_1d = P_1d;
449   (*basis)->Q_1d = Q_1d;
450   (*basis)->P = CeedIntPow(P_1d, dim);
451   (*basis)->Q = CeedIntPow(Q_1d, dim);
452   ierr = CeedMalloc(Q_1d,&(*basis)->q_ref_1d); CeedChk(ierr);
453   ierr = CeedMalloc(Q_1d,&(*basis)->q_weight_1d); CeedChk(ierr);
454   memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0]));
455   memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d*sizeof(q_weight_1d[0]));
456   ierr = CeedMalloc(Q_1d*P_1d,&(*basis)->interp_1d); CeedChk(ierr);
457   ierr = CeedMalloc(Q_1d*P_1d,&(*basis)->grad_1d); CeedChk(ierr);
458   memcpy((*basis)->interp_1d, interp_1d, Q_1d*P_1d*sizeof(interp_1d[0]));
459   memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0]));
460   ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d,
461                                    q_weight_1d, *basis); CeedChk(ierr);
462   return CEED_ERROR_SUCCESS;
463 }
464 
465 /**
466   @brief Create a tensor-product Lagrange basis
467 
468   @param ceed        A Ceed object where the CeedBasis will be created
469   @param dim         Topological dimension of element
470   @param num_comp      Number of field components (1 for scalar fields)
471   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
472                        polynomial degree of the resulting Q_k element is k=P-1.
473   @param Q           Number of quadrature points in one dimension.
474   @param quad_mode   Distribution of the Q quadrature points (affects order of
475                        accuracy for the quadrature)
476   @param[out] basis  Address of the variable where the newly created
477                        CeedBasis will be stored.
478 
479   @return An error code: 0 - success, otherwise - failure
480 
481   @ref User
482 **/
483 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
484                                     CeedInt P, CeedInt Q, CeedQuadMode quad_mode,
485                                     CeedBasis *basis) {
486   // Allocate
487   int ierr, ierr2, i, j, k;
488   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
489              *q_weight_1d;
490 
491   if (dim<1)
492     // LCOV_EXCL_START
493     return CeedError(ceed, CEED_ERROR_DIMENSION,
494                      "Basis dimension must be a positive value");
495   // LCOV_EXCL_STOP
496 
497   // Get Nodes and Weights
498   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
499   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
500   ierr = CeedCalloc(P, &nodes); CeedChk(ierr);
501   ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr);
502   ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr);
503   ierr = CeedLobattoQuadrature(P, nodes, NULL);
504   if (ierr) { goto cleanup; } CeedChk(ierr);
505   switch (quad_mode) {
506   case CEED_GAUSS:
507     ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
508     break;
509   case CEED_GAUSS_LOBATTO:
510     ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
511     break;
512   }
513   if (ierr) { goto cleanup; } CeedChk(ierr);
514 
515   // Build B, D matrix
516   // Fornberg, 1998
517   for (i = 0; i  < Q; i++) {
518     c1 = 1.0;
519     c3 = nodes[0] - q_ref_1d[i];
520     interp_1d[i*P+0] = 1.0;
521     for (j = 1; j < P; j++) {
522       c2 = 1.0;
523       c4 = c3;
524       c3 = nodes[j] - q_ref_1d[i];
525       for (k = 0; k < j; k++) {
526         dx = nodes[j] - nodes[k];
527         c2 *= dx;
528         if (k == j - 1) {
529           grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2;
530           interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2;
531         }
532         grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx;
533         interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx;
534       }
535       c1 = c2;
536     }
537   }
538   //  // Pass to CeedBasisCreateTensorH1
539   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d,
540                                  q_ref_1d,
541                                  q_weight_1d, basis); CeedChk(ierr);
542 cleanup:
543   ierr2 = CeedFree(&interp_1d); CeedChk(ierr2);
544   ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);
545   ierr2 = CeedFree(&nodes); CeedChk(ierr2);
546   ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2);
547   ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2);
548   CeedChk(ierr);
549   return CEED_ERROR_SUCCESS;
550 }
551 
552 /**
553   @brief Create a non tensor-product basis for H^1 discretizations
554 
555   @param ceed        A Ceed object where the CeedBasis will be created
556   @param topo        Topology of element, e.g. hypercube, simplex, ect
557   @param num_comp    Number of field components (1 for scalar fields)
558   @param num_nodes   Total number of nodes
559   @param num_qpts    Total number of quadrature points
560   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
561                        nodal basis functions at quadrature points
562   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
563                        derivatives of nodal basis functions at quadrature points
564   @param q_ref       Array of length num_qpts holding the locations of quadrature
565                        points on the reference element [-1, 1]
566   @param q_weight    Array of length num_qpts holding the quadrature weights on the
567                        reference element
568   @param[out] basis  Address of the variable where the newly created
569                        CeedBasis will be stored.
570 
571   @return An error code: 0 - success, otherwise - failure
572 
573   @ref User
574 **/
575 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
576                       CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
577                       const CeedScalar *grad, const CeedScalar *q_ref,
578                       const CeedScalar *q_weight, CeedBasis *basis) {
579   int ierr;
580   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
581 
582   if (!ceed->BasisCreateH1) {
583     Ceed delegate;
584     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
585 
586     if (!delegate)
587       // LCOV_EXCL_START
588       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
589                        "Backend does not support BasisCreateH1");
590     // LCOV_EXCL_STOP
591 
592     ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes,
593                              num_qpts, interp, grad, q_ref,
594                              q_weight, basis); CeedChk(ierr);
595     return CEED_ERROR_SUCCESS;
596   }
597 
598   ierr = CeedCalloc(1,basis); CeedChk(ierr);
599 
600   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
601 
602   (*basis)->ceed = ceed;
603   ceed->ref_count++;
604   (*basis)->ref_count = 1;
605   (*basis)->tensor_basis = 0;
606   (*basis)->dim = dim;
607   (*basis)->topo = topo;
608   (*basis)->num_comp = num_comp;
609   (*basis)->P = P;
610   (*basis)->Q = Q;
611   ierr = CeedMalloc(Q*dim,&(*basis)->q_ref_1d); CeedChk(ierr);
612   ierr = CeedMalloc(Q,&(*basis)->q_weight_1d); CeedChk(ierr);
613   memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
614   memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
615   ierr = CeedMalloc(Q*P, &(*basis)->interp); CeedChk(ierr);
616   ierr = CeedMalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr);
617   memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0]));
618   memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0]));
619   ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref,
620                              q_weight, *basis); CeedChk(ierr);
621   return CEED_ERROR_SUCCESS;
622 }
623 
624 /**
625   @brief View a CeedBasis
626 
627   @param basis   CeedBasis to view
628   @param stream  Stream to view to, e.g., stdout
629 
630   @return An error code: 0 - success, otherwise - failure
631 
632   @ref User
633 **/
634 int CeedBasisView(CeedBasis basis, FILE *stream) {
635   int ierr;
636 
637   if (basis->tensor_basis) {
638     fprintf(stream, "CeedBasis: dim=%d P=%d Q=%d\n", basis->dim, basis->P_1d,
639             basis->Q_1d);
640     ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d,
641                           stream); CeedChk(ierr);
642     ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d,
643                           basis->q_weight_1d, stream); CeedChk(ierr);
644     ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
645                           basis->interp_1d, stream); CeedChk(ierr);
646     ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
647                           basis->grad_1d, stream); CeedChk(ierr);
648   } else {
649     fprintf(stream, "CeedBasis: dim=%d P=%d Q=%d\n", basis->dim, basis->P,
650             basis->Q);
651     ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim,
652                           basis->q_ref_1d,
653                           stream); CeedChk(ierr);
654     ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d,
655                           stream); CeedChk(ierr);
656     ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q, basis->P,
657                           basis->interp, stream); CeedChk(ierr);
658     ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P,
659                           basis->grad, stream); CeedChk(ierr);
660   }
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Apply basis evaluation from nodes to quadrature points or vice versa
666 
667   @param basis     CeedBasis to evaluate
668   @param num_elem  The number of elements to apply the basis evaluation to;
669                      the backend will specify the ordering in
670                      CeedElemRestrictionCreateBlocked()
671   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
672                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
673                      from quadrature points to nodes
674   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
675                      \ref CEED_EVAL_INTERP to use interpolated values,
676                      \ref CEED_EVAL_GRAD to use gradients,
677                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
678   @param[in] u     Input CeedVector
679   @param[out] v    Output CeedVector
680 
681   @return An error code: 0 - success, otherwise - failure
682 
683   @ref User
684 **/
685 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
686                    CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
687   int ierr;
688   CeedInt u_length = 0, v_length, dim, num_comp, num_nodes, num_qpts;
689   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
690   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
691   ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
692   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
693   ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr);
694   if (u) {
695     ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr);
696   }
697 
698   if (!basis->Apply)
699     // LCOV_EXCL_START
700     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED,
701                      "Backend does not support BasisApply");
702   // LCOV_EXCL_STOP
703 
704   // Check compatibility of topological and geometrical dimensions
705   if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 ||
706                                     u_length%num_qpts != 0)) ||
707       (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 ||
708                                       v_length%num_qpts != 0)))
709     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
710                      "Length of input/output vectors "
711                      "incompatible with basis dimensions");
712 
713   // Check vector lengths to prevent out of bounds issues
714   bool bad_dims = false;
715   switch (eval_mode) {
716   case CEED_EVAL_NONE:
717   case CEED_EVAL_INTERP: bad_dims =
718       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
719                                      v_length < num_elem*num_comp*num_nodes)) ||
720        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
721                                        u_length < num_elem*num_comp*num_nodes)));
722     break;
723   case CEED_EVAL_GRAD: bad_dims =
724       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim ||
725                                      v_length < num_elem*num_comp*num_nodes)) ||
726        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim ||
727                                        u_length < num_elem*num_comp*num_nodes)));
728     break;
729   case CEED_EVAL_WEIGHT:
730     bad_dims = v_length < num_elem*num_qpts;
731     break;
732   // LCOV_EXCL_START
733   case CEED_EVAL_DIV: bad_dims =
734       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
735                                      v_length < num_elem*num_comp*num_nodes)) ||
736        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
737                                        u_length < num_elem*num_comp*num_nodes)));
738     break;
739   case CEED_EVAL_CURL: bad_dims =
740       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
741                                      v_length < num_elem*num_comp*num_nodes)) ||
742        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
743                                        u_length < num_elem*num_comp*num_nodes)));
744     break;
745     // LCOV_EXCL_STOP
746   }
747   if (bad_dims)
748     // LCOV_EXCL_START
749     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
750                      "Input/output vectors too short for basis and evaluation mode");
751   // LCOV_EXCL_STOP
752 
753   ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr);
754   return CEED_ERROR_SUCCESS;
755 }
756 
757 /**
758   @brief Get dimension for given CeedBasis
759 
760   @param basis     CeedBasis
761   @param[out] dim  Variable to store dimension of basis
762 
763   @return An error code: 0 - success, otherwise - failure
764 
765   @ref Backend
766 **/
767 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
768   *dim = basis->dim;
769   return CEED_ERROR_SUCCESS;
770 }
771 
772 /**
773   @brief Get topology for given CeedBasis
774 
775   @param basis      CeedBasis
776   @param[out] topo  Variable to store topology of basis
777 
778   @return An error code: 0 - success, otherwise - failure
779 
780   @ref Backend
781 **/
782 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
783   *topo = basis->topo;
784   return CEED_ERROR_SUCCESS;
785 }
786 
787 /**
788   @brief Get number of components for given CeedBasis
789 
790   @param basis          CeedBasis
791   @param[out] num_comp  Variable to store number of components of basis
792 
793   @return An error code: 0 - success, otherwise - failure
794 
795   @ref Backend
796 **/
797 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
798   *num_comp = basis->num_comp;
799   return CEED_ERROR_SUCCESS;
800 }
801 
802 /**
803   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
804 
805   @param basis   CeedBasis
806   @param[out] P  Variable to store number of nodes
807 
808   @return An error code: 0 - success, otherwise - failure
809 
810   @ref Utility
811 **/
812 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
813   *P = basis->P;
814   return CEED_ERROR_SUCCESS;
815 }
816 
817 /**
818   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
819 
820   @param basis     CeedBasis
821   @param[out] P_1d  Variable to store number of nodes
822 
823   @return An error code: 0 - success, otherwise - failure
824 
825   @ref Backend
826 **/
827 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
828   if (!basis->tensor_basis)
829     // LCOV_EXCL_START
830     return CeedError(basis->ceed, CEED_ERROR_MINOR,
831                      "Cannot supply P_1d for non-tensor basis");
832   // LCOV_EXCL_STOP
833 
834   *P_1d = basis->P_1d;
835   return CEED_ERROR_SUCCESS;
836 }
837 
838 /**
839   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
840 
841   @param basis   CeedBasis
842   @param[out] Q  Variable to store number of quadrature points
843 
844   @return An error code: 0 - success, otherwise - failure
845 
846   @ref Utility
847 **/
848 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
849   *Q = basis->Q;
850   return CEED_ERROR_SUCCESS;
851 }
852 
853 /**
854   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
855 
856   @param basis     CeedBasis
857   @param[out] Q_1d  Variable to store number of quadrature points
858 
859   @return An error code: 0 - success, otherwise - failure
860 
861   @ref Backend
862 **/
863 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
864   if (!basis->tensor_basis)
865     // LCOV_EXCL_START
866     return CeedError(basis->ceed, CEED_ERROR_MINOR,
867                      "Cannot supply Q_1d for non-tensor basis");
868   // LCOV_EXCL_STOP
869 
870   *Q_1d = basis->Q_1d;
871   return CEED_ERROR_SUCCESS;
872 }
873 
874 /**
875   @brief Get reference coordinates of quadrature points (in dim dimensions)
876          of a CeedBasis
877 
878   @param basis       CeedBasis
879   @param[out] q_ref  Variable to store reference coordinates of quadrature points
880 
881   @return An error code: 0 - success, otherwise - failure
882 
883   @ref Backend
884 **/
885 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
886   *q_ref = basis->q_ref_1d;
887   return CEED_ERROR_SUCCESS;
888 }
889 
890 /**
891   @brief Get quadrature weights of quadrature points (in dim dimensions)
892          of a CeedBasis
893 
894   @param basis         CeedBasis
895   @param[out] q_weight  Variable to store quadrature weights
896 
897   @return An error code: 0 - success, otherwise - failure
898 
899   @ref Backend
900 **/
901 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
902   *q_weight = basis->q_weight_1d;
903   return CEED_ERROR_SUCCESS;
904 }
905 
906 /**
907   @brief Get interpolation matrix of a CeedBasis
908 
909   @param basis        CeedBasis
910   @param[out] interp  Variable to store interpolation matrix
911 
912   @return An error code: 0 - success, otherwise - failure
913 
914   @ref Backend
915 **/
916 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
917   if (!basis->interp && basis->tensor_basis) {
918     // Allocate
919     int ierr;
920     ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr);
921 
922     // Initialize
923     for (CeedInt i=0; i<basis->Q*basis->P; i++)
924       basis->interp[i] = 1.0;
925 
926     // Calculate
927     for (CeedInt d=0; d<basis->dim; d++)
928       for (CeedInt qpt=0; qpt<basis->Q; qpt++)
929         for (CeedInt node=0; node<basis->P; node++) {
930           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
931           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
932           basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p];
933         }
934   }
935   *interp = basis->interp;
936   return CEED_ERROR_SUCCESS;
937 }
938 
939 /**
940   @brief Get 1D interpolation matrix of a tensor product CeedBasis
941 
942   @param basis           CeedBasis
943   @param[out] interp_1d  Variable to store interpolation matrix
944 
945   @return An error code: 0 - success, otherwise - failure
946 
947   @ref Backend
948 **/
949 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
950   if (!basis->tensor_basis)
951     // LCOV_EXCL_START
952     return CeedError(basis->ceed, CEED_ERROR_MINOR,
953                      "CeedBasis is not a tensor product basis.");
954   // LCOV_EXCL_STOP
955 
956   *interp_1d = basis->interp_1d;
957   return CEED_ERROR_SUCCESS;
958 }
959 
960 /**
961   @brief Get gradient matrix of a CeedBasis
962 
963   @param basis      CeedBasis
964   @param[out] grad  Variable to store gradient matrix
965 
966   @return An error code: 0 - success, otherwise - failure
967 
968   @ref Backend
969 **/
970 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
971   if (!basis->grad && basis->tensor_basis) {
972     // Allocate
973     int ierr;
974     ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad);
975     CeedChk(ierr);
976 
977     // Initialize
978     for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++)
979       basis->grad[i] = 1.0;
980 
981     // Calculate
982     for (CeedInt d=0; d<basis->dim; d++)
983       for (CeedInt i=0; i<basis->dim; i++)
984         for (CeedInt qpt=0; qpt<basis->Q; qpt++)
985           for (CeedInt node=0; node<basis->P; node++) {
986             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
987             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
988             if (i == d)
989               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
990                 basis->grad_1d[q*basis->P_1d+p];
991             else
992               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
993                 basis->interp_1d[q*basis->P_1d+p];
994           }
995   }
996   *grad = basis->grad;
997   return CEED_ERROR_SUCCESS;
998 }
999 
1000 /**
1001   @brief Get 1D gradient matrix of a tensor product CeedBasis
1002 
1003   @param basis         CeedBasis
1004   @param[out] grad_1d  Variable to store gradient matrix
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 
1008   @ref Backend
1009 **/
1010 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1011   if (!basis->tensor_basis)
1012     // LCOV_EXCL_START
1013     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1014                      "CeedBasis is not a tensor product basis.");
1015   // LCOV_EXCL_STOP
1016 
1017   *grad_1d = basis->grad_1d;
1018   return CEED_ERROR_SUCCESS;
1019 }
1020 
1021 /**
1022   @brief Destroy a CeedBasis
1023 
1024   @param basis CeedBasis to destroy
1025 
1026   @return An error code: 0 - success, otherwise - failure
1027 
1028   @ref User
1029 **/
1030 int CeedBasisDestroy(CeedBasis *basis) {
1031   int ierr;
1032 
1033   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1034   if ((*basis)->Destroy) {
1035     ierr = (*basis)->Destroy(*basis); CeedChk(ierr);
1036   }
1037   ierr = CeedFree(&(*basis)->interp); CeedChk(ierr);
1038   ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr);
1039   ierr = CeedFree(&(*basis)->grad); CeedChk(ierr);
1040   ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr);
1041   ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr);
1042   ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr);
1043   ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr);
1044   ierr = CeedFree(basis); CeedChk(ierr);
1045   return CEED_ERROR_SUCCESS;
1046 }
1047 
1048 /**
1049   @brief Construct a Gauss-Legendre quadrature
1050 
1051   @param Q               Number of quadrature points (integrates polynomials of
1052                            degree 2*Q-1 exactly)
1053   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1054   @param[out] q_weight_1d  Array of length Q to hold the weights
1055 
1056   @return An error code: 0 - success, otherwise - failure
1057 
1058   @ref Utility
1059 **/
1060 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1061                         CeedScalar *q_weight_1d) {
1062   // Allocate
1063   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
1064   // Build q_ref_1d, q_weight_1d
1065   for (int i = 0; i <= Q/2; i++) {
1066     // Guess
1067     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
1068     // Pn(xi)
1069     P0 = 1.0;
1070     P1 = xi;
1071     P2 = 0.0;
1072     for (int j = 2; j <= Q; j++) {
1073       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1074       P0 = P1;
1075       P1 = P2;
1076     }
1077     // First Newton Step
1078     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1079     xi = xi-P2/dP2;
1080     // Newton to convergence
1081     for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
1082       P0 = 1.0;
1083       P1 = xi;
1084       for (int j = 2; j <= Q; j++) {
1085         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1086         P0 = P1;
1087         P1 = P2;
1088       }
1089       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1090       xi = xi-P2/dP2;
1091     }
1092     // Save xi, wi
1093     wi = 2.0/((1.0-xi*xi)*dP2*dP2);
1094     q_weight_1d[i] = wi;
1095     q_weight_1d[Q-1-i] = wi;
1096     q_ref_1d[i] = -xi;
1097     q_ref_1d[Q-1-i]= xi;
1098   }
1099   return CEED_ERROR_SUCCESS;
1100 }
1101 
1102 /**
1103   @brief Construct a Gauss-Legendre-Lobatto quadrature
1104 
1105   @param Q               Number of quadrature points (integrates polynomials of
1106                            degree 2*Q-3 exactly)
1107   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1108   @param[out] q_weight_1d  Array of length Q to hold the weights
1109 
1110   @return An error code: 0 - success, otherwise - failure
1111 
1112   @ref Utility
1113 **/
1114 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1115                           CeedScalar *q_weight_1d) {
1116   // Allocate
1117   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0);
1118   // Build q_ref_1d, q_weight_1d
1119   // Set endpoints
1120   if (Q < 2)
1121     // LCOV_EXCL_START
1122     return CeedError(NULL, CEED_ERROR_DIMENSION,
1123                      "Cannot create Lobatto quadrature with Q=%d < 2 points", Q);
1124   // LCOV_EXCL_STOP
1125   wi = 2.0/((CeedScalar)(Q*(Q-1)));
1126   if (q_weight_1d) {
1127     q_weight_1d[0] = wi;
1128     q_weight_1d[Q-1] = wi;
1129   }
1130   q_ref_1d[0] = -1.0;
1131   q_ref_1d[Q-1] = 1.0;
1132   // Interior
1133   for (int i = 1; i <= (Q-1)/2; i++) {
1134     // Guess
1135     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
1136     // Pn(xi)
1137     P0 = 1.0;
1138     P1 = xi;
1139     P2 = 0.0;
1140     for (int j = 2; j < Q; j++) {
1141       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1142       P0 = P1;
1143       P1 = P2;
1144     }
1145     // First Newton step
1146     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1147     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1148     xi = xi-dP2/d2P2;
1149     // Newton to convergence
1150     for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
1151       P0 = 1.0;
1152       P1 = xi;
1153       for (int j = 2; j < Q; j++) {
1154         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1155         P0 = P1;
1156         P1 = P2;
1157       }
1158       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1159       d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1160       xi = xi-dP2/d2P2;
1161     }
1162     // Save xi, wi
1163     wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2);
1164     if (q_weight_1d) {
1165       q_weight_1d[i] = wi;
1166       q_weight_1d[Q-1-i] = wi;
1167     }
1168     q_ref_1d[i] = -xi;
1169     q_ref_1d[Q-1-i]= xi;
1170   }
1171   return CEED_ERROR_SUCCESS;
1172 }
1173 
1174 /**
1175   @brief Return QR Factorization of a matrix
1176 
1177   @param ceed         A Ceed context for error handling
1178   @param[in,out] mat  Row-major matrix to be factorized in place
1179   @param[in,out] tau  Vector of length m of scaling factors
1180   @param m            Number of rows
1181   @param n            Number of columns
1182 
1183   @return An error code: 0 - success, otherwise - failure
1184 
1185   @ref Utility
1186 **/
1187 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau,
1188                         CeedInt m, CeedInt n) {
1189   CeedScalar v[m];
1190 
1191   // Check m >= n
1192   if (n > m)
1193     // LCOV_EXCL_START
1194     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1195                      "Cannot compute QR factorization with n > m");
1196   // LCOV_EXCL_STOP
1197 
1198   for (CeedInt i=0; i<n; i++) {
1199     // Calculate Householder vector, magnitude
1200     CeedScalar sigma = 0.0;
1201     v[i] = mat[i+n*i];
1202     for (CeedInt j=i+1; j<m; j++) {
1203       v[j] = mat[i+n*j];
1204       sigma += v[j] * v[j];
1205     }
1206     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m]
1207     CeedScalar Rii = -copysign(norm, v[i]);
1208     v[i] -= Rii;
1209     // norm of v[i:m] after modification above and scaling below
1210     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1211     //   tau = 2 / (norm*norm)
1212     tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1213 
1214     for (CeedInt j=i+1; j<m; j++)
1215       v[j] /= v[i];
1216 
1217     // Apply Householder reflector to lower right panel
1218     CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1);
1219     // Save v
1220     mat[i+n*i] = Rii;
1221     for (CeedInt j=i+1; j<m; j++)
1222       mat[i+n*j] = v[j];
1223   }
1224   return CEED_ERROR_SUCCESS;
1225 }
1226 
1227 /**
1228   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1229            symmetric QR factorization
1230 
1231   @param ceed         A Ceed context for error handling
1232   @param[in,out] mat  Row-major matrix to be factorized in place
1233   @param[out] lambda  Vector of length n of eigenvalues
1234   @param n            Number of rows/columns
1235 
1236   @return An error code: 0 - success, otherwise - failure
1237 
1238   @ref Utility
1239 **/
1240 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat,
1241                                     CeedScalar *lambda, CeedInt n) {
1242   // Check bounds for clang-tidy
1243   if (n<2)
1244     // LCOV_EXCL_START
1245     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1246                      "Cannot compute symmetric Schur decomposition of scalars");
1247   // LCOV_EXCL_STOP
1248 
1249   CeedScalar v[n-1], tau[n-1], matT[n*n];
1250 
1251   // Copy mat to matT and set mat to I
1252   memcpy(matT, mat, n*n*sizeof(mat[0]));
1253   for (CeedInt i=0; i<n; i++)
1254     for (CeedInt j=0; j<n; j++)
1255       mat[j+n*i] = (i==j) ? 1 : 0;
1256 
1257   // Reduce to tridiagonal
1258   for (CeedInt i=0; i<n-1; i++) {
1259     // Calculate Householder vector, magnitude
1260     CeedScalar sigma = 0.0;
1261     v[i] = matT[i+n*(i+1)];
1262     for (CeedInt j=i+1; j<n-1; j++) {
1263       v[j] = matT[i+n*(j+1)];
1264       sigma += v[j] * v[j];
1265     }
1266     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1]
1267     CeedScalar Rii = -copysign(norm, v[i]);
1268     v[i] -= Rii;
1269     // norm of v[i:m] after modification above and scaling below
1270     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1271     //   tau = 2 / (norm*norm)
1272     if (sigma > 10*CEED_EPSILON)
1273       tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1274     else
1275       tau[i] = 0;
1276 
1277     for (CeedInt j=i+1; j<n-1; j++)
1278       v[j] /= v[i];
1279 
1280     // Update sub and super diagonal
1281     matT[i+n*(i+1)] = Rii;
1282     matT[(i+1)+n*i] = Rii;
1283     for (CeedInt j=i+2; j<n; j++) {
1284       matT[i+n*j] = 0; matT[j+n*i] = 0;
1285     }
1286     // Apply symmetric Householder reflector to lower right panel
1287     CeedHouseholderReflect(&matT[(i+1)+n*(i+1)], &v[i], tau[i],
1288                            n-(i+1), n-(i+1), n, 1);
1289     CeedHouseholderReflect(&matT[(i+1)+n*(i+1)], &v[i], tau[i],
1290                            n-(i+1), n-(i+1), 1, n);
1291     // Save v
1292     for (CeedInt j=i+1; j<n-1; j++) {
1293       matT[i+n*(j+1)] = v[j];
1294     }
1295   }
1296   // Backwards accumulation of Q
1297   for (CeedInt i=n-2; i>=0; i--) {
1298     v[i] = 1;
1299     for (CeedInt j=i+1; j<n-1; j++) {
1300       v[j] = matT[i+n*(j+1)];
1301       matT[i+n*(j+1)] = 0;
1302     }
1303     CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i],
1304                            n-(i+1), n-(i+1), n, 1);
1305   }
1306 
1307   // Reduce sub and super diagonal
1308   CeedInt p = 0, q = 0, itr = 0, maxitr = n*n*n;
1309   CeedScalar tol = 10*CEED_EPSILON;
1310 
1311   while (q < n && itr < maxitr) {
1312     // Update p, q, size of reduced portions of diagonal
1313     p = 0; q = 0;
1314     for (CeedInt i=n-2; i>=0; i--) {
1315       if (fabs(matT[i+n*(i+1)]) < tol)
1316         q += 1;
1317       else
1318         break;
1319     }
1320     for (CeedInt i=0; i<n-1-q; i++) {
1321       if (fabs(matT[i+n*(i+1)]) < tol)
1322         p += 1;
1323       else
1324         break;
1325     }
1326     if (q == n-1) break; // Finished reducing
1327 
1328     // Reduce tridiagonal portion
1329     CeedScalar tnn = matT[(n-1-q)+n*(n-1-q)],
1330                tnnm1 = matT[(n-2-q)+n*(n-1-q)];
1331     CeedScalar d = (matT[(n-2-q)+n*(n-2-q)] - tnn)/2;
1332     CeedScalar mu = tnn - tnnm1*tnnm1 /
1333                     (d + copysign(sqrt(d*d + tnnm1*tnnm1), d));
1334     CeedScalar x = matT[p+n*p] - mu;
1335     CeedScalar z = matT[p+n*(p+1)];
1336     for (CeedInt k=p; k<n-1-q; k++) {
1337       // Compute Givens rotation
1338       CeedScalar c = 1, s = 0;
1339       if (fabs(z) > tol) {
1340         if (fabs(z) > fabs(x)) {
1341           CeedScalar tau = -x/z;
1342           s = 1/sqrt(1+tau*tau), c = s*tau;
1343         } else {
1344           CeedScalar tau = -z/x;
1345           c = 1/sqrt(1+tau*tau), s = c*tau;
1346         }
1347       }
1348 
1349       // Apply Givens rotation to T
1350       CeedGivensRotation(matT, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1351       CeedGivensRotation(matT, c, s, CEED_TRANSPOSE, k, k+1, n, n);
1352 
1353       // Apply Givens rotation to Q
1354       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1355 
1356       // Update x, z
1357       if (k < n-q-2) {
1358         x = matT[k+n*(k+1)];
1359         z = matT[k+n*(k+2)];
1360       }
1361     }
1362     itr++;
1363   }
1364   // Save eigenvalues
1365   for (CeedInt i=0; i<n; i++)
1366     lambda[i] = matT[i+n*i];
1367 
1368   // Check convergence
1369   if (itr == maxitr && q < n-1)
1370     // LCOV_EXCL_START
1371     return CeedError(ceed, CEED_ERROR_MINOR,
1372                      "Symmetric QR failed to converge");
1373   // LCOV_EXCL_STOP
1374   return CEED_ERROR_SUCCESS;
1375 }
1376 
1377 /**
1378   @brief Return Simultaneous Diagonalization of two matrices. This solves the
1379            generalized eigenvalue problem A x = lambda B x, where A and B
1380            are symmetric and B is positive definite. We generate the matrix X
1381            and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1382            is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1383 
1384   @param ceed         A Ceed context for error handling
1385   @param[in] mat_A     Row-major matrix to be factorized with eigenvalues
1386   @param[in] mat_B     Row-major matrix to be factorized to identity
1387   @param[out] x       Row-major orthogonal matrix
1388   @param[out] lambda  Vector of length n of generalized eigenvalues
1389   @param n            Number of rows/columns
1390 
1391   @return An error code: 0 - success, otherwise - failure
1392 
1393   @ref Utility
1394 **/
1395 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A,
1396                                     CeedScalar *mat_B, CeedScalar *x,
1397                                     CeedScalar *lambda, CeedInt n) {
1398   int ierr;
1399   CeedScalar *mat_C, *mat_G, *vecD;
1400   ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr);
1401   ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr);
1402   ierr = CeedCalloc(n, &vecD); CeedChk(ierr);
1403 
1404   // Compute B = G D G^T
1405   memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0]));
1406   ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vecD, n); CeedChk(ierr);
1407   for (CeedInt i=0; i<n; i++)
1408     vecD[i] = sqrt(vecD[i]);
1409 
1410   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1411   //           = D^-1/2 G^T A G D^-1/2
1412   for (CeedInt i=0; i<n; i++)
1413     for (CeedInt j=0; j<n; j++)
1414       mat_C[j+i*n] = mat_G[i+j*n] / vecD[i];
1415   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_C,
1416                             (const CeedScalar *)mat_A, x, n, n, n);
1417   CeedChk(ierr);
1418   for (CeedInt i=0; i<n; i++)
1419     for (CeedInt j=0; j<n; j++)
1420       mat_G[j+i*n] = mat_G[j+i*n] / vecD[j];
1421   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)x,
1422                             (const CeedScalar *)mat_G, mat_C, n, n, n);
1423   CeedChk(ierr);
1424 
1425   // Compute Q^T C Q = lambda
1426   ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr);
1427 
1428   // Set x = (G D^1/2)^-T Q
1429   //       = G D^-1/2 Q
1430   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_G,
1431                             (const CeedScalar *)mat_C, x, n, n, n);
1432   CeedChk(ierr);
1433 
1434   // Cleanup
1435   ierr = CeedFree(&mat_C); CeedChk(ierr);
1436   ierr = CeedFree(&mat_G); CeedChk(ierr);
1437   ierr = CeedFree(&vecD); CeedChk(ierr);
1438   return CEED_ERROR_SUCCESS;
1439 }
1440 
1441 /// @}
1442