xref: /libCEED/interface/ceed-basis.c (revision 2459f3f1cd4d7d2e210e1c26d669bd2fde41a0b6)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A
41     where A is an mxn matrix indexed as A[i*row + j*col]
42 
43   @param[in,out] A  Matrix to apply Householder reflection to, in place
44   @param v          Householder vector
45   @param b          Scaling factor
46   @param m          Number of rows in A
47   @param n          Number of columns in A
48   @param row        Row stride
49   @param col        Col stride
50 
51   @return An error code: 0 - success, otherwise - failure
52 
53   @ref Developer
54 **/
55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v,
56                                   CeedScalar b, CeedInt m, CeedInt n,
57                                   CeedInt row, CeedInt col) {
58   for (CeedInt j=0; j<n; j++) {
59     CeedScalar w = A[0*row + j*col];
60     for (CeedInt i=1; i<m; i++)
61       w += v[i] * A[i*row + j*col];
62     A[0*row + j*col] -= b * w;
63     for (CeedInt i=1; i<m; i++)
64       A[i*row + j*col] -= b * w * v[i];
65   }
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 /**
70   @brief Apply Householder Q matrix
71 
72     Compute A = Q A where Q is mxm and A is mxn.
73 
74   @param[in,out] A  Matrix to apply Householder Q to, in place
75   @param Q          Householder Q matrix
76   @param tau        Householder scaling factors
77   @param t_mode     Transpose mode for application
78   @param m          Number of rows in A
79   @param n          Number of columns in A
80   @param k          Number of elementary reflectors in Q, k<m
81   @param row        Row stride in A
82   @param col        Col stride in A
83 
84   @return An error code: 0 - success, otherwise - failure
85 
86   @ref Developer
87 **/
88 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q,
89                           const CeedScalar *tau, CeedTransposeMode t_mode,
90                           CeedInt m, CeedInt n, CeedInt k,
91                           CeedInt row, CeedInt col) {
92   int ierr;
93   CeedScalar *v;
94   ierr = CeedMalloc(m, &v); CeedChk(ierr);
95   for (CeedInt ii=0; ii<k; ii++) {
96     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii;
97     for (CeedInt j=i+1; j<m; j++)
98       v[j] = Q[j*k+i];
99     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
100     ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
101     CeedChk(ierr);
102   }
103   ierr = CeedFree(&v); CeedChk(ierr);
104   return CEED_ERROR_SUCCESS;
105 }
106 
107 /**
108   @brief Compute Givens rotation
109 
110     Computes A = G A (or G^T A in transpose mode)
111     where A is an mxn matrix indexed as A[i*n + j*m]
112 
113   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
114   @param c          Cosine factor
115   @param s          Sine factor
116   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
117                     which has the effect of rotating columns of A clockwise;
118                     @ref CEED_TRANSPOSE for the opposite rotation
119   @param i          First row/column to apply rotation
120   @param k          Second row/column to apply rotation
121   @param m          Number of rows in A
122   @param n          Number of columns in A
123 
124   @return An error code: 0 - success, otherwise - failure
125 
126   @ref Developer
127 **/
128 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
129                               CeedTransposeMode t_mode, CeedInt i, CeedInt k,
130                               CeedInt m, CeedInt n) {
131   CeedInt stride_j = 1, stride_ik = m, num_its = n;
132   if (t_mode == CEED_NOTRANSPOSE) {
133     stride_j = n; stride_ik = 1; num_its = m;
134   }
135 
136   // Apply rotation
137   for (CeedInt j=0; j<num_its; j++) {
138     CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j];
139     A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2;
140     A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2;
141   }
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 /**
146   @brief View an array stored in a CeedBasis
147 
148   @param[in] name      Name of array
149   @param[in] fp_fmt    Printing format
150   @param[in] m         Number of rows in array
151   @param[in] n         Number of columns in array
152   @param[in] a         Array to be viewed
153   @param[in] stream    Stream to view to, e.g., stdout
154 
155   @return An error code: 0 - success, otherwise - failure
156 
157   @ref Developer
158 **/
159 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
160                           CeedInt n, const CeedScalar *a, FILE *stream) {
161   for (int i=0; i<m; i++) {
162     if (m > 1)
163       fprintf(stream, "%12s[%d]:", name, i);
164     else
165       fprintf(stream, "%12s:", name);
166     for (int j=0; j<n; j++)
167       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
168     fputs("\n", stream);
169   }
170   return CEED_ERROR_SUCCESS;
171 }
172 
173 /// @}
174 
175 /// ----------------------------------------------------------------------------
176 /// Ceed Backend API
177 /// ----------------------------------------------------------------------------
178 /// @addtogroup CeedBasisBackend
179 /// @{
180 
181 /**
182   @brief Return collocated grad matrix
183 
184   @param basis               CeedBasis
185   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
186                                basis functions at quadrature points
187 
188   @return An error code: 0 - success, otherwise - failure
189 
190   @ref Backend
191 **/
192 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
193   int i, j, k;
194   Ceed ceed;
195   CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d;
196   CeedScalar *interp_1d, *grad_1d, *tau;
197 
198   ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr);
199   ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr);
200   ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr);
201   memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
202   memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
203 
204   // QR Factorization, interp_1d = Q R
205   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
206   ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr);
207   // Note: This function is for backend use, so all errors are terminal
208   //   and we do not need to clean up memory on failure.
209 
210   // Apply Rinv, collo_grad_1d = grad_1d Rinv
211   for (i=0; i<Q_1d; i++) { // Row i
212     collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0];
213     for (j=1; j<P_1d; j++) { // Column j
214       collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i];
215       for (k=0; k<j; k++)
216         collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i];
217       collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j];
218     }
219     for (j=P_1d; j<Q_1d; j++)
220       collo_grad_1d[j+Q_1d*i] = 0;
221   }
222 
223   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
224   ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE,
225                                Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr);
226 
227   ierr = CeedFree(&interp_1d); CeedChk(ierr);
228   ierr = CeedFree(&grad_1d); CeedChk(ierr);
229   ierr = CeedFree(&tau); CeedChk(ierr);
230   return CEED_ERROR_SUCCESS;
231 }
232 
233 /**
234   @brief Get tensor status for given CeedBasis
235 
236   @param basis           CeedBasis
237   @param[out] is_tensor  Variable to store tensor status
238 
239   @return An error code: 0 - success, otherwise - failure
240 
241   @ref Backend
242 **/
243 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
244   *is_tensor = basis->tensor_basis;
245   return CEED_ERROR_SUCCESS;
246 }
247 
248 /**
249   @brief Get backend data of a CeedBasis
250 
251   @param basis      CeedBasis
252   @param[out] data  Variable to store data
253 
254   @return An error code: 0 - success, otherwise - failure
255 
256   @ref Backend
257 **/
258 int CeedBasisGetData(CeedBasis basis, void *data) {
259   *(void **)data = basis->data;
260   return CEED_ERROR_SUCCESS;
261 }
262 
263 /**
264   @brief Set backend data of a CeedBasis
265 
266   @param[out] basis  CeedBasis
267   @param data        Data to set
268 
269   @return An error code: 0 - success, otherwise - failure
270 
271   @ref Backend
272 **/
273 int CeedBasisSetData(CeedBasis basis, void *data) {
274   basis->data = data;
275   return CEED_ERROR_SUCCESS;
276 }
277 
278 /**
279   @brief Increment the reference counter for a CeedBasis
280 
281   @param basis  Basis to increment the reference counter
282 
283   @return An error code: 0 - success, otherwise - failure
284 
285   @ref Backend
286 **/
287 int CeedBasisReference(CeedBasis basis) {
288   basis->ref_count++;
289   return CEED_ERROR_SUCCESS;
290 }
291 
292 /**
293   @brief Get dimension for given CeedElemTopology
294 
295   @param topo      CeedElemTopology
296   @param[out] dim  Variable to store dimension of topology
297 
298   @return An error code: 0 - success, otherwise - failure
299 
300   @ref Backend
301 **/
302 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
303   *dim = (CeedInt) topo >> 16;
304   return CEED_ERROR_SUCCESS;
305 }
306 
307 /**
308   @brief Get CeedTensorContract of a CeedBasis
309 
310   @param basis          CeedBasis
311   @param[out] contract  Variable to store CeedTensorContract
312 
313   @return An error code: 0 - success, otherwise - failure
314 
315   @ref Backend
316 **/
317 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
318   *contract = basis->contract;
319   return CEED_ERROR_SUCCESS;
320 }
321 
322 /**
323   @brief Set CeedTensorContract of a CeedBasis
324 
325   @param[out] basis  CeedBasis
326   @param contract    CeedTensorContract to set
327 
328   @return An error code: 0 - success, otherwise - failure
329 
330   @ref Backend
331 **/
332 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
333   int ierr;
334   basis->contract = contract;
335   ierr = CeedTensorContractReference(contract); CeedChk(ierr);
336   return CEED_ERROR_SUCCESS;
337 }
338 
339 /**
340   @brief Return a reference implementation of matrix multiplication C = A B.
341            Note, this is a reference implementation for CPU CeedScalar pointers
342            that is not intended for high performance.
343 
344   @param ceed        A Ceed context for error handling
345   @param[in] mat_A   Row-major matrix A
346   @param[in] mat_B   Row-major matrix B
347   @param[out] mat_C  Row-major output matrix C
348   @param m           Number of rows of C
349   @param n           Number of columns of C
350   @param kk          Number of columns of A/rows of B
351 
352   @return An error code: 0 - success, otherwise - failure
353 
354   @ref Utility
355 **/
356 int CeedMatrixMultiply(Ceed ceed, const CeedScalar *mat_A,
357                        const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m,
358                        CeedInt n, CeedInt kk) {
359   for (CeedInt i=0; i<m; i++)
360     for (CeedInt j=0; j<n; j++) {
361       CeedScalar sum = 0;
362       for (CeedInt k=0; k<kk; k++)
363         sum += mat_A[k+i*kk]*mat_B[j+k*n];
364       mat_C[j+i*n] = sum;
365     }
366   return CEED_ERROR_SUCCESS;
367 }
368 
369 /// @}
370 
371 /// ----------------------------------------------------------------------------
372 /// CeedBasis Public API
373 /// ----------------------------------------------------------------------------
374 /// @addtogroup CeedBasisUser
375 /// @{
376 
377 /**
378   @brief Create a tensor-product basis for H^1 discretizations
379 
380   @param ceed        A Ceed object where the CeedBasis will be created
381   @param dim         Topological dimension
382   @param num_comp    Number of field components (1 for scalar fields)
383   @param P_1d        Number of nodes in one dimension
384   @param Q_1d        Number of quadrature points in one dimension
385   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
386                        basis functions at quadrature points
387   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
388                        basis functions at quadrature points
389   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
390                        on the 1D reference element [-1, 1]
391   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
392                        reference element
393   @param[out] basis  Address of the variable where the newly created
394                        CeedBasis will be stored.
395 
396   @return An error code: 0 - success, otherwise - failure
397 
398   @ref User
399 **/
400 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
401                             CeedInt P_1d, CeedInt Q_1d,
402                             const CeedScalar *interp_1d,
403                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d,
404                             const CeedScalar *q_weight_1d, CeedBasis *basis) {
405   int ierr;
406 
407   if (!ceed->BasisCreateTensorH1) {
408     Ceed delegate;
409     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
410 
411     if (!delegate)
412       // LCOV_EXCL_START
413       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
414                        "Backend does not support BasisCreateTensorH1");
415     // LCOV_EXCL_STOP
416 
417     ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d,
418                                    Q_1d, interp_1d, grad_1d, q_ref_1d,
419                                    q_weight_1d, basis); CeedChk(ierr);
420     return CEED_ERROR_SUCCESS;
421   }
422 
423   if (dim<1)
424     // LCOV_EXCL_START
425     return CeedError(ceed, CEED_ERROR_DIMENSION,
426                      "Basis dimension must be a positive value");
427   // LCOV_EXCL_STOP
428   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
429                           : dim == 2 ? CEED_TOPOLOGY_QUAD
430                           : CEED_TOPOLOGY_HEX;
431 
432   ierr = CeedCalloc(1, basis); CeedChk(ierr);
433   (*basis)->ceed = ceed;
434   ierr = CeedReference(ceed); CeedChk(ierr);
435   (*basis)->ref_count = 1;
436   (*basis)->tensor_basis = 1;
437   (*basis)->dim = dim;
438   (*basis)->topo = topo;
439   (*basis)->num_comp = num_comp;
440   (*basis)->P_1d = P_1d;
441   (*basis)->Q_1d = Q_1d;
442   (*basis)->P = CeedIntPow(P_1d, dim);
443   (*basis)->Q = CeedIntPow(Q_1d, dim);
444   (*basis)->Q_comp = 1;
445   (*basis)->basis_space = 1; // 1 for H^1 space
446   ierr = CeedCalloc(Q_1d, &(*basis)->q_ref_1d); CeedChk(ierr);
447   ierr = CeedCalloc(Q_1d, &(*basis)->q_weight_1d); CeedChk(ierr);
448   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0]));
449   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d,
450                             Q_1d*sizeof(q_weight_1d[0]));
451   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->interp_1d); CeedChk(ierr);
452   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->grad_1d); CeedChk(ierr);
453   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d,
454                           Q_1d*P_1d*sizeof(interp_1d[0]));
455   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0]));
456   ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d,
457                                    q_weight_1d, *basis); CeedChk(ierr);
458   return CEED_ERROR_SUCCESS;
459 }
460 
461 /**
462   @brief Create a tensor-product Lagrange basis
463 
464   @param ceed        A Ceed object where the CeedBasis will be created
465   @param dim         Topological dimension of element
466   @param num_comp      Number of field components (1 for scalar fields)
467   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
468                        polynomial degree of the resulting Q_k element is k=P-1.
469   @param Q           Number of quadrature points in one dimension.
470   @param quad_mode   Distribution of the Q quadrature points (affects order of
471                        accuracy for the quadrature)
472   @param[out] basis  Address of the variable where the newly created
473                        CeedBasis will be stored.
474 
475   @return An error code: 0 - success, otherwise - failure
476 
477   @ref User
478 **/
479 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
480                                     CeedInt P, CeedInt Q, CeedQuadMode quad_mode,
481                                     CeedBasis *basis) {
482   // Allocate
483   int ierr, ierr2, i, j, k;
484   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
485              *q_weight_1d;
486 
487   if (dim<1)
488     // LCOV_EXCL_START
489     return CeedError(ceed, CEED_ERROR_DIMENSION,
490                      "Basis dimension must be a positive value");
491   // LCOV_EXCL_STOP
492 
493   // Get Nodes and Weights
494   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
495   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
496   ierr = CeedCalloc(P, &nodes); CeedChk(ierr);
497   ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr);
498   ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr);
499   ierr = CeedLobattoQuadrature(P, nodes, NULL);
500   if (ierr) { goto cleanup; } CeedChk(ierr);
501   switch (quad_mode) {
502   case CEED_GAUSS:
503     ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
504     break;
505   case CEED_GAUSS_LOBATTO:
506     ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
507     break;
508   }
509   if (ierr) { goto cleanup; } CeedChk(ierr);
510 
511   // Build B, D matrix
512   // Fornberg, 1998
513   for (i = 0; i  < Q; i++) {
514     c1 = 1.0;
515     c3 = nodes[0] - q_ref_1d[i];
516     interp_1d[i*P+0] = 1.0;
517     for (j = 1; j < P; j++) {
518       c2 = 1.0;
519       c4 = c3;
520       c3 = nodes[j] - q_ref_1d[i];
521       for (k = 0; k < j; k++) {
522         dx = nodes[j] - nodes[k];
523         c2 *= dx;
524         if (k == j - 1) {
525           grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2;
526           interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2;
527         }
528         grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx;
529         interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx;
530       }
531       c1 = c2;
532     }
533   }
534   // Pass to CeedBasisCreateTensorH1
535   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d,
536                                  q_ref_1d, q_weight_1d, basis); CeedChk(ierr);
537 cleanup:
538   ierr2 = CeedFree(&interp_1d); CeedChk(ierr2);
539   ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);
540   ierr2 = CeedFree(&nodes); CeedChk(ierr2);
541   ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2);
542   ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2);
543   CeedChk(ierr);
544   return CEED_ERROR_SUCCESS;
545 }
546 
547 /**
548   @brief Create a non tensor-product basis for H^1 discretizations
549 
550   @param ceed        A Ceed object where the CeedBasis will be created
551   @param topo        Topology of element, e.g. hypercube, simplex, ect
552   @param num_comp    Number of field components (1 for scalar fields)
553   @param num_nodes   Total number of nodes
554   @param num_qpts    Total number of quadrature points
555   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
556                        nodal basis functions at quadrature points
557   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
558                        derivatives of nodal basis functions at quadrature points
559   @param q_ref       Array of length num_qpts holding the locations of quadrature
560                        points on the reference element
561   @param q_weight    Array of length num_qpts holding the quadrature weights on the
562                        reference element
563   @param[out] basis  Address of the variable where the newly created
564                        CeedBasis will be stored.
565 
566   @return An error code: 0 - success, otherwise - failure
567 
568   @ref User
569 **/
570 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
571                       CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
572                       const CeedScalar *grad, const CeedScalar *q_ref,
573                       const CeedScalar *q_weight, CeedBasis *basis) {
574   int ierr;
575   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
576 
577   if (!ceed->BasisCreateH1) {
578     Ceed delegate;
579     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
580 
581     if (!delegate)
582       // LCOV_EXCL_START
583       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
584                        "Backend does not support BasisCreateH1");
585     // LCOV_EXCL_STOP
586 
587     ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes,
588                              num_qpts, interp, grad, q_ref,
589                              q_weight, basis); CeedChk(ierr);
590     return CEED_ERROR_SUCCESS;
591   }
592 
593   ierr = CeedCalloc(1, basis); CeedChk(ierr);
594 
595   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
596 
597   (*basis)->ceed = ceed;
598   ierr = CeedReference(ceed); CeedChk(ierr);
599   (*basis)->ref_count = 1;
600   (*basis)->tensor_basis = 0;
601   (*basis)->dim = dim;
602   (*basis)->topo = topo;
603   (*basis)->num_comp = num_comp;
604   (*basis)->P = P;
605   (*basis)->Q = Q;
606   (*basis)->Q_comp = 1;
607   (*basis)->basis_space = 1; // 1 for H^1 space
608   ierr = CeedCalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
609   ierr = CeedCalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
610   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
611   if(q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
612   ierr = CeedCalloc(Q*P, &(*basis)->interp); CeedChk(ierr);
613   ierr = CeedCalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr);
614   if(interp) memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0]));
615   if(grad) memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0]));
616   ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref,
617                              q_weight, *basis); CeedChk(ierr);
618   return CEED_ERROR_SUCCESS;
619 }
620 
621 /**
622   @brief Create a non tensor-product basis for H(div) discretizations
623 
624   @param ceed        A Ceed object where the CeedBasis will be created
625   @param topo        Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.),
626                      dimension of which is used in some array sizes below
627   @param num_comp    Number of components (usually 1 for vectors in H(div) bases)
628   @param num_nodes   Total number of nodes (dofs per element)
629   @param num_qpts    Total number of quadrature points
630   @param interp      Row-major (dim*num_qpts * num_nodes) matrix expressing the values of
631                        nodal basis functions at quadrature points
632   @param div        Row-major (num_qpts * num_nodes) matrix expressing
633                        divergence of nodal basis functions at quadrature points
634   @param q_ref       Array of length num_qpts holding the locations of quadrature
635                        points on the reference element
636   @param q_weight    Array of length num_qpts holding the quadrature weights on the
637                        reference element
638   @param[out] basis  Address of the variable where the newly created
639                        CeedBasis will be stored.
640 
641   @return An error code: 0 - success, otherwise - failure
642 
643   @ref User
644 **/
645 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
646                         CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
647                         const CeedScalar *div, const CeedScalar *q_ref,
648                         const CeedScalar *q_weight, CeedBasis *basis) {
649   int ierr;
650   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
651   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
652   if (!ceed->BasisCreateHdiv) {
653     Ceed delegate;
654     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
655 
656     if (!delegate)
657       // LCOV_EXCL_START
658       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
659                        "Backend does not implement BasisCreateHdiv");
660     // LCOV_EXCL_STOP
661 
662     ierr = CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes,
663                                num_qpts, interp, div, q_ref,
664                                q_weight, basis); CeedChk(ierr);
665     return CEED_ERROR_SUCCESS;
666   }
667 
668   ierr = CeedCalloc(1, basis); CeedChk(ierr);
669 
670   (*basis)->ceed = ceed;
671   ierr = CeedReference(ceed); CeedChk(ierr);
672   (*basis)->ref_count = 1;
673   (*basis)->tensor_basis = 0;
674   (*basis)->dim = dim;
675   (*basis)->topo = topo;
676   (*basis)->num_comp = num_comp;
677   (*basis)->P = P;
678   (*basis)->Q = Q;
679   (*basis)->Q_comp = dim;
680   (*basis)->basis_space = 2; // 2 for H(div) space
681   ierr = CeedMalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
682   ierr = CeedMalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
683   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
684   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
685   ierr = CeedMalloc(dim*Q*P, &(*basis)->interp); CeedChk(ierr);
686   ierr = CeedMalloc(Q*P, &(*basis)->div); CeedChk(ierr);
687   if (interp) memcpy((*basis)->interp, interp, dim*Q*P*sizeof(interp[0]));
688   if (div) memcpy((*basis)->div, div, Q*P*sizeof(div[0]));
689   ierr = ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref,
690                                q_weight, *basis); CeedChk(ierr);
691   return CEED_ERROR_SUCCESS;
692 }
693 
694 /**
695   @brief Copy the pointer to a CeedBasis. Both pointers should
696            be destroyed with `CeedBasisDestroy()`;
697            Note: If `*basis_copy` is non-NULL, then it is assumed that
698            `*basis_copy` is a pointer to a CeedBasis. This CeedBasis
699            will be destroyed if `*basis_copy` is the only
700            reference to this CeedBasis.
701 
702   @param basis            CeedBasis to copy reference to
703   @param[out] basis_copy  Variable to store copied reference
704 
705   @return An error code: 0 - success, otherwise - failure
706 
707   @ref User
708 **/
709 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
710   int ierr;
711 
712   ierr = CeedBasisReference(basis); CeedChk(ierr);
713   ierr = CeedBasisDestroy(basis_copy); CeedChk(ierr);
714   *basis_copy = basis;
715   return CEED_ERROR_SUCCESS;
716 }
717 
718 /**
719   @brief View a CeedBasis
720 
721   @param basis   CeedBasis to view
722   @param stream  Stream to view to, e.g., stdout
723 
724   @return An error code: 0 - success, otherwise - failure
725 
726   @ref User
727 **/
728 int CeedBasisView(CeedBasis basis, FILE *stream) {
729   int ierr;
730   CeedFESpace FE_space = basis->basis_space;
731   CeedElemTopology topo = basis->topo;
732   // Print FE space and element topology of the basis
733   if (basis->tensor_basis) {
734     fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n",
735             CeedFESpaces[FE_space], CeedElemTopologies[topo],
736             basis->dim, basis->P_1d, basis->Q_1d);
737   } else {
738     fprintf(stream, "CeedBasis (%s on a %s element): dim=%d P=%d Q=%d\n",
739             CeedFESpaces[FE_space], CeedElemTopologies[topo],
740             basis->dim, basis->P, basis->Q);
741   }
742   // Print quadrature data, interpolation/gradient/divergene/curl of the basis
743   if (basis->tensor_basis) { // tensor basis
744     ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d,
745                           stream); CeedChk(ierr);
746     ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d,
747                           basis->q_weight_1d, stream); CeedChk(ierr);
748     ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
749                           basis->interp_1d, stream); CeedChk(ierr);
750     ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
751                           basis->grad_1d, stream); CeedChk(ierr);
752   } else { // non-tensor basis
753     ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim,
754                           basis->q_ref_1d,
755                           stream); CeedChk(ierr);
756     ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d,
757                           stream); CeedChk(ierr);
758     ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q_comp*basis->Q, basis->P,
759                           basis->interp, stream); CeedChk(ierr);
760     if (basis->grad) {
761       ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P,
762                             basis->grad, stream); CeedChk(ierr);
763     }
764     if (basis->div) {
765       ierr = CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P,
766                             basis->div, stream); CeedChk(ierr);
767     }
768   }
769   return CEED_ERROR_SUCCESS;
770 }
771 
772 /**
773   @brief Apply basis evaluation from nodes to quadrature points or vice versa
774 
775   @param basis     CeedBasis to evaluate
776   @param num_elem  The number of elements to apply the basis evaluation to;
777                      the backend will specify the ordering in
778                      CeedElemRestrictionCreateBlocked()
779   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
780                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
781                      from quadrature points to nodes
782   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
783                      \ref CEED_EVAL_INTERP to use interpolated values,
784                      \ref CEED_EVAL_GRAD to use gradients,
785                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
786   @param[in] u     Input CeedVector
787   @param[out] v    Output CeedVector
788 
789   @return An error code: 0 - success, otherwise - failure
790 
791   @ref User
792 **/
793 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
794                    CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
795   int ierr;
796   CeedSize u_length = 0, v_length;
797   CeedInt dim, num_comp, num_nodes, num_qpts;
798   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
799   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
800   ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
801   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
802   ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr);
803   if (u) {
804     ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr);
805   }
806 
807   if (!basis->Apply)
808     // LCOV_EXCL_START
809     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED,
810                      "Backend does not support BasisApply");
811   // LCOV_EXCL_STOP
812 
813   // Check compatibility of topological and geometrical dimensions
814   if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 ||
815                                     u_length%num_qpts != 0)) ||
816       (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 ||
817                                       v_length%num_qpts != 0)))
818     // LCOV_EXCL_START
819     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
820                      "Length of input/output vectors "
821                      "incompatible with basis dimensions");
822   // LCOV_EXCL_STOP
823 
824   // Check vector lengths to prevent out of bounds issues
825   bool bad_dims = false;
826   switch (eval_mode) {
827   case CEED_EVAL_NONE:
828   case CEED_EVAL_INTERP: bad_dims =
829       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
830                                      v_length < num_elem*num_comp*num_nodes)) ||
831        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
832                                        u_length < num_elem*num_comp*num_nodes)));
833     break;
834   case CEED_EVAL_GRAD: bad_dims =
835       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim ||
836                                      v_length < num_elem*num_comp*num_nodes)) ||
837        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim ||
838                                        u_length < num_elem*num_comp*num_nodes)));
839     break;
840   case CEED_EVAL_WEIGHT:
841     bad_dims = v_length < num_elem*num_qpts;
842     break;
843   // LCOV_EXCL_START
844   case CEED_EVAL_DIV: bad_dims =
845       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
846                                      v_length < num_elem*num_comp*num_nodes)) ||
847        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
848                                        u_length < num_elem*num_comp*num_nodes)));
849     break;
850   case CEED_EVAL_CURL: bad_dims =
851       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
852                                      v_length < num_elem*num_comp*num_nodes)) ||
853        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
854                                        u_length < num_elem*num_comp*num_nodes)));
855     break;
856     // LCOV_EXCL_STOP
857   }
858   if (bad_dims)
859     // LCOV_EXCL_START
860     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
861                      "Input/output vectors too short for basis and evaluation mode");
862   // LCOV_EXCL_STOP
863 
864   ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr);
865   return CEED_ERROR_SUCCESS;
866 }
867 
868 /**
869   @brief Get Ceed associated with a CeedBasis
870 
871   @param basis      CeedBasis
872   @param[out] ceed  Variable to store Ceed
873 
874   @return An error code: 0 - success, otherwise - failure
875 
876   @ref Advanced
877 **/
878 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
879   *ceed = basis->ceed;
880   return CEED_ERROR_SUCCESS;
881 }
882 
883 /**
884   @brief Get dimension for given CeedBasis
885 
886   @param basis     CeedBasis
887   @param[out] dim  Variable to store dimension of basis
888 
889   @return An error code: 0 - success, otherwise - failure
890 
891   @ref Advanced
892 **/
893 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
894   *dim = basis->dim;
895   return CEED_ERROR_SUCCESS;
896 }
897 
898 /**
899   @brief Get topology for given CeedBasis
900 
901   @param basis      CeedBasis
902   @param[out] topo  Variable to store topology of basis
903 
904   @return An error code: 0 - success, otherwise - failure
905 
906   @ref Advanced
907 **/
908 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
909   *topo = basis->topo;
910   return CEED_ERROR_SUCCESS;
911 }
912 
913 /**
914   @brief Get number of Q-vector components for given CeedBasis
915 
916   @param basis          CeedBasis
917   @param[out] Q_comp  Variable to store number of Q-vector components of basis
918 
919   @return An error code: 0 - success, otherwise - failure
920 
921   @ref Advanced
922 **/
923 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
924   *Q_comp = basis->Q_comp;
925   return CEED_ERROR_SUCCESS;
926 }
927 
928 /**
929   @brief Get number of components for given CeedBasis
930 
931   @param basis          CeedBasis
932   @param[out] num_comp  Variable to store number of components of basis
933 
934   @return An error code: 0 - success, otherwise - failure
935 
936   @ref Advanced
937 **/
938 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
939   *num_comp = basis->num_comp;
940   return CEED_ERROR_SUCCESS;
941 }
942 
943 /**
944   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
945 
946   @param basis   CeedBasis
947   @param[out] P  Variable to store number of nodes
948 
949   @return An error code: 0 - success, otherwise - failure
950 
951   @ref Utility
952 **/
953 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
954   *P = basis->P;
955   return CEED_ERROR_SUCCESS;
956 }
957 
958 /**
959   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
960 
961   @param basis     CeedBasis
962   @param[out] P_1d  Variable to store number of nodes
963 
964   @return An error code: 0 - success, otherwise - failure
965 
966   @ref Advanced
967 **/
968 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
969   if (!basis->tensor_basis)
970     // LCOV_EXCL_START
971     return CeedError(basis->ceed, CEED_ERROR_MINOR,
972                      "Cannot supply P_1d for non-tensor basis");
973   // LCOV_EXCL_STOP
974 
975   *P_1d = basis->P_1d;
976   return CEED_ERROR_SUCCESS;
977 }
978 
979 /**
980   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
981 
982   @param basis   CeedBasis
983   @param[out] Q  Variable to store number of quadrature points
984 
985   @return An error code: 0 - success, otherwise - failure
986 
987   @ref Utility
988 **/
989 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
990   *Q = basis->Q;
991   return CEED_ERROR_SUCCESS;
992 }
993 
994 /**
995   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
996 
997   @param basis      CeedBasis
998   @param[out] Q_1d  Variable to store number of quadrature points
999 
1000   @return An error code: 0 - success, otherwise - failure
1001 
1002   @ref Advanced
1003 **/
1004 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1005   if (!basis->tensor_basis)
1006     // LCOV_EXCL_START
1007     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1008                      "Cannot supply Q_1d for non-tensor basis");
1009   // LCOV_EXCL_STOP
1010 
1011   *Q_1d = basis->Q_1d;
1012   return CEED_ERROR_SUCCESS;
1013 }
1014 
1015 /**
1016   @brief Get reference coordinates of quadrature points (in dim dimensions)
1017          of a CeedBasis
1018 
1019   @param basis       CeedBasis
1020   @param[out] q_ref  Variable to store reference coordinates of quadrature points
1021 
1022   @return An error code: 0 - success, otherwise - failure
1023 
1024   @ref Advanced
1025 **/
1026 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1027   *q_ref = basis->q_ref_1d;
1028   return CEED_ERROR_SUCCESS;
1029 }
1030 
1031 /**
1032   @brief Get quadrature weights of quadrature points (in dim dimensions)
1033          of a CeedBasis
1034 
1035   @param basis          CeedBasis
1036   @param[out] q_weight  Variable to store quadrature weights
1037 
1038   @return An error code: 0 - success, otherwise - failure
1039 
1040   @ref Advanced
1041 **/
1042 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1043   *q_weight = basis->q_weight_1d;
1044   return CEED_ERROR_SUCCESS;
1045 }
1046 
1047 /**
1048   @brief Get interpolation matrix of a CeedBasis
1049 
1050   @param basis        CeedBasis
1051   @param[out] interp  Variable to store interpolation matrix
1052 
1053   @return An error code: 0 - success, otherwise - failure
1054 
1055   @ref Advanced
1056 **/
1057 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1058   if (!basis->interp && basis->tensor_basis) {
1059     // Allocate
1060     int ierr;
1061     ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr);
1062 
1063     // Initialize
1064     for (CeedInt i=0; i<basis->Q*basis->P; i++)
1065       basis->interp[i] = 1.0;
1066 
1067     // Calculate
1068     for (CeedInt d=0; d<basis->dim; d++)
1069       for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1070         for (CeedInt node=0; node<basis->P; node++) {
1071           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1072           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1073           basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p];
1074         }
1075   }
1076   *interp = basis->interp;
1077   return CEED_ERROR_SUCCESS;
1078 }
1079 
1080 /**
1081   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1082 
1083   @param basis           CeedBasis
1084   @param[out] interp_1d  Variable to store interpolation matrix
1085 
1086   @return An error code: 0 - success, otherwise - failure
1087 
1088   @ref Backend
1089 **/
1090 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1091   if (!basis->tensor_basis)
1092     // LCOV_EXCL_START
1093     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1094                      "CeedBasis is not a tensor product basis.");
1095   // LCOV_EXCL_STOP
1096 
1097   *interp_1d = basis->interp_1d;
1098   return CEED_ERROR_SUCCESS;
1099 }
1100 
1101 /**
1102   @brief Get gradient matrix of a CeedBasis
1103 
1104   @param basis      CeedBasis
1105   @param[out] grad  Variable to store gradient matrix
1106 
1107   @return An error code: 0 - success, otherwise - failure
1108 
1109   @ref Advanced
1110 **/
1111 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1112   if (!basis->grad && basis->tensor_basis) {
1113     // Allocate
1114     int ierr;
1115     ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad);
1116     CeedChk(ierr);
1117 
1118     // Initialize
1119     for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++)
1120       basis->grad[i] = 1.0;
1121 
1122     // Calculate
1123     for (CeedInt d=0; d<basis->dim; d++)
1124       for (CeedInt i=0; i<basis->dim; i++)
1125         for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1126           for (CeedInt node=0; node<basis->P; node++) {
1127             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1128             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1129             if (i == d)
1130               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1131                 basis->grad_1d[q*basis->P_1d+p];
1132             else
1133               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1134                 basis->interp_1d[q*basis->P_1d+p];
1135           }
1136   }
1137   *grad = basis->grad;
1138   return CEED_ERROR_SUCCESS;
1139 }
1140 
1141 /**
1142   @brief Get 1D gradient matrix of a tensor product CeedBasis
1143 
1144   @param basis         CeedBasis
1145   @param[out] grad_1d  Variable to store gradient matrix
1146 
1147   @return An error code: 0 - success, otherwise - failure
1148 
1149   @ref Advanced
1150 **/
1151 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1152   if (!basis->tensor_basis)
1153     // LCOV_EXCL_START
1154     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1155                      "CeedBasis is not a tensor product basis.");
1156   // LCOV_EXCL_STOP
1157 
1158   *grad_1d = basis->grad_1d;
1159   return CEED_ERROR_SUCCESS;
1160 }
1161 
1162 /**
1163   @brief Get divergence matrix of a CeedBasis
1164 
1165   @param basis     CeedBasis
1166   @param[out] div  Variable to store divergence matrix
1167 
1168   @return An error code: 0 - success, otherwise - failure
1169 
1170   @ref Advanced
1171 **/
1172 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1173   if (!basis->div)
1174     // LCOV_EXCL_START
1175     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1176                      "CeedBasis does not have divergence matrix.");
1177   // LCOV_EXCL_STOP
1178 
1179   *div = basis->div;
1180   return CEED_ERROR_SUCCESS;
1181 }
1182 
1183 /**
1184   @brief Destroy a CeedBasis
1185 
1186   @param basis CeedBasis to destroy
1187 
1188   @return An error code: 0 - success, otherwise - failure
1189 
1190   @ref User
1191 **/
1192 int CeedBasisDestroy(CeedBasis *basis) {
1193   int ierr;
1194 
1195   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1196   if ((*basis)->Destroy) {
1197     ierr = (*basis)->Destroy(*basis); CeedChk(ierr);
1198   }
1199   if ((*basis)->contract) {
1200     ierr = CeedTensorContractDestroy(&(*basis)->contract); CeedChk(ierr);
1201   }
1202   ierr = CeedFree(&(*basis)->interp); CeedChk(ierr);
1203   ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr);
1204   ierr = CeedFree(&(*basis)->grad); CeedChk(ierr);
1205   ierr = CeedFree(&(*basis)->div); CeedChk(ierr);
1206   ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr);
1207   ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr);
1208   ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr);
1209   ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr);
1210   ierr = CeedFree(basis); CeedChk(ierr);
1211   return CEED_ERROR_SUCCESS;
1212 }
1213 
1214 /**
1215   @brief Construct a Gauss-Legendre quadrature
1216 
1217   @param Q                 Number of quadrature points (integrates polynomials of
1218                              degree 2*Q-1 exactly)
1219   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1220   @param[out] q_weight_1d  Array of length Q to hold the weights
1221 
1222   @return An error code: 0 - success, otherwise - failure
1223 
1224   @ref Utility
1225 **/
1226 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1227                         CeedScalar *q_weight_1d) {
1228   // Allocate
1229   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
1230   // Build q_ref_1d, q_weight_1d
1231   for (int i = 0; i <= Q/2; i++) {
1232     // Guess
1233     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
1234     // Pn(xi)
1235     P0 = 1.0;
1236     P1 = xi;
1237     P2 = 0.0;
1238     for (int j = 2; j <= Q; j++) {
1239       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1240       P0 = P1;
1241       P1 = P2;
1242     }
1243     // First Newton Step
1244     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1245     xi = xi-P2/dP2;
1246     // Newton to convergence
1247     for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
1248       P0 = 1.0;
1249       P1 = xi;
1250       for (int j = 2; j <= Q; j++) {
1251         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1252         P0 = P1;
1253         P1 = P2;
1254       }
1255       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1256       xi = xi-P2/dP2;
1257     }
1258     // Save xi, wi
1259     wi = 2.0/((1.0-xi*xi)*dP2*dP2);
1260     q_weight_1d[i] = wi;
1261     q_weight_1d[Q-1-i] = wi;
1262     q_ref_1d[i] = -xi;
1263     q_ref_1d[Q-1-i]= xi;
1264   }
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Construct a Gauss-Legendre-Lobatto quadrature
1270 
1271   @param Q                 Number of quadrature points (integrates polynomials of
1272                              degree 2*Q-3 exactly)
1273   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1274   @param[out] q_weight_1d  Array of length Q to hold the weights
1275 
1276   @return An error code: 0 - success, otherwise - failure
1277 
1278   @ref Utility
1279 **/
1280 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1281                           CeedScalar *q_weight_1d) {
1282   // Allocate
1283   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0);
1284   // Build q_ref_1d, q_weight_1d
1285   // Set endpoints
1286   if (Q < 2)
1287     // LCOV_EXCL_START
1288     return CeedError(NULL, CEED_ERROR_DIMENSION,
1289                      "Cannot create Lobatto quadrature with Q=%d < 2 points", Q);
1290   // LCOV_EXCL_STOP
1291   wi = 2.0/((CeedScalar)(Q*(Q-1)));
1292   if (q_weight_1d) {
1293     q_weight_1d[0] = wi;
1294     q_weight_1d[Q-1] = wi;
1295   }
1296   q_ref_1d[0] = -1.0;
1297   q_ref_1d[Q-1] = 1.0;
1298   // Interior
1299   for (int i = 1; i <= (Q-1)/2; i++) {
1300     // Guess
1301     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
1302     // Pn(xi)
1303     P0 = 1.0;
1304     P1 = xi;
1305     P2 = 0.0;
1306     for (int j = 2; j < Q; j++) {
1307       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1308       P0 = P1;
1309       P1 = P2;
1310     }
1311     // First Newton step
1312     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1313     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1314     xi = xi-dP2/d2P2;
1315     // Newton to convergence
1316     for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
1317       P0 = 1.0;
1318       P1 = xi;
1319       for (int j = 2; j < Q; j++) {
1320         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1321         P0 = P1;
1322         P1 = P2;
1323       }
1324       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1325       d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1326       xi = xi-dP2/d2P2;
1327     }
1328     // Save xi, wi
1329     wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2);
1330     if (q_weight_1d) {
1331       q_weight_1d[i] = wi;
1332       q_weight_1d[Q-1-i] = wi;
1333     }
1334     q_ref_1d[i] = -xi;
1335     q_ref_1d[Q-1-i]= xi;
1336   }
1337   return CEED_ERROR_SUCCESS;
1338 }
1339 
1340 /**
1341   @brief Return QR Factorization of a matrix
1342 
1343   @param ceed         A Ceed context for error handling
1344   @param[in,out] mat  Row-major matrix to be factorized in place
1345   @param[in,out] tau  Vector of length m of scaling factors
1346   @param m            Number of rows
1347   @param n            Number of columns
1348 
1349   @return An error code: 0 - success, otherwise - failure
1350 
1351   @ref Utility
1352 **/
1353 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau,
1354                         CeedInt m, CeedInt n) {
1355   CeedScalar v[m];
1356 
1357   // Check m >= n
1358   if (n > m)
1359     // LCOV_EXCL_START
1360     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1361                      "Cannot compute QR factorization with n > m");
1362   // LCOV_EXCL_STOP
1363 
1364   for (CeedInt i=0; i<n; i++) {
1365     if (i >= m-1) { // last row of matrix, no reflection needed
1366       tau[i] = 0.;
1367       break;
1368     }
1369     // Calculate Householder vector, magnitude
1370     CeedScalar sigma = 0.0;
1371     v[i] = mat[i+n*i];
1372     for (CeedInt j=i+1; j<m; j++) {
1373       v[j] = mat[i+n*j];
1374       sigma += v[j] * v[j];
1375     }
1376     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m]
1377     CeedScalar R_ii = -copysign(norm, v[i]);
1378     v[i] -= R_ii;
1379     // norm of v[i:m] after modification above and scaling below
1380     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1381     //   tau = 2 / (norm*norm)
1382     tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1383     for (CeedInt j=i+1; j<m; j++)
1384       v[j] /= v[i];
1385 
1386     // Apply Householder reflector to lower right panel
1387     CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1);
1388     // Save v
1389     mat[i+n*i] = R_ii;
1390     for (CeedInt j=i+1; j<m; j++)
1391       mat[i+n*j] = v[j];
1392   }
1393   return CEED_ERROR_SUCCESS;
1394 }
1395 
1396 /**
1397   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1398            symmetric QR factorization
1399 
1400   @param ceed         A Ceed context for error handling
1401   @param[in,out] mat  Row-major matrix to be factorized in place
1402   @param[out] lambda  Vector of length n of eigenvalues
1403   @param n            Number of rows/columns
1404 
1405   @return An error code: 0 - success, otherwise - failure
1406 
1407   @ref Utility
1408 **/
1409 CeedPragmaOptimizeOff
1410 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat,
1411                                     CeedScalar *lambda, CeedInt n) {
1412   // Check bounds for clang-tidy
1413   if (n<2)
1414     // LCOV_EXCL_START
1415     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1416                      "Cannot compute symmetric Schur decomposition of scalars");
1417   // LCOV_EXCL_STOP
1418 
1419   CeedScalar v[n-1], tau[n-1], mat_T[n*n];
1420 
1421   // Copy mat to mat_T and set mat to I
1422   memcpy(mat_T, mat, n*n*sizeof(mat[0]));
1423   for (CeedInt i=0; i<n; i++)
1424     for (CeedInt j=0; j<n; j++)
1425       mat[j+n*i] = (i==j) ? 1 : 0;
1426 
1427   // Reduce to tridiagonal
1428   for (CeedInt i=0; i<n-1; i++) {
1429     // Calculate Householder vector, magnitude
1430     CeedScalar sigma = 0.0;
1431     v[i] = mat_T[i+n*(i+1)];
1432     for (CeedInt j=i+1; j<n-1; j++) {
1433       v[j] = mat_T[i+n*(j+1)];
1434       sigma += v[j] * v[j];
1435     }
1436     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1]
1437     CeedScalar R_ii = -copysign(norm, v[i]);
1438     v[i] -= R_ii;
1439     // norm of v[i:m] after modification above and scaling below
1440     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1441     //   tau = 2 / (norm*norm)
1442     tau[i] = i == n - 2 ? 2 : 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1443     for (CeedInt j=i+1; j<n-1; j++)
1444       v[j] /= v[i];
1445 
1446     // Update sub and super diagonal
1447     for (CeedInt j=i+2; j<n; j++) {
1448       mat_T[i+n*j] = 0; mat_T[j+n*i] = 0;
1449     }
1450     // Apply symmetric Householder reflector to lower right panel
1451     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1452                            n-(i+1), n-(i+1), n, 1);
1453     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1454                            n-(i+1), n-(i+1), 1, n);
1455 
1456     // Save v
1457     mat_T[i+n*(i+1)] = R_ii;
1458     mat_T[(i+1)+n*i] = R_ii;
1459     for (CeedInt j=i+1; j<n-1; j++) {
1460       mat_T[i+n*(j+1)] = v[j];
1461     }
1462   }
1463   // Backwards accumulation of Q
1464   for (CeedInt i=n-2; i>=0; i--) {
1465     if (tau[i] > 0.0) {
1466       v[i] = 1;
1467       for (CeedInt j=i+1; j<n-1; j++) {
1468         v[j] = mat_T[i+n*(j+1)];
1469         mat_T[i+n*(j+1)] = 0;
1470       }
1471       CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i],
1472                              n-(i+1), n-(i+1), n, 1);
1473     }
1474   }
1475 
1476   // Reduce sub and super diagonal
1477   CeedInt p = 0, q = 0, itr = 0, max_itr = n*n*n*n;
1478   CeedScalar tol = CEED_EPSILON;
1479 
1480   while (itr < max_itr) {
1481     // Update p, q, size of reduced portions of diagonal
1482     p = 0; q = 0;
1483     for (CeedInt i=n-2; i>=0; i--) {
1484       if (fabs(mat_T[i+n*(i+1)]) < tol)
1485         q += 1;
1486       else
1487         break;
1488     }
1489     for (CeedInt i=0; i<n-q-1; i++) {
1490       if (fabs(mat_T[i+n*(i+1)]) < tol)
1491         p += 1;
1492       else
1493         break;
1494     }
1495     if (q == n-1) break; // Finished reducing
1496 
1497     // Reduce tridiagonal portion
1498     CeedScalar t_nn = mat_T[(n-1-q)+n*(n-1-q)],
1499                t_nnm1 = mat_T[(n-2-q)+n*(n-1-q)];
1500     CeedScalar d = (mat_T[(n-2-q)+n*(n-2-q)] - t_nn)/2;
1501     CeedScalar mu = t_nn - t_nnm1*t_nnm1 /
1502                     (d + copysign(sqrt(d*d + t_nnm1*t_nnm1), d));
1503     CeedScalar x = mat_T[p+n*p] - mu;
1504     CeedScalar z = mat_T[p+n*(p+1)];
1505     for (CeedInt k=p; k<n-q-1; k++) {
1506       // Compute Givens rotation
1507       CeedScalar c = 1, s = 0;
1508       if (fabs(z) > tol) {
1509         if (fabs(z) > fabs(x)) {
1510           CeedScalar tau = -x/z;
1511           s = 1/sqrt(1+tau*tau), c = s*tau;
1512         } else {
1513           CeedScalar tau = -z/x;
1514           c = 1/sqrt(1+tau*tau), s = c*tau;
1515         }
1516       }
1517 
1518       // Apply Givens rotation to T
1519       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1520       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k+1, n, n);
1521 
1522       // Apply Givens rotation to Q
1523       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1524 
1525       // Update x, z
1526       if (k < n-q-2) {
1527         x = mat_T[k+n*(k+1)];
1528         z = mat_T[k+n*(k+2)];
1529       }
1530     }
1531     itr++;
1532   }
1533 
1534   // Save eigenvalues
1535   for (CeedInt i=0; i<n; i++)
1536     lambda[i] = mat_T[i+n*i];
1537 
1538   // Check convergence
1539   if (itr == max_itr && q < n-1)
1540     // LCOV_EXCL_START
1541     return CeedError(ceed, CEED_ERROR_MINOR,
1542                      "Symmetric QR failed to converge");
1543   // LCOV_EXCL_STOP
1544   return CEED_ERROR_SUCCESS;
1545 }
1546 CeedPragmaOptimizeOn
1547 
1548 /**
1549   @brief Return Simultaneous Diagonalization of two matrices. This solves the
1550            generalized eigenvalue problem A x = lambda B x, where A and B
1551            are symmetric and B is positive definite. We generate the matrix X
1552            and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1553            is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1554 
1555   @param ceed         A Ceed context for error handling
1556   @param[in] mat_A    Row-major matrix to be factorized with eigenvalues
1557   @param[in] mat_B    Row-major matrix to be factorized to identity
1558   @param[out] mat_X   Row-major orthogonal matrix
1559   @param[out] lambda  Vector of length n of generalized eigenvalues
1560   @param n            Number of rows/columns
1561 
1562   @return An error code: 0 - success, otherwise - failure
1563 
1564   @ref Utility
1565 **/
1566 CeedPragmaOptimizeOff
1567 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A,
1568                                     CeedScalar *mat_B, CeedScalar *mat_X,
1569                                     CeedScalar *lambda, CeedInt n) {
1570   int ierr;
1571   CeedScalar *mat_C, *mat_G, *vec_D;
1572   ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr);
1573   ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr);
1574   ierr = CeedCalloc(n, &vec_D); CeedChk(ierr);
1575 
1576   // Compute B = G D G^T
1577   memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0]));
1578   ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n); CeedChk(ierr);
1579 
1580   // Sort eigenvalues
1581   for (CeedInt i=n-1; i>=0; i--)
1582     for (CeedInt j=0; j<i; j++) {
1583       if (fabs(vec_D[j]) > fabs(vec_D[j+1])) {
1584         CeedScalar temp;
1585         temp = vec_D[j]; vec_D[j] = vec_D[j+1]; vec_D[j+1] = temp;
1586         for (CeedInt k=0; k<n; k++) {
1587           temp = mat_G[k*n+j]; mat_G[k*n+j] = mat_G[k*n+j+1]; mat_G[k*n+j+1] = temp;
1588         }
1589       }
1590     }
1591 
1592   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1593   //           = D^-1/2 G^T A G D^-1/2
1594   // -- D = D^-1/2
1595   for (CeedInt i=0; i<n; i++)
1596     vec_D[i] = 1./sqrt(vec_D[i]);
1597   // -- G = G D^-1/2
1598   // -- C = D^-1/2 G^T
1599   for (CeedInt i=0; i<n; i++)
1600     for (CeedInt j=0; j<n; j++) {
1601       mat_G[i*n+j] *= vec_D[j];
1602       mat_C[j*n+i]  = mat_G[i*n+j];
1603     }
1604   // -- X = (D^-1/2 G^T) A
1605   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_C,
1606                             (const CeedScalar *)mat_A, mat_X, n, n, n);
1607   CeedChk(ierr);
1608   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1609   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_X,
1610                             (const CeedScalar *)mat_G, mat_C, n, n, n);
1611   CeedChk(ierr);
1612 
1613   // Compute Q^T C Q = lambda
1614   ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr);
1615 
1616   // Sort eigenvalues
1617   for (CeedInt i=n-1; i>=0; i--)
1618     for (CeedInt j=0; j<i; j++) {
1619       if (fabs(lambda[j]) > fabs(lambda[j+1])) {
1620         CeedScalar temp;
1621         temp = lambda[j]; lambda[j] = lambda[j+1]; lambda[j+1] = temp;
1622         for (CeedInt k=0; k<n; k++) {
1623           temp = mat_C[k*n+j]; mat_C[k*n+j] = mat_C[k*n+j+1]; mat_C[k*n+j+1] = temp;
1624         }
1625       }
1626     }
1627 
1628   // Set X = (G D^1/2)^-T Q
1629   //       = G D^-1/2 Q
1630   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)mat_G,
1631                             (const CeedScalar *)mat_C, mat_X, n, n, n);
1632   CeedChk(ierr);
1633 
1634   // Cleanup
1635   ierr = CeedFree(&mat_C); CeedChk(ierr);
1636   ierr = CeedFree(&mat_G); CeedChk(ierr);
1637   ierr = CeedFree(&vec_D); CeedChk(ierr);
1638   return CEED_ERROR_SUCCESS;
1639 }
1640 CeedPragmaOptimizeOn
1641 
1642 /// @}
1643