xref: /libCEED/interface/ceed-basis.c (revision c3e017a42e1d587d084e2f836fe7db08abf77849)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A
41     where A is an mxn matrix indexed as A[i*row + j*col]
42 
43   @param[in,out] A  Matrix to apply Householder reflection to, in place
44   @param v          Householder vector
45   @param b          Scaling factor
46   @param m          Number of rows in A
47   @param n          Number of columns in A
48   @param row        Row stride
49   @param col        Col stride
50 
51   @return An error code: 0 - success, otherwise - failure
52 
53   @ref Developer
54 **/
55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v,
56                                   CeedScalar b, CeedInt m, CeedInt n,
57                                   CeedInt row, CeedInt col) {
58   for (CeedInt j=0; j<n; j++) {
59     CeedScalar w = A[0*row + j*col];
60     for (CeedInt i=1; i<m; i++)
61       w += v[i] * A[i*row + j*col];
62     A[0*row + j*col] -= b * w;
63     for (CeedInt i=1; i<m; i++)
64       A[i*row + j*col] -= b * w * v[i];
65   }
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 /**
70   @brief Apply Householder Q matrix
71 
72     Compute A = Q A where Q is mxm and A is mxn.
73 
74   @param[in,out] A  Matrix to apply Householder Q to, in place
75   @param Q          Householder Q matrix
76   @param tau        Householder scaling factors
77   @param t_mode     Transpose mode for application
78   @param m          Number of rows in A
79   @param n          Number of columns in A
80   @param k          Number of elementary reflectors in Q, k<m
81   @param row        Row stride in A
82   @param col        Col stride in A
83 
84   @return An error code: 0 - success, otherwise - failure
85 
86   @ref Developer
87 **/
88 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q,
89                           const CeedScalar *tau, CeedTransposeMode t_mode,
90                           CeedInt m, CeedInt n, CeedInt k,
91                           CeedInt row, CeedInt col) {
92   int ierr;
93   CeedScalar *v;
94   ierr = CeedMalloc(m, &v); CeedChk(ierr);
95   for (CeedInt ii=0; ii<k; ii++) {
96     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii;
97     for (CeedInt j=i+1; j<m; j++)
98       v[j] = Q[j*k+i];
99     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
100     ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
101     CeedChk(ierr);
102   }
103   ierr = CeedFree(&v); CeedChk(ierr);
104   return CEED_ERROR_SUCCESS;
105 }
106 
107 /**
108   @brief Compute Givens rotation
109 
110     Computes A = G A (or G^T A in transpose mode)
111     where A is an mxn matrix indexed as A[i*n + j*m]
112 
113   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
114   @param c          Cosine factor
115   @param s          Sine factor
116   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
117                     which has the effect of rotating columns of A clockwise;
118                     @ref CEED_TRANSPOSE for the opposite rotation
119   @param i          First row/column to apply rotation
120   @param k          Second row/column to apply rotation
121   @param m          Number of rows in A
122   @param n          Number of columns in A
123 
124   @return An error code: 0 - success, otherwise - failure
125 
126   @ref Developer
127 **/
128 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
129                               CeedTransposeMode t_mode, CeedInt i, CeedInt k,
130                               CeedInt m, CeedInt n) {
131   CeedInt stride_j = 1, stride_ik = m, num_its = n;
132   if (t_mode == CEED_NOTRANSPOSE) {
133     stride_j = n; stride_ik = 1; num_its = m;
134   }
135 
136   // Apply rotation
137   for (CeedInt j=0; j<num_its; j++) {
138     CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j];
139     A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2;
140     A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2;
141   }
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 /**
146   @brief View an array stored in a CeedBasis
147 
148   @param[in] name      Name of array
149   @param[in] fp_fmt    Printing format
150   @param[in] m         Number of rows in array
151   @param[in] n         Number of columns in array
152   @param[in] a         Array to be viewed
153   @param[in] stream    Stream to view to, e.g., stdout
154 
155   @return An error code: 0 - success, otherwise - failure
156 
157   @ref Developer
158 **/
159 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
160                           CeedInt n, const CeedScalar *a, FILE *stream) {
161   for (CeedInt i=0; i<m; i++) {
162     if (m > 1)
163       fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
164     else
165       fprintf(stream, "%12s:", name);
166     for (CeedInt j=0; j<n; j++)
167       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
168     fputs("\n", stream);
169   }
170   return CEED_ERROR_SUCCESS;
171 }
172 
173 /**
174   @brief Create the interpolation and gradient matrices for projection from
175            the nodes of `basis_from` to the nodes of `basis_to`.
176            The interpolation is given by `interp_project = interp_to^+ * interp_from`,
177            where the pesudoinverse `interp_to^+` is given by QR factorization.
178            The gradient is given by `grad_project = interp_to^+ * grad_from`.
179          Note: `basis_from` and `basis_to` must have compatible quadrature
180            spaces.
181 
182   @param[in] basis_from       CeedBasis to project from
183   @param[in] basis_to         CeedBasis to project to
184   @param[out] interp_project  Address of the variable where the newly created
185                                 interpolation matrix will be stored.
186   @param[out] grad_project    Address of the variable where the newly created
187                                 gradient matrix will be stored.
188 
189   @return An error code: 0 - success, otherwise - failure
190 
191   @ref Developer
192 **/
193 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from,
194     CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) {
195   int ierr;
196   Ceed ceed;
197   ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr);
198 
199   // Check for compatible quadrature spaces
200   CeedInt Q_to, Q_from;
201   ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr);
202   ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr);
203   if (Q_to != Q_from)
204     // LCOV_EXCL_START
205     return CeedError(ceed, CEED_ERROR_DIMENSION,
206                      "Bases must have compatible quadrature spaces");
207   // LCOV_EXCL_STOP
208 
209   // Check for matching tensor or non-tensor
210   CeedInt P_to, P_from, Q = Q_to;
211   bool is_tensor_to, is_tensor_from;
212   ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr);
213   ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr);
214   if (is_tensor_to && is_tensor_from) {
215     ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr);
216     ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr);
217     ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr);
218   } else if (!is_tensor_to && !is_tensor_from) {
219     ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr);
220     ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr);
221   } else {
222     // LCOV_EXCL_START
223     return CeedError(ceed, CEED_ERROR_MINOR,
224                      "Bases must both be tensor or non-tensor");
225     // LCOV_EXCL_STOP
226   }
227 
228   // Get source matrices
229   CeedInt dim;
230   CeedScalar *interp_to, *interp_from, *tau;
231   ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr);
232   ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr);
233   ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr);
234   ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr);
235   ierr = CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project);
236   CeedChk(ierr);
237   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
238   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL,
239                     *grad_from_source;
240   if (is_tensor_to) {
241     ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr);
242     ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr);
243     ierr = CeedBasisGetGrad1D(basis_from, &grad_from_source); CeedChk(ierr);
244   } else {
245     ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr);
246     ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr);
247     ierr = CeedBasisGetGrad(basis_from, &grad_from_source); CeedChk(ierr);
248   }
249 
250   // Build matrices
251   CeedInt num_matrices = 1 + (is_tensor_to ? 1 : dim);
252   CeedScalar *input_from[num_matrices], *output_project[num_matrices];
253   input_from[0] = (CeedScalar *)interp_from_source;
254   output_project[0] = *interp_project;
255   for (CeedInt m = 1; m < num_matrices; m++) {
256     input_from[m] = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from];
257     output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]);
258   }
259   for (CeedInt m = 0; m < num_matrices; m++) {
260     // -- QR Factorization, interp_to = Q R
261     memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0]));
262     ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr);
263 
264     // -- Apply Qtranspose, interp_to = Qtranspose interp_from
265     memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0]));
266     ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE,
267                                  Q, P_from, P_to, P_from, 1); CeedChk(ierr);
268 
269     // -- Apply Rinv, interp_project = Rinv interp_c
270     for (CeedInt j = 0; j < P_from; j++) { // Column j
271       output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from *
272           (P_to - 1)] / interp_to[P_to * P_to - 1];
273       for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i
274         output_project[m][j + P_from * i] = interp_from[j + P_from * i];
275         for (CeedInt k = i+1; k < P_to; k++) {
276           output_project[m][j + P_from * i] -= interp_to[k + P_to * i]*
277                                                output_project[m][j + P_from * k];
278         }
279         output_project[m][j + P_from * i] /= interp_to[i + P_to * i];
280       }
281     }
282   }
283 
284   // Cleanup
285   ierr = CeedFree(&tau); CeedChk(ierr);
286   ierr = CeedFree(&interp_to); CeedChk(ierr);
287   ierr = CeedFree(&interp_from); CeedChk(ierr);
288 
289   return CEED_ERROR_SUCCESS;
290 }
291 
292 /// @}
293 
294 /// ----------------------------------------------------------------------------
295 /// Ceed Backend API
296 /// ----------------------------------------------------------------------------
297 /// @addtogroup CeedBasisBackend
298 /// @{
299 
300 /**
301   @brief Return collocated grad matrix
302 
303   @param basis               CeedBasis
304   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
305                                basis functions at quadrature points
306 
307   @return An error code: 0 - success, otherwise - failure
308 
309   @ref Backend
310 **/
311 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
312   int i, j, k;
313   Ceed ceed;
314   CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d;
315   CeedScalar *interp_1d, *grad_1d, *tau;
316 
317   ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr);
318   ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr);
319   ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr);
320   memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
321   memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
322 
323   // QR Factorization, interp_1d = Q R
324   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
325   ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr);
326   // Note: This function is for backend use, so all errors are terminal
327   //   and we do not need to clean up memory on failure.
328 
329   // Apply Rinv, collo_grad_1d = grad_1d Rinv
330   for (i=0; i<Q_1d; i++) { // Row i
331     collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0];
332     for (j=1; j<P_1d; j++) { // Column j
333       collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i];
334       for (k=0; k<j; k++)
335         collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i];
336       collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j];
337     }
338     for (j=P_1d; j<Q_1d; j++)
339       collo_grad_1d[j+Q_1d*i] = 0;
340   }
341 
342   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
343   ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE,
344                                Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr);
345 
346   ierr = CeedFree(&interp_1d); CeedChk(ierr);
347   ierr = CeedFree(&grad_1d); CeedChk(ierr);
348   ierr = CeedFree(&tau); CeedChk(ierr);
349   return CEED_ERROR_SUCCESS;
350 }
351 
352 /**
353   @brief Get tensor status for given CeedBasis
354 
355   @param basis           CeedBasis
356   @param[out] is_tensor  Variable to store tensor status
357 
358   @return An error code: 0 - success, otherwise - failure
359 
360   @ref Backend
361 **/
362 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
363   *is_tensor = basis->tensor_basis;
364   return CEED_ERROR_SUCCESS;
365 }
366 
367 /**
368   @brief Get backend data of a CeedBasis
369 
370   @param basis      CeedBasis
371   @param[out] data  Variable to store data
372 
373   @return An error code: 0 - success, otherwise - failure
374 
375   @ref Backend
376 **/
377 int CeedBasisGetData(CeedBasis basis, void *data) {
378   *(void **)data = basis->data;
379   return CEED_ERROR_SUCCESS;
380 }
381 
382 /**
383   @brief Set backend data of a CeedBasis
384 
385   @param[out] basis  CeedBasis
386   @param data        Data to set
387 
388   @return An error code: 0 - success, otherwise - failure
389 
390   @ref Backend
391 **/
392 int CeedBasisSetData(CeedBasis basis, void *data) {
393   basis->data = data;
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Increment the reference counter for a CeedBasis
399 
400   @param basis  Basis to increment the reference counter
401 
402   @return An error code: 0 - success, otherwise - failure
403 
404   @ref Backend
405 **/
406 int CeedBasisReference(CeedBasis basis) {
407   basis->ref_count++;
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
413 
414   @param basis     Basis to estimate FLOPs for
415   @param t_mode    Apply basis or transpose
416   @param eval_mode Basis evaluation mode
417   @param flops     Address of variable to hold FLOPs estimate
418 
419   @ref Backend
420 **/
421 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode,
422                               CeedEvalMode eval_mode, CeedSize *flops) {
423   int ierr;
424   bool is_tensor;
425 
426   ierr = CeedBasisIsTensor(basis, &is_tensor); CeedChk(ierr);
427   if (is_tensor) {
428     CeedInt dim, num_comp, P_1d, Q_1d;
429     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
430     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
431     ierr = CeedBasisGetNumNodes1D(basis, &P_1d);  CeedChk(ierr);
432     ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d);  CeedChk(ierr);
433     if (t_mode == CEED_TRANSPOSE) {
434       P_1d = Q_1d; Q_1d = P_1d;
435     }
436     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim-1), post = 1;
437     for (CeedInt d = 0; d < dim; d++) {
438       tensor_flops += 2 * pre * P_1d * post * Q_1d;
439       pre /= P_1d;
440       post *= Q_1d;
441     }
442     switch (eval_mode) {
443     case CEED_EVAL_NONE:   *flops = 0; break;
444     case CEED_EVAL_INTERP: *flops = tensor_flops; break;
445     case CEED_EVAL_GRAD:   *flops = tensor_flops * 2; break;
446     case CEED_EVAL_DIV:
447       // LCOV_EXCL_START
448       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
449                        "Tensor CEED_EVAL_DIV not supported"); break;
450     case CEED_EVAL_CURL:
451       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
452                        "Tensor CEED_EVAL_CURL not supported"); break;
453     // LCOV_EXCL_STOP
454     case CEED_EVAL_WEIGHT: *flops = dim * CeedIntPow(Q_1d, dim); break;
455     }
456   } else {
457     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
458     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
459     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
460     ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
461     ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
462     ierr = CeedBasisGetNumQuadratureComponents(basis, &Q_comp); CeedChk(ierr);
463     switch (eval_mode) {
464     case CEED_EVAL_NONE:   *flops = 0; break;
465     case CEED_EVAL_INTERP: *flops = num_nodes * num_qpts * num_comp; break;
466     case CEED_EVAL_GRAD:   *flops = num_nodes * num_qpts * num_comp * dim; break;
467     case CEED_EVAL_DIV:    *flops = num_nodes * num_qpts; break;
468     case CEED_EVAL_CURL:   *flops = num_nodes * num_qpts * dim; break;
469     case CEED_EVAL_WEIGHT: *flops = 0; break;
470     }
471   }
472 
473   return CEED_ERROR_SUCCESS;
474 }
475 
476 /**
477   @brief Get dimension for given CeedElemTopology
478 
479   @param topo      CeedElemTopology
480   @param[out] dim  Variable to store dimension of topology
481 
482   @return An error code: 0 - success, otherwise - failure
483 
484   @ref Backend
485 **/
486 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
487   *dim = (CeedInt) topo >> 16;
488   return CEED_ERROR_SUCCESS;
489 }
490 
491 /**
492   @brief Get CeedTensorContract of a CeedBasis
493 
494   @param basis          CeedBasis
495   @param[out] contract  Variable to store CeedTensorContract
496 
497   @return An error code: 0 - success, otherwise - failure
498 
499   @ref Backend
500 **/
501 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
502   *contract = basis->contract;
503   return CEED_ERROR_SUCCESS;
504 }
505 
506 /**
507   @brief Set CeedTensorContract of a CeedBasis
508 
509   @param[out] basis  CeedBasis
510   @param contract    CeedTensorContract to set
511 
512   @return An error code: 0 - success, otherwise - failure
513 
514   @ref Backend
515 **/
516 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
517   int ierr;
518   basis->contract = contract;
519   ierr = CeedTensorContractReference(contract); CeedChk(ierr);
520   return CEED_ERROR_SUCCESS;
521 }
522 
523 /**
524   @brief Return a reference implementation of matrix multiplication C = A B.
525            Note, this is a reference implementation for CPU CeedScalar pointers
526            that is not intended for high performance.
527 
528   @param ceed        A Ceed context for error handling
529   @param[in] mat_A   Row-major matrix A
530   @param[in] mat_B   Row-major matrix B
531   @param[out] mat_C  Row-major output matrix C
532   @param m           Number of rows of C
533   @param n           Number of columns of C
534   @param kk          Number of columns of A/rows of B
535 
536   @return An error code: 0 - success, otherwise - failure
537 
538   @ref Utility
539 **/
540 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A,
541                              const CeedScalar *mat_B, CeedScalar *mat_C,
542                              CeedInt m, CeedInt n, CeedInt kk) {
543   for (CeedInt i=0; i<m; i++)
544     for (CeedInt j=0; j<n; j++) {
545       CeedScalar sum = 0;
546       for (CeedInt k=0; k<kk; k++)
547         sum += mat_A[k+i*kk]*mat_B[j+k*n];
548       mat_C[j+i*n] = sum;
549     }
550   return CEED_ERROR_SUCCESS;
551 }
552 
553 /// @}
554 
555 /// ----------------------------------------------------------------------------
556 /// CeedBasis Public API
557 /// ----------------------------------------------------------------------------
558 /// @addtogroup CeedBasisUser
559 /// @{
560 
561 /**
562   @brief Create a tensor-product basis for H^1 discretizations
563 
564   @param ceed        A Ceed object where the CeedBasis will be created
565   @param dim         Topological dimension
566   @param num_comp    Number of field components (1 for scalar fields)
567   @param P_1d        Number of nodes in one dimension
568   @param Q_1d        Number of quadrature points in one dimension
569   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
570                        basis functions at quadrature points
571   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
572                        basis functions at quadrature points
573   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
574                        on the 1D reference element [-1, 1]
575   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
576                        reference element
577   @param[out] basis  Address of the variable where the newly created
578                        CeedBasis will be stored.
579 
580   @return An error code: 0 - success, otherwise - failure
581 
582   @ref User
583 **/
584 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
585                             CeedInt P_1d, CeedInt Q_1d,
586                             const CeedScalar *interp_1d,
587                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d,
588                             const CeedScalar *q_weight_1d, CeedBasis *basis) {
589   int ierr;
590 
591   if (!ceed->BasisCreateTensorH1) {
592     Ceed delegate;
593     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
594 
595     if (!delegate)
596       // LCOV_EXCL_START
597       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
598                        "Backend does not support BasisCreateTensorH1");
599     // LCOV_EXCL_STOP
600 
601     ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d,
602                                    Q_1d, interp_1d, grad_1d, q_ref_1d,
603                                    q_weight_1d, basis); CeedChk(ierr);
604     return CEED_ERROR_SUCCESS;
605   }
606 
607   if (dim < 1)
608     // LCOV_EXCL_START
609     return CeedError(ceed, CEED_ERROR_DIMENSION,
610                      "Basis dimension must be a positive value");
611   // LCOV_EXCL_STOP
612 
613   if (num_comp < 1)
614     // LCOV_EXCL_START
615     return CeedError(ceed, CEED_ERROR_DIMENSION,
616                      "Basis must have at least 1 component");
617   // LCOV_EXCL_STOP
618 
619   if (P_1d < 1)
620     // LCOV_EXCL_START
621     return CeedError(ceed, CEED_ERROR_DIMENSION,
622                      "Basis must have at least 1 node");
623   // LCOV_EXCL_STOP
624 
625   if (Q_1d < 1)
626     // LCOV_EXCL_START
627     return CeedError(ceed, CEED_ERROR_DIMENSION,
628                      "Basis must have at least 1 quadrature point");
629   // LCOV_EXCL_STOP
630 
631   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
632                           : dim == 2 ? CEED_TOPOLOGY_QUAD
633                           : CEED_TOPOLOGY_HEX;
634 
635   ierr = CeedCalloc(1, basis); CeedChk(ierr);
636   (*basis)->ceed = ceed;
637   ierr = CeedReference(ceed); CeedChk(ierr);
638   (*basis)->ref_count = 1;
639   (*basis)->tensor_basis = 1;
640   (*basis)->dim = dim;
641   (*basis)->topo = topo;
642   (*basis)->num_comp = num_comp;
643   (*basis)->P_1d = P_1d;
644   (*basis)->Q_1d = Q_1d;
645   (*basis)->P = CeedIntPow(P_1d, dim);
646   (*basis)->Q = CeedIntPow(Q_1d, dim);
647   (*basis)->Q_comp = 1;
648   (*basis)->basis_space = 1; // 1 for H^1 space
649   ierr = CeedCalloc(Q_1d, &(*basis)->q_ref_1d); CeedChk(ierr);
650   ierr = CeedCalloc(Q_1d, &(*basis)->q_weight_1d); CeedChk(ierr);
651   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0]));
652   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d,
653                             Q_1d*sizeof(q_weight_1d[0]));
654   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->interp_1d); CeedChk(ierr);
655   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->grad_1d); CeedChk(ierr);
656   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d,
657                           Q_1d*P_1d*sizeof(interp_1d[0]));
658   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0]));
659   ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d,
660                                    q_weight_1d, *basis); CeedChk(ierr);
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Create a tensor-product Lagrange basis
666 
667   @param ceed        A Ceed object where the CeedBasis will be created
668   @param dim         Topological dimension of element
669   @param num_comp      Number of field components (1 for scalar fields)
670   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
671                        polynomial degree of the resulting Q_k element is k=P-1.
672   @param Q           Number of quadrature points in one dimension.
673   @param quad_mode   Distribution of the Q quadrature points (affects order of
674                        accuracy for the quadrature)
675   @param[out] basis  Address of the variable where the newly created
676                        CeedBasis will be stored.
677 
678   @return An error code: 0 - success, otherwise - failure
679 
680   @ref User
681 **/
682 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
683                                     CeedInt P, CeedInt Q, CeedQuadMode quad_mode,
684                                     CeedBasis *basis) {
685   // Allocate
686   int ierr, ierr2, i, j, k;
687   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
688              *q_weight_1d;
689 
690   if (dim < 1)
691     // LCOV_EXCL_START
692     return CeedError(ceed, CEED_ERROR_DIMENSION,
693                      "Basis dimension must be a positive value");
694   // LCOV_EXCL_STOP
695 
696   if (num_comp < 1)
697     // LCOV_EXCL_START
698     return CeedError(ceed, CEED_ERROR_DIMENSION,
699                      "Basis must have at least 1 component");
700   // LCOV_EXCL_STOP
701 
702   if (P < 1)
703     // LCOV_EXCL_START
704     return CeedError(ceed, CEED_ERROR_DIMENSION,
705                      "Basis must have at least 1 node");
706   // LCOV_EXCL_STOP
707 
708   if (Q < 1)
709     // LCOV_EXCL_START
710     return CeedError(ceed, CEED_ERROR_DIMENSION,
711                      "Basis must have at least 1 quadrature point");
712   // LCOV_EXCL_STOP
713 
714   // Get Nodes and Weights
715   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
716   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
717   ierr = CeedCalloc(P, &nodes); CeedChk(ierr);
718   ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr);
719   ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr);
720   ierr = CeedLobattoQuadrature(P, nodes, NULL);
721   if (ierr) { goto cleanup; } CeedChk(ierr);
722   switch (quad_mode) {
723   case CEED_GAUSS:
724     ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
725     break;
726   case CEED_GAUSS_LOBATTO:
727     ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
728     break;
729   }
730   if (ierr) { goto cleanup; } CeedChk(ierr);
731 
732   // Build B, D matrix
733   // Fornberg, 1998
734   for (i = 0; i  < Q; i++) {
735     c1 = 1.0;
736     c3 = nodes[0] - q_ref_1d[i];
737     interp_1d[i*P+0] = 1.0;
738     for (j = 1; j < P; j++) {
739       c2 = 1.0;
740       c4 = c3;
741       c3 = nodes[j] - q_ref_1d[i];
742       for (k = 0; k < j; k++) {
743         dx = nodes[j] - nodes[k];
744         c2 *= dx;
745         if (k == j - 1) {
746           grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2;
747           interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2;
748         }
749         grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx;
750         interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx;
751       }
752       c1 = c2;
753     }
754   }
755   // Pass to CeedBasisCreateTensorH1
756   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d,
757                                  q_ref_1d, q_weight_1d, basis); CeedChk(ierr);
758 cleanup:
759   ierr2 = CeedFree(&interp_1d); CeedChk(ierr2);
760   ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);
761   ierr2 = CeedFree(&nodes); CeedChk(ierr2);
762   ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2);
763   ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2);
764   CeedChk(ierr);
765   return CEED_ERROR_SUCCESS;
766 }
767 
768 /**
769   @brief Create a non tensor-product basis for H^1 discretizations
770 
771   @param ceed        A Ceed object where the CeedBasis will be created
772   @param topo        Topology of element, e.g. hypercube, simplex, ect
773   @param num_comp    Number of field components (1 for scalar fields)
774   @param num_nodes   Total number of nodes
775   @param num_qpts    Total number of quadrature points
776   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
777                        nodal basis functions at quadrature points
778   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
779                        derivatives of nodal basis functions at quadrature points
780   @param q_ref       Array of length num_qpts holding the locations of quadrature
781                        points on the reference element
782   @param q_weight    Array of length num_qpts holding the quadrature weights on the
783                        reference element
784   @param[out] basis  Address of the variable where the newly created
785                        CeedBasis will be stored.
786 
787   @return An error code: 0 - success, otherwise - failure
788 
789   @ref User
790 **/
791 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
792                       CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
793                       const CeedScalar *grad, const CeedScalar *q_ref,
794                       const CeedScalar *q_weight, CeedBasis *basis) {
795   int ierr;
796   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
797 
798   if (!ceed->BasisCreateH1) {
799     Ceed delegate;
800     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
801 
802     if (!delegate)
803       // LCOV_EXCL_START
804       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
805                        "Backend does not support BasisCreateH1");
806     // LCOV_EXCL_STOP
807 
808     ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes,
809                              num_qpts, interp, grad, q_ref,
810                              q_weight, basis); CeedChk(ierr);
811     return CEED_ERROR_SUCCESS;
812   }
813 
814   if (num_comp < 1)
815     // LCOV_EXCL_START
816     return CeedError(ceed, CEED_ERROR_DIMENSION,
817                      "Basis must have at least 1 component");
818   // LCOV_EXCL_STOP
819 
820   if (num_nodes < 1)
821     // LCOV_EXCL_START
822     return CeedError(ceed, CEED_ERROR_DIMENSION,
823                      "Basis must have at least 1 node");
824   // LCOV_EXCL_STOP
825 
826   if (num_qpts < 1)
827     // LCOV_EXCL_START
828     return CeedError(ceed, CEED_ERROR_DIMENSION,
829                      "Basis must have at least 1 quadrature point");
830   // LCOV_EXCL_STOP
831 
832   ierr = CeedCalloc(1, basis); CeedChk(ierr);
833 
834   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
835 
836   (*basis)->ceed = ceed;
837   ierr = CeedReference(ceed); CeedChk(ierr);
838   (*basis)->ref_count = 1;
839   (*basis)->tensor_basis = 0;
840   (*basis)->dim = dim;
841   (*basis)->topo = topo;
842   (*basis)->num_comp = num_comp;
843   (*basis)->P = P;
844   (*basis)->Q = Q;
845   (*basis)->Q_comp = 1;
846   (*basis)->basis_space = 1; // 1 for H^1 space
847   ierr = CeedCalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
848   ierr = CeedCalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
849   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
850   if(q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
851   ierr = CeedCalloc(Q*P, &(*basis)->interp); CeedChk(ierr);
852   ierr = CeedCalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr);
853   if(interp) memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0]));
854   if(grad) memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0]));
855   ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref,
856                              q_weight, *basis); CeedChk(ierr);
857   return CEED_ERROR_SUCCESS;
858 }
859 
860 /**
861   @brief Create a non tensor-product basis for H(div) discretizations
862 
863   @param ceed        A Ceed object where the CeedBasis will be created
864   @param topo        Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.),
865                      dimension of which is used in some array sizes below
866   @param num_comp    Number of components (usually 1 for vectors in H(div) bases)
867   @param num_nodes   Total number of nodes (dofs per element)
868   @param num_qpts    Total number of quadrature points
869   @param interp      Row-major (dim*num_qpts * num_nodes) matrix expressing the values of
870                        nodal basis functions at quadrature points
871   @param div        Row-major (num_qpts * num_nodes) matrix expressing
872                        divergence of nodal basis functions at quadrature points
873   @param q_ref       Array of length num_qpts holding the locations of quadrature
874                        points on the reference element
875   @param q_weight    Array of length num_qpts holding the quadrature weights on the
876                        reference element
877   @param[out] basis  Address of the variable where the newly created
878                        CeedBasis will be stored.
879 
880   @return An error code: 0 - success, otherwise - failure
881 
882   @ref User
883 **/
884 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
885                         CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
886                         const CeedScalar *div, const CeedScalar *q_ref,
887                         const CeedScalar *q_weight, CeedBasis *basis) {
888   int ierr;
889   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
890   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
891   if (!ceed->BasisCreateHdiv) {
892     Ceed delegate;
893     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
894 
895     if (!delegate)
896       // LCOV_EXCL_START
897       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
898                        "Backend does not implement BasisCreateHdiv");
899     // LCOV_EXCL_STOP
900 
901     ierr = CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes,
902                                num_qpts, interp, div, q_ref,
903                                q_weight, basis); CeedChk(ierr);
904     return CEED_ERROR_SUCCESS;
905   }
906 
907   if (num_comp < 1)
908     // LCOV_EXCL_START
909     return CeedError(ceed, CEED_ERROR_DIMENSION,
910                      "Basis must have at least 1 component");
911   // LCOV_EXCL_STOP
912 
913   if (num_nodes < 1)
914     // LCOV_EXCL_START
915     return CeedError(ceed, CEED_ERROR_DIMENSION,
916                      "Basis must have at least 1 node");
917   // LCOV_EXCL_STOP
918 
919   if (num_qpts < 1)
920     // LCOV_EXCL_START
921     return CeedError(ceed, CEED_ERROR_DIMENSION,
922                      "Basis must have at least 1 quadrature point");
923   // LCOV_EXCL_STOP
924 
925   ierr = CeedCalloc(1, basis); CeedChk(ierr);
926 
927   (*basis)->ceed = ceed;
928   ierr = CeedReference(ceed); CeedChk(ierr);
929   (*basis)->ref_count = 1;
930   (*basis)->tensor_basis = 0;
931   (*basis)->dim = dim;
932   (*basis)->topo = topo;
933   (*basis)->num_comp = num_comp;
934   (*basis)->P = P;
935   (*basis)->Q = Q;
936   (*basis)->Q_comp = dim;
937   (*basis)->basis_space = 2; // 2 for H(div) space
938   ierr = CeedMalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
939   ierr = CeedMalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
940   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
941   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
942   ierr = CeedMalloc(dim*Q*P, &(*basis)->interp); CeedChk(ierr);
943   ierr = CeedMalloc(Q*P, &(*basis)->div); CeedChk(ierr);
944   if (interp) memcpy((*basis)->interp, interp, dim*Q*P*sizeof(interp[0]));
945   if (div) memcpy((*basis)->div, div, Q*P*sizeof(div[0]));
946   ierr = ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref,
947                                q_weight, *basis); CeedChk(ierr);
948   return CEED_ERROR_SUCCESS;
949 }
950 
951 /**
952   @brief Create a CeedBasis for projection from the nodes of `basis_from`
953            to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` and
954            `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`.
955            The interpolation is given by `interp_project = interp_to^+ * interp_from`,
956            where the pesudoinverse `interp_to^+` is given by QR factorization.
957            The gradient is given by `grad_project = interp_to^+ * grad_from`.
958          Note: `basis_from` and `basis_to` must have compatible quadrature
959            spaces.
960          Note: `basis_project` will have the same number of components as
961            `basis_from`, regardless of the number of components that
962            `basis_to` has. If `basis_from` has 3 components and `basis_to`
963            has 5 components, then `basis_project` will have 3 components.
964 
965   @param[in] basis_from      CeedBasis to prolong from
966   @param[in] basis_to        CeedBasis to prolong to
967   @param[out] basis_project  Address of the variable where the newly created
968                                CeedBasis will be stored.
969 
970   @return An error code: 0 - success, otherwise - failure
971 
972   @ref User
973 **/
974 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to,
975                               CeedBasis *basis_project) {
976   int ierr;
977   Ceed ceed;
978   ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr);
979 
980   // Create projectior matrix
981   CeedScalar *interp_project, *grad_project;
982   ierr = CeedBasisCreateProjectionMatrices(basis_from, basis_to,
983          &interp_project, &grad_project);
984   CeedChk(ierr);
985 
986   // Build basis
987   bool is_tensor;
988   CeedInt dim, num_comp;
989   CeedScalar *q_ref, *q_weight;
990   ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr);
991   ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr);
992   ierr = CeedBasisGetNumComponents(basis_from, &num_comp); CeedChk(ierr);
993   if (is_tensor) {
994     CeedInt P_1d_to, P_1d_from;
995     ierr = CeedBasisGetNumNodes1D(basis_from, &P_1d_from); CeedChk(ierr);
996     ierr = CeedBasisGetNumNodes1D(basis_to, &P_1d_to); CeedChk(ierr);
997     ierr = CeedCalloc(P_1d_to, &q_ref); CeedChk(ierr);
998     ierr = CeedCalloc(P_1d_to, &q_weight); CeedChk(ierr);
999     ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to,
1000                                    interp_project, grad_project, q_ref, q_weight, basis_project);
1001     CeedChk(ierr);
1002   } else {
1003     CeedElemTopology topo;
1004     ierr = CeedBasisGetTopology(basis_to, &topo); CeedChk(ierr);
1005     CeedInt num_nodes_to, num_nodes_from;
1006     ierr = CeedBasisGetNumNodes(basis_from, &num_nodes_from); CeedChk(ierr);
1007     ierr = CeedBasisGetNumNodes(basis_to, &num_nodes_to); CeedChk(ierr);
1008     ierr = CeedCalloc(num_nodes_to * dim, &q_ref); CeedChk(ierr);
1009     ierr = CeedCalloc(num_nodes_to, &q_weight); CeedChk(ierr);
1010     ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to,
1011                              interp_project, grad_project, q_ref, q_weight, basis_project);
1012     CeedChk(ierr);
1013   }
1014 
1015   // Cleanup
1016   ierr = CeedFree(&interp_project); CeedChk(ierr);
1017   ierr = CeedFree(&grad_project); CeedChk(ierr);
1018   ierr = CeedFree(&q_ref); CeedChk(ierr);
1019   ierr = CeedFree(&q_weight); CeedChk(ierr);
1020 
1021   return CEED_ERROR_SUCCESS;
1022 }
1023 
1024 /**
1025   @brief Copy the pointer to a CeedBasis. Both pointers should
1026            be destroyed with `CeedBasisDestroy()`;
1027            Note: If `*basis_copy` is non-NULL, then it is assumed that
1028            `*basis_copy` is a pointer to a CeedBasis. This CeedBasis
1029            will be destroyed if `*basis_copy` is the only
1030            reference to this CeedBasis.
1031 
1032   @param basis            CeedBasis to copy reference to
1033   @param[out] basis_copy  Variable to store copied reference
1034 
1035   @return An error code: 0 - success, otherwise - failure
1036 
1037   @ref User
1038 **/
1039 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1040   int ierr;
1041 
1042   ierr = CeedBasisReference(basis); CeedChk(ierr);
1043   ierr = CeedBasisDestroy(basis_copy); CeedChk(ierr);
1044   *basis_copy = basis;
1045   return CEED_ERROR_SUCCESS;
1046 }
1047 
1048 /**
1049   @brief View a CeedBasis
1050 
1051   @param basis   CeedBasis to view
1052   @param stream  Stream to view to, e.g., stdout
1053 
1054   @return An error code: 0 - success, otherwise - failure
1055 
1056   @ref User
1057 **/
1058 int CeedBasisView(CeedBasis basis, FILE *stream) {
1059   int ierr;
1060   CeedFESpace FE_space = basis->basis_space;
1061   CeedElemTopology topo = basis->topo;
1062   // Print FE space and element topology of the basis
1063   if (basis->tensor_basis) {
1064     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%"
1065             CeedInt_FMT " Q=%" CeedInt_FMT "\n",
1066             CeedFESpaces[FE_space], CeedElemTopologies[topo],
1067             basis->dim, basis->P_1d, basis->Q_1d);
1068   } else {
1069     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%"
1070             CeedInt_FMT " Q=%" CeedInt_FMT "\n",
1071             CeedFESpaces[FE_space], CeedElemTopologies[topo],
1072             basis->dim, basis->P, basis->Q);
1073   }
1074   // Print quadrature data, interpolation/gradient/divergene/curl of the basis
1075   if (basis->tensor_basis) { // tensor basis
1076     ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d,
1077                           stream); CeedChk(ierr);
1078     ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d,
1079                           basis->q_weight_1d, stream); CeedChk(ierr);
1080     ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
1081                           basis->interp_1d, stream); CeedChk(ierr);
1082     ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
1083                           basis->grad_1d, stream); CeedChk(ierr);
1084   } else { // non-tensor basis
1085     ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim,
1086                           basis->q_ref_1d,
1087                           stream); CeedChk(ierr);
1088     ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d,
1089                           stream); CeedChk(ierr);
1090     ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q_comp*basis->Q, basis->P,
1091                           basis->interp, stream); CeedChk(ierr);
1092     if (basis->grad) {
1093       ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P,
1094                             basis->grad, stream); CeedChk(ierr);
1095     }
1096     if (basis->div) {
1097       ierr = CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P,
1098                             basis->div, stream); CeedChk(ierr);
1099     }
1100   }
1101   return CEED_ERROR_SUCCESS;
1102 }
1103 
1104 /**
1105   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1106 
1107   @param basis     CeedBasis to evaluate
1108   @param num_elem  The number of elements to apply the basis evaluation to;
1109                      the backend will specify the ordering in
1110                      CeedElemRestrictionCreateBlocked()
1111   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
1112                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
1113                      from quadrature points to nodes
1114   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
1115                      \ref CEED_EVAL_INTERP to use interpolated values,
1116                      \ref CEED_EVAL_GRAD to use gradients,
1117                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
1118   @param[in] u     Input CeedVector
1119   @param[out] v    Output CeedVector
1120 
1121   @return An error code: 0 - success, otherwise - failure
1122 
1123   @ref User
1124 **/
1125 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
1126                    CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1127   int ierr;
1128   CeedSize u_length = 0, v_length;
1129   CeedInt dim, num_comp, num_nodes, num_qpts;
1130   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1131   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1132   ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
1133   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1134   ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr);
1135   if (u) {
1136     ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr);
1137   }
1138 
1139   if (!basis->Apply)
1140     // LCOV_EXCL_START
1141     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED,
1142                      "Backend does not support BasisApply");
1143   // LCOV_EXCL_STOP
1144 
1145   // Check compatibility of topological and geometrical dimensions
1146   if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 ||
1147                                     u_length%num_qpts != 0)) ||
1148       (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 ||
1149                                       v_length%num_qpts != 0)))
1150     // LCOV_EXCL_START
1151     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
1152                      "Length of input/output vectors "
1153                      "incompatible with basis dimensions");
1154   // LCOV_EXCL_STOP
1155 
1156   // Check vector lengths to prevent out of bounds issues
1157   bool bad_dims = false;
1158   switch (eval_mode) {
1159   case CEED_EVAL_NONE:
1160   case CEED_EVAL_INTERP: bad_dims =
1161       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1162                                      v_length < num_elem*num_comp*num_nodes)) ||
1163        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1164                                        u_length < num_elem*num_comp*num_nodes)));
1165     break;
1166   case CEED_EVAL_GRAD: bad_dims =
1167       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim ||
1168                                      v_length < num_elem*num_comp*num_nodes)) ||
1169        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim ||
1170                                        u_length < num_elem*num_comp*num_nodes)));
1171     break;
1172   case CEED_EVAL_WEIGHT:
1173     bad_dims = v_length < num_elem*num_qpts;
1174     break;
1175   // LCOV_EXCL_START
1176   case CEED_EVAL_DIV: bad_dims =
1177       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1178                                      v_length < num_elem*num_comp*num_nodes)) ||
1179        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1180                                        u_length < num_elem*num_comp*num_nodes)));
1181     break;
1182   case CEED_EVAL_CURL: bad_dims =
1183       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1184                                      v_length < num_elem*num_comp*num_nodes)) ||
1185        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1186                                        u_length < num_elem*num_comp*num_nodes)));
1187     break;
1188     // LCOV_EXCL_STOP
1189   }
1190   if (bad_dims)
1191     // LCOV_EXCL_START
1192     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
1193                      "Input/output vectors too short for basis and evaluation mode");
1194   // LCOV_EXCL_STOP
1195 
1196   ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr);
1197   return CEED_ERROR_SUCCESS;
1198 }
1199 
1200 /**
1201   @brief Get Ceed associated with a CeedBasis
1202 
1203   @param basis      CeedBasis
1204   @param[out] ceed  Variable to store Ceed
1205 
1206   @return An error code: 0 - success, otherwise - failure
1207 
1208   @ref Advanced
1209 **/
1210 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1211   *ceed = basis->ceed;
1212   return CEED_ERROR_SUCCESS;
1213 }
1214 
1215 /**
1216   @brief Get dimension for given CeedBasis
1217 
1218   @param basis     CeedBasis
1219   @param[out] dim  Variable to store dimension of basis
1220 
1221   @return An error code: 0 - success, otherwise - failure
1222 
1223   @ref Advanced
1224 **/
1225 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1226   *dim = basis->dim;
1227   return CEED_ERROR_SUCCESS;
1228 }
1229 
1230 /**
1231   @brief Get topology for given CeedBasis
1232 
1233   @param basis      CeedBasis
1234   @param[out] topo  Variable to store topology of basis
1235 
1236   @return An error code: 0 - success, otherwise - failure
1237 
1238   @ref Advanced
1239 **/
1240 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1241   *topo = basis->topo;
1242   return CEED_ERROR_SUCCESS;
1243 }
1244 
1245 /**
1246   @brief Get number of Q-vector components for given CeedBasis
1247 
1248   @param basis          CeedBasis
1249   @param[out] Q_comp  Variable to store number of Q-vector components of basis
1250 
1251   @return An error code: 0 - success, otherwise - failure
1252 
1253   @ref Advanced
1254 **/
1255 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
1256   *Q_comp = basis->Q_comp;
1257   return CEED_ERROR_SUCCESS;
1258 }
1259 
1260 /**
1261   @brief Get number of components for given CeedBasis
1262 
1263   @param basis          CeedBasis
1264   @param[out] num_comp  Variable to store number of components of basis
1265 
1266   @return An error code: 0 - success, otherwise - failure
1267 
1268   @ref Advanced
1269 **/
1270 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1271   *num_comp = basis->num_comp;
1272   return CEED_ERROR_SUCCESS;
1273 }
1274 
1275 /**
1276   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1277 
1278   @param basis   CeedBasis
1279   @param[out] P  Variable to store number of nodes
1280 
1281   @return An error code: 0 - success, otherwise - failure
1282 
1283   @ref Utility
1284 **/
1285 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1286   *P = basis->P;
1287   return CEED_ERROR_SUCCESS;
1288 }
1289 
1290 /**
1291   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1292 
1293   @param basis     CeedBasis
1294   @param[out] P_1d  Variable to store number of nodes
1295 
1296   @return An error code: 0 - success, otherwise - failure
1297 
1298   @ref Advanced
1299 **/
1300 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1301   if (!basis->tensor_basis)
1302     // LCOV_EXCL_START
1303     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1304                      "Cannot supply P_1d for non-tensor basis");
1305   // LCOV_EXCL_STOP
1306 
1307   *P_1d = basis->P_1d;
1308   return CEED_ERROR_SUCCESS;
1309 }
1310 
1311 /**
1312   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1313 
1314   @param basis   CeedBasis
1315   @param[out] Q  Variable to store number of quadrature points
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref Utility
1320 **/
1321 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1322   *Q = basis->Q;
1323   return CEED_ERROR_SUCCESS;
1324 }
1325 
1326 /**
1327   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1328 
1329   @param basis      CeedBasis
1330   @param[out] Q_1d  Variable to store number of quadrature points
1331 
1332   @return An error code: 0 - success, otherwise - failure
1333 
1334   @ref Advanced
1335 **/
1336 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1337   if (!basis->tensor_basis)
1338     // LCOV_EXCL_START
1339     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1340                      "Cannot supply Q_1d for non-tensor basis");
1341   // LCOV_EXCL_STOP
1342 
1343   *Q_1d = basis->Q_1d;
1344   return CEED_ERROR_SUCCESS;
1345 }
1346 
1347 /**
1348   @brief Get reference coordinates of quadrature points (in dim dimensions)
1349          of a CeedBasis
1350 
1351   @param basis       CeedBasis
1352   @param[out] q_ref  Variable to store reference coordinates of quadrature points
1353 
1354   @return An error code: 0 - success, otherwise - failure
1355 
1356   @ref Advanced
1357 **/
1358 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1359   *q_ref = basis->q_ref_1d;
1360   return CEED_ERROR_SUCCESS;
1361 }
1362 
1363 /**
1364   @brief Get quadrature weights of quadrature points (in dim dimensions)
1365          of a CeedBasis
1366 
1367   @param basis          CeedBasis
1368   @param[out] q_weight  Variable to store quadrature weights
1369 
1370   @return An error code: 0 - success, otherwise - failure
1371 
1372   @ref Advanced
1373 **/
1374 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1375   *q_weight = basis->q_weight_1d;
1376   return CEED_ERROR_SUCCESS;
1377 }
1378 
1379 /**
1380   @brief Get interpolation matrix of a CeedBasis
1381 
1382   @param basis        CeedBasis
1383   @param[out] interp  Variable to store interpolation matrix
1384 
1385   @return An error code: 0 - success, otherwise - failure
1386 
1387   @ref Advanced
1388 **/
1389 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1390   if (!basis->interp && basis->tensor_basis) {
1391     // Allocate
1392     int ierr;
1393     ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr);
1394 
1395     // Initialize
1396     for (CeedInt i=0; i<basis->Q*basis->P; i++)
1397       basis->interp[i] = 1.0;
1398 
1399     // Calculate
1400     for (CeedInt d=0; d<basis->dim; d++)
1401       for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1402         for (CeedInt node=0; node<basis->P; node++) {
1403           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1404           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1405           basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p];
1406         }
1407   }
1408   *interp = basis->interp;
1409   return CEED_ERROR_SUCCESS;
1410 }
1411 
1412 /**
1413   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1414 
1415   @param basis           CeedBasis
1416   @param[out] interp_1d  Variable to store interpolation matrix
1417 
1418   @return An error code: 0 - success, otherwise - failure
1419 
1420   @ref Backend
1421 **/
1422 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1423   if (!basis->tensor_basis)
1424     // LCOV_EXCL_START
1425     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1426                      "CeedBasis is not a tensor product basis.");
1427   // LCOV_EXCL_STOP
1428 
1429   *interp_1d = basis->interp_1d;
1430   return CEED_ERROR_SUCCESS;
1431 }
1432 
1433 /**
1434   @brief Get gradient matrix of a CeedBasis
1435 
1436   @param basis      CeedBasis
1437   @param[out] grad  Variable to store gradient matrix
1438 
1439   @return An error code: 0 - success, otherwise - failure
1440 
1441   @ref Advanced
1442 **/
1443 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1444   if (!basis->grad && basis->tensor_basis) {
1445     // Allocate
1446     int ierr;
1447     ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad);
1448     CeedChk(ierr);
1449 
1450     // Initialize
1451     for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++)
1452       basis->grad[i] = 1.0;
1453 
1454     // Calculate
1455     for (CeedInt d=0; d<basis->dim; d++)
1456       for (CeedInt i=0; i<basis->dim; i++)
1457         for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1458           for (CeedInt node=0; node<basis->P; node++) {
1459             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1460             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1461             if (i == d)
1462               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1463                 basis->grad_1d[q*basis->P_1d+p];
1464             else
1465               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1466                 basis->interp_1d[q*basis->P_1d+p];
1467           }
1468   }
1469   *grad = basis->grad;
1470   return CEED_ERROR_SUCCESS;
1471 }
1472 
1473 /**
1474   @brief Get 1D gradient matrix of a tensor product CeedBasis
1475 
1476   @param basis         CeedBasis
1477   @param[out] grad_1d  Variable to store gradient matrix
1478 
1479   @return An error code: 0 - success, otherwise - failure
1480 
1481   @ref Advanced
1482 **/
1483 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1484   if (!basis->tensor_basis)
1485     // LCOV_EXCL_START
1486     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1487                      "CeedBasis is not a tensor product basis.");
1488   // LCOV_EXCL_STOP
1489 
1490   *grad_1d = basis->grad_1d;
1491   return CEED_ERROR_SUCCESS;
1492 }
1493 
1494 /**
1495   @brief Get divergence matrix of a CeedBasis
1496 
1497   @param basis     CeedBasis
1498   @param[out] div  Variable to store divergence matrix
1499 
1500   @return An error code: 0 - success, otherwise - failure
1501 
1502   @ref Advanced
1503 **/
1504 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1505   if (!basis->div)
1506     // LCOV_EXCL_START
1507     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1508                      "CeedBasis does not have divergence matrix.");
1509   // LCOV_EXCL_STOP
1510 
1511   *div = basis->div;
1512   return CEED_ERROR_SUCCESS;
1513 }
1514 
1515 /**
1516   @brief Destroy a CeedBasis
1517 
1518   @param basis CeedBasis to destroy
1519 
1520   @return An error code: 0 - success, otherwise - failure
1521 
1522   @ref User
1523 **/
1524 int CeedBasisDestroy(CeedBasis *basis) {
1525   int ierr;
1526 
1527   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1528   if ((*basis)->Destroy) {
1529     ierr = (*basis)->Destroy(*basis); CeedChk(ierr);
1530   }
1531   if ((*basis)->contract) {
1532     ierr = CeedTensorContractDestroy(&(*basis)->contract); CeedChk(ierr);
1533   }
1534   ierr = CeedFree(&(*basis)->interp); CeedChk(ierr);
1535   ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr);
1536   ierr = CeedFree(&(*basis)->grad); CeedChk(ierr);
1537   ierr = CeedFree(&(*basis)->div); CeedChk(ierr);
1538   ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr);
1539   ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr);
1540   ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr);
1541   ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr);
1542   ierr = CeedFree(basis); CeedChk(ierr);
1543   return CEED_ERROR_SUCCESS;
1544 }
1545 
1546 /**
1547   @brief Construct a Gauss-Legendre quadrature
1548 
1549   @param Q                 Number of quadrature points (integrates polynomials of
1550                              degree 2*Q-1 exactly)
1551   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1552   @param[out] q_weight_1d  Array of length Q to hold the weights
1553 
1554   @return An error code: 0 - success, otherwise - failure
1555 
1556   @ref Utility
1557 **/
1558 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1559                         CeedScalar *q_weight_1d) {
1560   // Allocate
1561   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
1562   // Build q_ref_1d, q_weight_1d
1563   for (CeedInt i = 0; i <= Q/2; i++) {
1564     // Guess
1565     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
1566     // Pn(xi)
1567     P0 = 1.0;
1568     P1 = xi;
1569     P2 = 0.0;
1570     for (CeedInt j = 2; j <= Q; j++) {
1571       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1572       P0 = P1;
1573       P1 = P2;
1574     }
1575     // First Newton Step
1576     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1577     xi = xi-P2/dP2;
1578     // Newton to convergence
1579     for (CeedInt k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
1580       P0 = 1.0;
1581       P1 = xi;
1582       for (CeedInt j = 2; j <= Q; j++) {
1583         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1584         P0 = P1;
1585         P1 = P2;
1586       }
1587       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1588       xi = xi-P2/dP2;
1589     }
1590     // Save xi, wi
1591     wi = 2.0/((1.0-xi*xi)*dP2*dP2);
1592     q_weight_1d[i] = wi;
1593     q_weight_1d[Q-1-i] = wi;
1594     q_ref_1d[i] = -xi;
1595     q_ref_1d[Q-1-i]= xi;
1596   }
1597   return CEED_ERROR_SUCCESS;
1598 }
1599 
1600 /**
1601   @brief Construct a Gauss-Legendre-Lobatto quadrature
1602 
1603   @param Q                 Number of quadrature points (integrates polynomials of
1604                              degree 2*Q-3 exactly)
1605   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1606   @param[out] q_weight_1d  Array of length Q to hold the weights
1607 
1608   @return An error code: 0 - success, otherwise - failure
1609 
1610   @ref Utility
1611 **/
1612 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1613                           CeedScalar *q_weight_1d) {
1614   // Allocate
1615   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0);
1616   // Build q_ref_1d, q_weight_1d
1617   // Set endpoints
1618   if (Q < 2)
1619     // LCOV_EXCL_START
1620     return CeedError(NULL, CEED_ERROR_DIMENSION,
1621                      "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1622   // LCOV_EXCL_STOP
1623   wi = 2.0/((CeedScalar)(Q*(Q-1)));
1624   if (q_weight_1d) {
1625     q_weight_1d[0] = wi;
1626     q_weight_1d[Q-1] = wi;
1627   }
1628   q_ref_1d[0] = -1.0;
1629   q_ref_1d[Q-1] = 1.0;
1630   // Interior
1631   for (CeedInt i = 1; i <= (Q-1)/2; i++) {
1632     // Guess
1633     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
1634     // Pn(xi)
1635     P0 = 1.0;
1636     P1 = xi;
1637     P2 = 0.0;
1638     for (CeedInt j = 2; j < Q; j++) {
1639       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1640       P0 = P1;
1641       P1 = P2;
1642     }
1643     // First Newton step
1644     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1645     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1646     xi = xi-dP2/d2P2;
1647     // Newton to convergence
1648     for (CeedInt k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
1649       P0 = 1.0;
1650       P1 = xi;
1651       for (CeedInt j = 2; j < Q; j++) {
1652         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1653         P0 = P1;
1654         P1 = P2;
1655       }
1656       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1657       d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1658       xi = xi-dP2/d2P2;
1659     }
1660     // Save xi, wi
1661     wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2);
1662     if (q_weight_1d) {
1663       q_weight_1d[i] = wi;
1664       q_weight_1d[Q-1-i] = wi;
1665     }
1666     q_ref_1d[i] = -xi;
1667     q_ref_1d[Q-1-i]= xi;
1668   }
1669   return CEED_ERROR_SUCCESS;
1670 }
1671 
1672 /**
1673   @brief Return QR Factorization of a matrix
1674 
1675   @param ceed         A Ceed context for error handling
1676   @param[in,out] mat  Row-major matrix to be factorized in place
1677   @param[in,out] tau  Vector of length m of scaling factors
1678   @param m            Number of rows
1679   @param n            Number of columns
1680 
1681   @return An error code: 0 - success, otherwise - failure
1682 
1683   @ref Utility
1684 **/
1685 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau,
1686                         CeedInt m, CeedInt n) {
1687   CeedScalar v[m];
1688 
1689   // Check m >= n
1690   if (n > m)
1691     // LCOV_EXCL_START
1692     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1693                      "Cannot compute QR factorization with n > m");
1694   // LCOV_EXCL_STOP
1695 
1696   for (CeedInt i=0; i<n; i++) {
1697     if (i >= m-1) { // last row of matrix, no reflection needed
1698       tau[i] = 0.;
1699       break;
1700     }
1701     // Calculate Householder vector, magnitude
1702     CeedScalar sigma = 0.0;
1703     v[i] = mat[i+n*i];
1704     for (CeedInt j=i+1; j<m; j++) {
1705       v[j] = mat[i+n*j];
1706       sigma += v[j] * v[j];
1707     }
1708     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m]
1709     CeedScalar R_ii = -copysign(norm, v[i]);
1710     v[i] -= R_ii;
1711     // norm of v[i:m] after modification above and scaling below
1712     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1713     //   tau = 2 / (norm*norm)
1714     tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1715     for (CeedInt j=i+1; j<m; j++)
1716       v[j] /= v[i];
1717 
1718     // Apply Householder reflector to lower right panel
1719     CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1);
1720     // Save v
1721     mat[i+n*i] = R_ii;
1722     for (CeedInt j=i+1; j<m; j++)
1723       mat[i+n*j] = v[j];
1724   }
1725   return CEED_ERROR_SUCCESS;
1726 }
1727 
1728 /**
1729   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1730            symmetric QR factorization
1731 
1732   @param ceed         A Ceed context for error handling
1733   @param[in,out] mat  Row-major matrix to be factorized in place
1734   @param[out] lambda  Vector of length n of eigenvalues
1735   @param n            Number of rows/columns
1736 
1737   @return An error code: 0 - success, otherwise - failure
1738 
1739   @ref Utility
1740 **/
1741 CeedPragmaOptimizeOff
1742 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat,
1743                                     CeedScalar *lambda, CeedInt n) {
1744   // Check bounds for clang-tidy
1745   if (n<2)
1746     // LCOV_EXCL_START
1747     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1748                      "Cannot compute symmetric Schur decomposition of scalars");
1749   // LCOV_EXCL_STOP
1750 
1751   CeedScalar v[n-1], tau[n-1], mat_T[n*n];
1752 
1753   // Copy mat to mat_T and set mat to I
1754   memcpy(mat_T, mat, n*n*sizeof(mat[0]));
1755   for (CeedInt i=0; i<n; i++)
1756     for (CeedInt j=0; j<n; j++)
1757       mat[j+n*i] = (i==j) ? 1 : 0;
1758 
1759   // Reduce to tridiagonal
1760   for (CeedInt i=0; i<n-1; i++) {
1761     // Calculate Householder vector, magnitude
1762     CeedScalar sigma = 0.0;
1763     v[i] = mat_T[i+n*(i+1)];
1764     for (CeedInt j=i+1; j<n-1; j++) {
1765       v[j] = mat_T[i+n*(j+1)];
1766       sigma += v[j] * v[j];
1767     }
1768     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1]
1769     CeedScalar R_ii = -copysign(norm, v[i]);
1770     v[i] -= R_ii;
1771     // norm of v[i:m] after modification above and scaling below
1772     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1773     //   tau = 2 / (norm*norm)
1774     tau[i] = i == n - 2 ? 2 : 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1775     for (CeedInt j=i+1; j<n-1; j++)
1776       v[j] /= v[i];
1777 
1778     // Update sub and super diagonal
1779     for (CeedInt j=i+2; j<n; j++) {
1780       mat_T[i+n*j] = 0; mat_T[j+n*i] = 0;
1781     }
1782     // Apply symmetric Householder reflector to lower right panel
1783     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1784                            n-(i+1), n-(i+1), n, 1);
1785     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1786                            n-(i+1), n-(i+1), 1, n);
1787 
1788     // Save v
1789     mat_T[i+n*(i+1)] = R_ii;
1790     mat_T[(i+1)+n*i] = R_ii;
1791     for (CeedInt j=i+1; j<n-1; j++) {
1792       mat_T[i+n*(j+1)] = v[j];
1793     }
1794   }
1795   // Backwards accumulation of Q
1796   for (CeedInt i=n-2; i>=0; i--) {
1797     if (tau[i] > 0.0) {
1798       v[i] = 1;
1799       for (CeedInt j=i+1; j<n-1; j++) {
1800         v[j] = mat_T[i+n*(j+1)];
1801         mat_T[i+n*(j+1)] = 0;
1802       }
1803       CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i],
1804                              n-(i+1), n-(i+1), n, 1);
1805     }
1806   }
1807 
1808   // Reduce sub and super diagonal
1809   CeedInt p = 0, q = 0, itr = 0, max_itr = n*n*n*n;
1810   CeedScalar tol = CEED_EPSILON;
1811 
1812   while (itr < max_itr) {
1813     // Update p, q, size of reduced portions of diagonal
1814     p = 0; q = 0;
1815     for (CeedInt i=n-2; i>=0; i--) {
1816       if (fabs(mat_T[i+n*(i+1)]) < tol)
1817         q += 1;
1818       else
1819         break;
1820     }
1821     for (CeedInt i=0; i<n-q-1; i++) {
1822       if (fabs(mat_T[i+n*(i+1)]) < tol)
1823         p += 1;
1824       else
1825         break;
1826     }
1827     if (q == n-1) break; // Finished reducing
1828 
1829     // Reduce tridiagonal portion
1830     CeedScalar t_nn = mat_T[(n-1-q)+n*(n-1-q)],
1831                t_nnm1 = mat_T[(n-2-q)+n*(n-1-q)];
1832     CeedScalar d = (mat_T[(n-2-q)+n*(n-2-q)] - t_nn)/2;
1833     CeedScalar mu = t_nn - t_nnm1*t_nnm1 /
1834                     (d + copysign(sqrt(d*d + t_nnm1*t_nnm1), d));
1835     CeedScalar x = mat_T[p+n*p] - mu;
1836     CeedScalar z = mat_T[p+n*(p+1)];
1837     for (CeedInt k=p; k<n-q-1; k++) {
1838       // Compute Givens rotation
1839       CeedScalar c = 1, s = 0;
1840       if (fabs(z) > tol) {
1841         if (fabs(z) > fabs(x)) {
1842           CeedScalar tau = -x/z;
1843           s = 1/sqrt(1+tau*tau), c = s*tau;
1844         } else {
1845           CeedScalar tau = -z/x;
1846           c = 1/sqrt(1+tau*tau), s = c*tau;
1847         }
1848       }
1849 
1850       // Apply Givens rotation to T
1851       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1852       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k+1, n, n);
1853 
1854       // Apply Givens rotation to Q
1855       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1856 
1857       // Update x, z
1858       if (k < n-q-2) {
1859         x = mat_T[k+n*(k+1)];
1860         z = mat_T[k+n*(k+2)];
1861       }
1862     }
1863     itr++;
1864   }
1865 
1866   // Save eigenvalues
1867   for (CeedInt i=0; i<n; i++)
1868     lambda[i] = mat_T[i+n*i];
1869 
1870   // Check convergence
1871   if (itr == max_itr && q < n-1)
1872     // LCOV_EXCL_START
1873     return CeedError(ceed, CEED_ERROR_MINOR,
1874                      "Symmetric QR failed to converge");
1875   // LCOV_EXCL_STOP
1876   return CEED_ERROR_SUCCESS;
1877 }
1878 CeedPragmaOptimizeOn
1879 
1880 /**
1881   @brief Return Simultaneous Diagonalization of two matrices. This solves the
1882            generalized eigenvalue problem A x = lambda B x, where A and B
1883            are symmetric and B is positive definite. We generate the matrix X
1884            and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1885            is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1886 
1887   @param ceed         A Ceed context for error handling
1888   @param[in] mat_A    Row-major matrix to be factorized with eigenvalues
1889   @param[in] mat_B    Row-major matrix to be factorized to identity
1890   @param[out] mat_X   Row-major orthogonal matrix
1891   @param[out] lambda  Vector of length n of generalized eigenvalues
1892   @param n            Number of rows/columns
1893 
1894   @return An error code: 0 - success, otherwise - failure
1895 
1896   @ref Utility
1897 **/
1898 CeedPragmaOptimizeOff
1899 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A,
1900                                     CeedScalar *mat_B, CeedScalar *mat_X,
1901                                     CeedScalar *lambda, CeedInt n) {
1902   int ierr;
1903   CeedScalar *mat_C, *mat_G, *vec_D;
1904   ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr);
1905   ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr);
1906   ierr = CeedCalloc(n, &vec_D); CeedChk(ierr);
1907 
1908   // Compute B = G D G^T
1909   memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0]));
1910   ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n); CeedChk(ierr);
1911 
1912   // Sort eigenvalues
1913   for (CeedInt i=n-1; i>=0; i--)
1914     for (CeedInt j=0; j<i; j++) {
1915       if (fabs(vec_D[j]) > fabs(vec_D[j+1])) {
1916         CeedScalar temp;
1917         temp = vec_D[j]; vec_D[j] = vec_D[j+1]; vec_D[j+1] = temp;
1918         for (CeedInt k=0; k<n; k++) {
1919           temp = mat_G[k*n+j]; mat_G[k*n+j] = mat_G[k*n+j+1]; mat_G[k*n+j+1] = temp;
1920         }
1921       }
1922     }
1923 
1924   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1925   //           = D^-1/2 G^T A G D^-1/2
1926   // -- D = D^-1/2
1927   for (CeedInt i=0; i<n; i++)
1928     vec_D[i] = 1./sqrt(vec_D[i]);
1929   // -- G = G D^-1/2
1930   // -- C = D^-1/2 G^T
1931   for (CeedInt i=0; i<n; i++)
1932     for (CeedInt j=0; j<n; j++) {
1933       mat_G[i*n+j] *= vec_D[j];
1934       mat_C[j*n+i]  = mat_G[i*n+j];
1935     }
1936   // -- X = (D^-1/2 G^T) A
1937   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C,
1938                                   (const CeedScalar *)mat_A, mat_X, n, n, n);
1939   CeedChk(ierr);
1940   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1941   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X,
1942                                   (const CeedScalar *)mat_G, mat_C, n, n, n);
1943   CeedChk(ierr);
1944 
1945   // Compute Q^T C Q = lambda
1946   ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr);
1947 
1948   // Sort eigenvalues
1949   for (CeedInt i=n-1; i>=0; i--)
1950     for (CeedInt j=0; j<i; j++) {
1951       if (fabs(lambda[j]) > fabs(lambda[j+1])) {
1952         CeedScalar temp;
1953         temp = lambda[j]; lambda[j] = lambda[j+1]; lambda[j+1] = temp;
1954         for (CeedInt k=0; k<n; k++) {
1955           temp = mat_C[k*n+j]; mat_C[k*n+j] = mat_C[k*n+j+1]; mat_C[k*n+j+1] = temp;
1956         }
1957       }
1958     }
1959 
1960   // Set X = (G D^1/2)^-T Q
1961   //       = G D^-1/2 Q
1962   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G,
1963                                   (const CeedScalar *)mat_C, mat_X, n, n, n);
1964   CeedChk(ierr);
1965 
1966   // Cleanup
1967   ierr = CeedFree(&mat_C); CeedChk(ierr);
1968   ierr = CeedFree(&mat_G); CeedChk(ierr);
1969   ierr = CeedFree(&vec_D); CeedChk(ierr);
1970   return CEED_ERROR_SUCCESS;
1971 }
1972 CeedPragmaOptimizeOn
1973 
1974 /// @}
1975