xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-basis.c (revision a76a04e77a9db9309c4310fb27f031a0ea1f3c06)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedBasis interfaces
18 
19 /// @cond DOXYGEN_SKIP
20 static struct CeedBasis_private ceed_basis_collocated;
21 /// @endcond
22 
23 /// @addtogroup CeedBasisUser
24 /// @{
25 
26 /// Indicate that the quadrature points are collocated with the nodes
27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated;
28 
29 /// @}
30 
31 /// ----------------------------------------------------------------------------
32 /// CeedBasis Library Internal Functions
33 /// ----------------------------------------------------------------------------
34 /// @addtogroup CeedBasisDeveloper
35 /// @{
36 
37 /**
38   @brief Compute Householder reflection
39 
40     Computes A = (I - b v v^T) A
41     where A is an mxn matrix indexed as A[i*row + j*col]
42 
43   @param[in,out] A  Matrix to apply Householder reflection to, in place
44   @param v          Householder vector
45   @param b          Scaling factor
46   @param m          Number of rows in A
47   @param n          Number of columns in A
48   @param row        Row stride
49   @param col        Col stride
50 
51   @return An error code: 0 - success, otherwise - failure
52 
53   @ref Developer
54 **/
55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v,
56                                   CeedScalar b, CeedInt m, CeedInt n,
57                                   CeedInt row, CeedInt col) {
58   for (CeedInt j=0; j<n; j++) {
59     CeedScalar w = A[0*row + j*col];
60     for (CeedInt i=1; i<m; i++)
61       w += v[i] * A[i*row + j*col];
62     A[0*row + j*col] -= b * w;
63     for (CeedInt i=1; i<m; i++)
64       A[i*row + j*col] -= b * w * v[i];
65   }
66   return CEED_ERROR_SUCCESS;
67 }
68 
69 /**
70   @brief Apply Householder Q matrix
71 
72     Compute A = Q A where Q is mxm and A is mxn.
73 
74   @param[in,out] A  Matrix to apply Householder Q to, in place
75   @param Q          Householder Q matrix
76   @param tau        Householder scaling factors
77   @param t_mode     Transpose mode for application
78   @param m          Number of rows in A
79   @param n          Number of columns in A
80   @param k          Number of elementary reflectors in Q, k<m
81   @param row        Row stride in A
82   @param col        Col stride in A
83 
84   @return An error code: 0 - success, otherwise - failure
85 
86   @ref Developer
87 **/
88 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q,
89                           const CeedScalar *tau, CeedTransposeMode t_mode,
90                           CeedInt m, CeedInt n, CeedInt k,
91                           CeedInt row, CeedInt col) {
92   int ierr;
93   CeedScalar *v;
94   ierr = CeedMalloc(m, &v); CeedChk(ierr);
95   for (CeedInt ii=0; ii<k; ii++) {
96     CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii;
97     for (CeedInt j=i+1; j<m; j++)
98       v[j] = Q[j*k+i];
99     // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T
100     ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
101     CeedChk(ierr);
102   }
103   ierr = CeedFree(&v); CeedChk(ierr);
104   return CEED_ERROR_SUCCESS;
105 }
106 
107 /**
108   @brief Compute Givens rotation
109 
110     Computes A = G A (or G^T A in transpose mode)
111     where A is an mxn matrix indexed as A[i*n + j*m]
112 
113   @param[in,out] A  Row major matrix to apply Givens rotation to, in place
114   @param c          Cosine factor
115   @param s          Sine factor
116   @param t_mode     @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise,
117                     which has the effect of rotating columns of A clockwise;
118                     @ref CEED_TRANSPOSE for the opposite rotation
119   @param i          First row/column to apply rotation
120   @param k          Second row/column to apply rotation
121   @param m          Number of rows in A
122   @param n          Number of columns in A
123 
124   @return An error code: 0 - success, otherwise - failure
125 
126   @ref Developer
127 **/
128 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s,
129                               CeedTransposeMode t_mode, CeedInt i, CeedInt k,
130                               CeedInt m, CeedInt n) {
131   CeedInt stride_j = 1, stride_ik = m, num_its = n;
132   if (t_mode == CEED_NOTRANSPOSE) {
133     stride_j = n; stride_ik = 1; num_its = m;
134   }
135 
136   // Apply rotation
137   for (CeedInt j=0; j<num_its; j++) {
138     CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j];
139     A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2;
140     A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2;
141   }
142   return CEED_ERROR_SUCCESS;
143 }
144 
145 /**
146   @brief View an array stored in a CeedBasis
147 
148   @param[in] name      Name of array
149   @param[in] fp_fmt    Printing format
150   @param[in] m         Number of rows in array
151   @param[in] n         Number of columns in array
152   @param[in] a         Array to be viewed
153   @param[in] stream    Stream to view to, e.g., stdout
154 
155   @return An error code: 0 - success, otherwise - failure
156 
157   @ref Developer
158 **/
159 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m,
160                           CeedInt n, const CeedScalar *a, FILE *stream) {
161   for (CeedInt i=0; i<m; i++) {
162     if (m > 1)
163       fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i);
164     else
165       fprintf(stream, "%12s:", name);
166     for (CeedInt j=0; j<n; j++)
167       fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0);
168     fputs("\n", stream);
169   }
170   return CEED_ERROR_SUCCESS;
171 }
172 
173 /**
174   @brief Create the interpolation matrix for projection from the nodes of
175            `basis_from` to the nodes of `basis_to`. This projection is
176            given by `interp_project = interp_to^+ * interp_from`, where
177            the pesudoinverse `interp_to^+` is given by QR factorization.
178          Note: `basis_from` and `basis_to` must have compatible quadrature
179            spaces.
180 
181   @param[in] basis_from       CeedBasis to project from
182   @param[in] basis_to         CeedBasis to project to
183   @param[out] interp_project  Address of the variable where the newly created
184                                 projection matrix will be stored.
185 
186   @return An error code: 0 - success, otherwise - failure
187 
188   @ref Developer
189 **/
190 static int CeedBasisCreateProjectionMatrix(CeedBasis basis_from,
191                                     CeedBasis basis_to,
192                                     CeedScalar **interp_project) {
193   int ierr;
194   Ceed ceed;
195   ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr);
196 
197   // Check for compatible quadrature spaces
198   CeedInt Q_to, Q_from;
199   ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr);
200   ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr);
201   if (Q_to != Q_from)
202     // LCOV_EXCL_START
203     return CeedError(ceed, CEED_ERROR_DIMENSION,
204                      "Bases must have compatible quadrature spaces");
205   // LCOV_EXCL_STOP
206 
207   // Coarse to fine basis
208   CeedInt P_to, P_from, Q = Q_to;
209   bool is_tensor_to, is_tensor_from;
210   ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr);
211   ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr);
212   CeedScalar *interp_to, *interp_from, *tau;
213   if (is_tensor_to && is_tensor_from) {
214     ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr);
215     ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr);
216     ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr);
217   } else if (!is_tensor_to && !is_tensor_from) {
218     ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr);
219     ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr);
220   } else {
221     // LCOV_EXCL_START
222     return CeedError(ceed, CEED_ERROR_MINOR,
223                      "Bases must both be tensor or non-tensor");
224     // LCOV_EXCL_STOP
225   }
226 
227   ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr);
228   ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr);
229   ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr);
230   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
231   const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL;
232   if (is_tensor_to) {
233     ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr);
234     ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr);
235   } else {
236     ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr);
237     ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr);
238   }
239   memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0]));
240   memcpy(interp_from, interp_from_source,
241          Q * P_from * sizeof(interp_from_source[0]));
242 
243   // -- QR Factorization, interp_to = Q R
244   ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr);
245 
246   // -- Apply Qtranspose, interp_to = Qtranspose interp_from
247   ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE,
248                                Q, P_from, P_to, P_from, 1); CeedChk(ierr);
249 
250   // -- Apply Rinv, interp_project = Rinv interp_c
251   for (CeedInt j = 0; j < P_from; j++) { // Column j
252     (*interp_project)[j + P_from * (P_to - 1)] = interp_from[j + P_from *
253         (P_to - 1)] / interp_to[P_to * P_to - 1];
254     for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i
255       (*interp_project)[j + P_from * i] = interp_from[j + P_from * i];
256       for (CeedInt k = i+1; k < P_to; k++) {
257         (*interp_project)[j + P_from * i] -= interp_to[k + P_to * i]*
258                                              (*interp_project)[j + P_from * k];
259       }
260       (*interp_project)[j + P_from * i] /= interp_to[i + P_to * i];
261     }
262   }
263   ierr = CeedFree(&tau); CeedChk(ierr);
264   ierr = CeedFree(&interp_to); CeedChk(ierr);
265   ierr = CeedFree(&interp_from); CeedChk(ierr);
266 
267   return CEED_ERROR_SUCCESS;
268 }
269 
270 /// @}
271 
272 /// ----------------------------------------------------------------------------
273 /// Ceed Backend API
274 /// ----------------------------------------------------------------------------
275 /// @addtogroup CeedBasisBackend
276 /// @{
277 
278 /**
279   @brief Return collocated grad matrix
280 
281   @param basis               CeedBasis
282   @param[out] collo_grad_1d  Row-major (Q_1d * Q_1d) matrix expressing derivatives of
283                                basis functions at quadrature points
284 
285   @return An error code: 0 - success, otherwise - failure
286 
287   @ref Backend
288 **/
289 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) {
290   int i, j, k;
291   Ceed ceed;
292   CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d;
293   CeedScalar *interp_1d, *grad_1d, *tau;
294 
295   ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr);
296   ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr);
297   ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr);
298   memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
299   memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]);
300 
301   // QR Factorization, interp_1d = Q R
302   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
303   ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr);
304   // Note: This function is for backend use, so all errors are terminal
305   //   and we do not need to clean up memory on failure.
306 
307   // Apply Rinv, collo_grad_1d = grad_1d Rinv
308   for (i=0; i<Q_1d; i++) { // Row i
309     collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0];
310     for (j=1; j<P_1d; j++) { // Column j
311       collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i];
312       for (k=0; k<j; k++)
313         collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i];
314       collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j];
315     }
316     for (j=P_1d; j<Q_1d; j++)
317       collo_grad_1d[j+Q_1d*i] = 0;
318   }
319 
320   // Apply Qtranspose, collo_grad = collo_grad Q_transpose
321   ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE,
322                                Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr);
323 
324   ierr = CeedFree(&interp_1d); CeedChk(ierr);
325   ierr = CeedFree(&grad_1d); CeedChk(ierr);
326   ierr = CeedFree(&tau); CeedChk(ierr);
327   return CEED_ERROR_SUCCESS;
328 }
329 
330 /**
331   @brief Get tensor status for given CeedBasis
332 
333   @param basis           CeedBasis
334   @param[out] is_tensor  Variable to store tensor status
335 
336   @return An error code: 0 - success, otherwise - failure
337 
338   @ref Backend
339 **/
340 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) {
341   *is_tensor = basis->tensor_basis;
342   return CEED_ERROR_SUCCESS;
343 }
344 
345 /**
346   @brief Get backend data of a CeedBasis
347 
348   @param basis      CeedBasis
349   @param[out] data  Variable to store data
350 
351   @return An error code: 0 - success, otherwise - failure
352 
353   @ref Backend
354 **/
355 int CeedBasisGetData(CeedBasis basis, void *data) {
356   *(void **)data = basis->data;
357   return CEED_ERROR_SUCCESS;
358 }
359 
360 /**
361   @brief Set backend data of a CeedBasis
362 
363   @param[out] basis  CeedBasis
364   @param data        Data to set
365 
366   @return An error code: 0 - success, otherwise - failure
367 
368   @ref Backend
369 **/
370 int CeedBasisSetData(CeedBasis basis, void *data) {
371   basis->data = data;
372   return CEED_ERROR_SUCCESS;
373 }
374 
375 /**
376   @brief Increment the reference counter for a CeedBasis
377 
378   @param basis  Basis to increment the reference counter
379 
380   @return An error code: 0 - success, otherwise - failure
381 
382   @ref Backend
383 **/
384 int CeedBasisReference(CeedBasis basis) {
385   basis->ref_count++;
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode
391 
392   @param basis     Basis to estimate FLOPs for
393   @param t_mode    Apply basis or transpose
394   @param eval_mode Basis evaluation mode
395   @param flops     Address of variable to hold FLOPs estimate
396 
397   @ref Backend
398 **/
399 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode,
400                               CeedEvalMode eval_mode, CeedSize *flops) {
401   int ierr;
402   bool is_tensor;
403 
404   ierr = CeedBasisIsTensor(basis, &is_tensor); CeedChk(ierr);
405   if (is_tensor) {
406     CeedInt dim, num_comp, P_1d, Q_1d;
407     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
408     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
409     ierr = CeedBasisGetNumNodes1D(basis, &P_1d);  CeedChk(ierr);
410     ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d);  CeedChk(ierr);
411     if (t_mode == CEED_TRANSPOSE) {
412       P_1d = Q_1d; Q_1d = P_1d;
413     }
414     CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim-1), post = 1;
415     for (CeedInt d = 0; d < dim; d++) {
416       tensor_flops += 2 * pre * P_1d * post * Q_1d;
417       pre /= P_1d;
418       post *= Q_1d;
419     }
420     switch (eval_mode) {
421     case CEED_EVAL_NONE:   *flops = 0; break;
422     case CEED_EVAL_INTERP: *flops = tensor_flops; break;
423     case CEED_EVAL_GRAD:   *flops = tensor_flops * 2; break;
424     case CEED_EVAL_DIV:
425       // LCOV_EXCL_START
426       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
427                        "Tensor CEED_EVAL_DIV not supported"); break;
428     case CEED_EVAL_CURL:
429       return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE,
430                        "Tensor CEED_EVAL_CURL not supported"); break;
431     // LCOV_EXCL_STOP
432     case CEED_EVAL_WEIGHT: *flops = dim * CeedIntPow(Q_1d, dim); break;
433     }
434   } else {
435     CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp;
436     ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
437     ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
438     ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
439     ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
440     ierr = CeedBasisGetNumQuadratureComponents(basis, &Q_comp); CeedChk(ierr);
441     switch (eval_mode) {
442     case CEED_EVAL_NONE:   *flops = 0; break;
443     case CEED_EVAL_INTERP: *flops = num_nodes * num_qpts * num_comp; break;
444     case CEED_EVAL_GRAD:   *flops = num_nodes * num_qpts * num_comp * dim; break;
445     case CEED_EVAL_DIV:    *flops = num_nodes * num_qpts; break;
446     case CEED_EVAL_CURL:   *flops = num_nodes * num_qpts * dim; break;
447     case CEED_EVAL_WEIGHT: *flops = 0; break;
448     }
449   }
450 
451   return CEED_ERROR_SUCCESS;
452 }
453 
454 /**
455   @brief Get dimension for given CeedElemTopology
456 
457   @param topo      CeedElemTopology
458   @param[out] dim  Variable to store dimension of topology
459 
460   @return An error code: 0 - success, otherwise - failure
461 
462   @ref Backend
463 **/
464 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) {
465   *dim = (CeedInt) topo >> 16;
466   return CEED_ERROR_SUCCESS;
467 }
468 
469 /**
470   @brief Get CeedTensorContract of a CeedBasis
471 
472   @param basis          CeedBasis
473   @param[out] contract  Variable to store CeedTensorContract
474 
475   @return An error code: 0 - success, otherwise - failure
476 
477   @ref Backend
478 **/
479 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) {
480   *contract = basis->contract;
481   return CEED_ERROR_SUCCESS;
482 }
483 
484 /**
485   @brief Set CeedTensorContract of a CeedBasis
486 
487   @param[out] basis  CeedBasis
488   @param contract    CeedTensorContract to set
489 
490   @return An error code: 0 - success, otherwise - failure
491 
492   @ref Backend
493 **/
494 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) {
495   int ierr;
496   basis->contract = contract;
497   ierr = CeedTensorContractReference(contract); CeedChk(ierr);
498   return CEED_ERROR_SUCCESS;
499 }
500 
501 /**
502   @brief Return a reference implementation of matrix multiplication C = A B.
503            Note, this is a reference implementation for CPU CeedScalar pointers
504            that is not intended for high performance.
505 
506   @param ceed        A Ceed context for error handling
507   @param[in] mat_A   Row-major matrix A
508   @param[in] mat_B   Row-major matrix B
509   @param[out] mat_C  Row-major output matrix C
510   @param m           Number of rows of C
511   @param n           Number of columns of C
512   @param kk          Number of columns of A/rows of B
513 
514   @return An error code: 0 - success, otherwise - failure
515 
516   @ref Utility
517 **/
518 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A,
519                              const CeedScalar *mat_B, CeedScalar *mat_C,
520                              CeedInt m, CeedInt n, CeedInt kk) {
521   for (CeedInt i=0; i<m; i++)
522     for (CeedInt j=0; j<n; j++) {
523       CeedScalar sum = 0;
524       for (CeedInt k=0; k<kk; k++)
525         sum += mat_A[k+i*kk]*mat_B[j+k*n];
526       mat_C[j+i*n] = sum;
527     }
528   return CEED_ERROR_SUCCESS;
529 }
530 
531 /// @}
532 
533 /// ----------------------------------------------------------------------------
534 /// CeedBasis Public API
535 /// ----------------------------------------------------------------------------
536 /// @addtogroup CeedBasisUser
537 /// @{
538 
539 /**
540   @brief Create a tensor-product basis for H^1 discretizations
541 
542   @param ceed        A Ceed object where the CeedBasis will be created
543   @param dim         Topological dimension
544   @param num_comp    Number of field components (1 for scalar fields)
545   @param P_1d        Number of nodes in one dimension
546   @param Q_1d        Number of quadrature points in one dimension
547   @param interp_1d   Row-major (Q_1d * P_1d) matrix expressing the values of nodal
548                        basis functions at quadrature points
549   @param grad_1d     Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal
550                        basis functions at quadrature points
551   @param q_ref_1d    Array of length Q_1d holding the locations of quadrature points
552                        on the 1D reference element [-1, 1]
553   @param q_weight_1d Array of length Q_1d holding the quadrature weights on the
554                        reference element
555   @param[out] basis  Address of the variable where the newly created
556                        CeedBasis will be stored.
557 
558   @return An error code: 0 - success, otherwise - failure
559 
560   @ref User
561 **/
562 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp,
563                             CeedInt P_1d, CeedInt Q_1d,
564                             const CeedScalar *interp_1d,
565                             const CeedScalar *grad_1d, const CeedScalar *q_ref_1d,
566                             const CeedScalar *q_weight_1d, CeedBasis *basis) {
567   int ierr;
568 
569   if (!ceed->BasisCreateTensorH1) {
570     Ceed delegate;
571     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
572 
573     if (!delegate)
574       // LCOV_EXCL_START
575       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
576                        "Backend does not support BasisCreateTensorH1");
577     // LCOV_EXCL_STOP
578 
579     ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d,
580                                    Q_1d, interp_1d, grad_1d, q_ref_1d,
581                                    q_weight_1d, basis); CeedChk(ierr);
582     return CEED_ERROR_SUCCESS;
583   }
584 
585   if (dim < 1)
586     // LCOV_EXCL_START
587     return CeedError(ceed, CEED_ERROR_DIMENSION,
588                      "Basis dimension must be a positive value");
589   // LCOV_EXCL_STOP
590 
591   if (num_comp < 1)
592     // LCOV_EXCL_START
593     return CeedError(ceed, CEED_ERROR_DIMENSION,
594                      "Basis must have at least 1 component");
595   // LCOV_EXCL_STOP
596 
597   if (P_1d < 1)
598     // LCOV_EXCL_START
599     return CeedError(ceed, CEED_ERROR_DIMENSION,
600                      "Basis must have at least 1 node");
601   // LCOV_EXCL_STOP
602 
603   if (Q_1d < 1)
604     // LCOV_EXCL_START
605     return CeedError(ceed, CEED_ERROR_DIMENSION,
606                      "Basis must have at least 1 quadrature point");
607   // LCOV_EXCL_STOP
608 
609   CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE
610                           : dim == 2 ? CEED_TOPOLOGY_QUAD
611                           : CEED_TOPOLOGY_HEX;
612 
613   ierr = CeedCalloc(1, basis); CeedChk(ierr);
614   (*basis)->ceed = ceed;
615   ierr = CeedReference(ceed); CeedChk(ierr);
616   (*basis)->ref_count = 1;
617   (*basis)->tensor_basis = 1;
618   (*basis)->dim = dim;
619   (*basis)->topo = topo;
620   (*basis)->num_comp = num_comp;
621   (*basis)->P_1d = P_1d;
622   (*basis)->Q_1d = Q_1d;
623   (*basis)->P = CeedIntPow(P_1d, dim);
624   (*basis)->Q = CeedIntPow(Q_1d, dim);
625   (*basis)->Q_comp = 1;
626   (*basis)->basis_space = 1; // 1 for H^1 space
627   ierr = CeedCalloc(Q_1d, &(*basis)->q_ref_1d); CeedChk(ierr);
628   ierr = CeedCalloc(Q_1d, &(*basis)->q_weight_1d); CeedChk(ierr);
629   if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0]));
630   if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d,
631                             Q_1d*sizeof(q_weight_1d[0]));
632   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->interp_1d); CeedChk(ierr);
633   ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->grad_1d); CeedChk(ierr);
634   if (interp_1d) memcpy((*basis)->interp_1d, interp_1d,
635                           Q_1d*P_1d*sizeof(interp_1d[0]));
636   if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0]));
637   ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d,
638                                    q_weight_1d, *basis); CeedChk(ierr);
639   return CEED_ERROR_SUCCESS;
640 }
641 
642 /**
643   @brief Create a tensor-product Lagrange basis
644 
645   @param ceed        A Ceed object where the CeedBasis will be created
646   @param dim         Topological dimension of element
647   @param num_comp      Number of field components (1 for scalar fields)
648   @param P           Number of Gauss-Lobatto nodes in one dimension.  The
649                        polynomial degree of the resulting Q_k element is k=P-1.
650   @param Q           Number of quadrature points in one dimension.
651   @param quad_mode   Distribution of the Q quadrature points (affects order of
652                        accuracy for the quadrature)
653   @param[out] basis  Address of the variable where the newly created
654                        CeedBasis will be stored.
655 
656   @return An error code: 0 - success, otherwise - failure
657 
658   @ref User
659 **/
660 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp,
661                                     CeedInt P, CeedInt Q, CeedQuadMode quad_mode,
662                                     CeedBasis *basis) {
663   // Allocate
664   int ierr, ierr2, i, j, k;
665   CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d,
666              *q_weight_1d;
667 
668   if (dim < 1)
669     // LCOV_EXCL_START
670     return CeedError(ceed, CEED_ERROR_DIMENSION,
671                      "Basis dimension must be a positive value");
672   // LCOV_EXCL_STOP
673 
674   if (num_comp < 1)
675     // LCOV_EXCL_START
676     return CeedError(ceed, CEED_ERROR_DIMENSION,
677                      "Basis must have at least 1 component");
678   // LCOV_EXCL_STOP
679 
680   if (P < 1)
681     // LCOV_EXCL_START
682     return CeedError(ceed, CEED_ERROR_DIMENSION,
683                      "Basis must have at least 1 node");
684   // LCOV_EXCL_STOP
685 
686   if (Q < 1)
687     // LCOV_EXCL_START
688     return CeedError(ceed, CEED_ERROR_DIMENSION,
689                      "Basis must have at least 1 quadrature point");
690   // LCOV_EXCL_STOP
691 
692   // Get Nodes and Weights
693   ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr);
694   ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr);
695   ierr = CeedCalloc(P, &nodes); CeedChk(ierr);
696   ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr);
697   ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr);
698   ierr = CeedLobattoQuadrature(P, nodes, NULL);
699   if (ierr) { goto cleanup; } CeedChk(ierr);
700   switch (quad_mode) {
701   case CEED_GAUSS:
702     ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
703     break;
704   case CEED_GAUSS_LOBATTO:
705     ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
706     break;
707   }
708   if (ierr) { goto cleanup; } CeedChk(ierr);
709 
710   // Build B, D matrix
711   // Fornberg, 1998
712   for (i = 0; i  < Q; i++) {
713     c1 = 1.0;
714     c3 = nodes[0] - q_ref_1d[i];
715     interp_1d[i*P+0] = 1.0;
716     for (j = 1; j < P; j++) {
717       c2 = 1.0;
718       c4 = c3;
719       c3 = nodes[j] - q_ref_1d[i];
720       for (k = 0; k < j; k++) {
721         dx = nodes[j] - nodes[k];
722         c2 *= dx;
723         if (k == j - 1) {
724           grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2;
725           interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2;
726         }
727         grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx;
728         interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx;
729       }
730       c1 = c2;
731     }
732   }
733   // Pass to CeedBasisCreateTensorH1
734   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d,
735                                  q_ref_1d, q_weight_1d, basis); CeedChk(ierr);
736 cleanup:
737   ierr2 = CeedFree(&interp_1d); CeedChk(ierr2);
738   ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);
739   ierr2 = CeedFree(&nodes); CeedChk(ierr2);
740   ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2);
741   ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2);
742   CeedChk(ierr);
743   return CEED_ERROR_SUCCESS;
744 }
745 
746 /**
747   @brief Create a non tensor-product basis for H^1 discretizations
748 
749   @param ceed        A Ceed object where the CeedBasis will be created
750   @param topo        Topology of element, e.g. hypercube, simplex, ect
751   @param num_comp    Number of field components (1 for scalar fields)
752   @param num_nodes   Total number of nodes
753   @param num_qpts    Total number of quadrature points
754   @param interp      Row-major (num_qpts * num_nodes) matrix expressing the values of
755                        nodal basis functions at quadrature points
756   @param grad        Row-major (num_qpts * dim * num_nodes) matrix expressing
757                        derivatives of nodal basis functions at quadrature points
758   @param q_ref       Array of length num_qpts holding the locations of quadrature
759                        points on the reference element
760   @param q_weight    Array of length num_qpts holding the quadrature weights on the
761                        reference element
762   @param[out] basis  Address of the variable where the newly created
763                        CeedBasis will be stored.
764 
765   @return An error code: 0 - success, otherwise - failure
766 
767   @ref User
768 **/
769 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
770                       CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
771                       const CeedScalar *grad, const CeedScalar *q_ref,
772                       const CeedScalar *q_weight, CeedBasis *basis) {
773   int ierr;
774   CeedInt P = num_nodes, Q = num_qpts, dim = 0;
775 
776   if (!ceed->BasisCreateH1) {
777     Ceed delegate;
778     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
779 
780     if (!delegate)
781       // LCOV_EXCL_START
782       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
783                        "Backend does not support BasisCreateH1");
784     // LCOV_EXCL_STOP
785 
786     ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes,
787                              num_qpts, interp, grad, q_ref,
788                              q_weight, basis); CeedChk(ierr);
789     return CEED_ERROR_SUCCESS;
790   }
791 
792   if (num_comp < 1)
793     // LCOV_EXCL_START
794     return CeedError(ceed, CEED_ERROR_DIMENSION,
795                      "Basis must have at least 1 component");
796   // LCOV_EXCL_STOP
797 
798   if (num_nodes < 1)
799     // LCOV_EXCL_START
800     return CeedError(ceed, CEED_ERROR_DIMENSION,
801                      "Basis must have at least 1 node");
802   // LCOV_EXCL_STOP
803 
804   if (num_qpts < 1)
805     // LCOV_EXCL_START
806     return CeedError(ceed, CEED_ERROR_DIMENSION,
807                      "Basis must have at least 1 quadrature point");
808   // LCOV_EXCL_STOP
809 
810   ierr = CeedCalloc(1, basis); CeedChk(ierr);
811 
812   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
813 
814   (*basis)->ceed = ceed;
815   ierr = CeedReference(ceed); CeedChk(ierr);
816   (*basis)->ref_count = 1;
817   (*basis)->tensor_basis = 0;
818   (*basis)->dim = dim;
819   (*basis)->topo = topo;
820   (*basis)->num_comp = num_comp;
821   (*basis)->P = P;
822   (*basis)->Q = Q;
823   (*basis)->Q_comp = 1;
824   (*basis)->basis_space = 1; // 1 for H^1 space
825   ierr = CeedCalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
826   ierr = CeedCalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
827   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
828   if(q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
829   ierr = CeedCalloc(Q*P, &(*basis)->interp); CeedChk(ierr);
830   ierr = CeedCalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr);
831   if(interp) memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0]));
832   if(grad) memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0]));
833   ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref,
834                              q_weight, *basis); CeedChk(ierr);
835   return CEED_ERROR_SUCCESS;
836 }
837 
838 /**
839   @brief Create a non tensor-product basis for H(div) discretizations
840 
841   @param ceed        A Ceed object where the CeedBasis will be created
842   @param topo        Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.),
843                      dimension of which is used in some array sizes below
844   @param num_comp    Number of components (usually 1 for vectors in H(div) bases)
845   @param num_nodes   Total number of nodes (dofs per element)
846   @param num_qpts    Total number of quadrature points
847   @param interp      Row-major (dim*num_qpts * num_nodes) matrix expressing the values of
848                        nodal basis functions at quadrature points
849   @param div        Row-major (num_qpts * num_nodes) matrix expressing
850                        divergence of nodal basis functions at quadrature points
851   @param q_ref       Array of length num_qpts holding the locations of quadrature
852                        points on the reference element
853   @param q_weight    Array of length num_qpts holding the quadrature weights on the
854                        reference element
855   @param[out] basis  Address of the variable where the newly created
856                        CeedBasis will be stored.
857 
858   @return An error code: 0 - success, otherwise - failure
859 
860   @ref User
861 **/
862 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp,
863                         CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
864                         const CeedScalar *div, const CeedScalar *q_ref,
865                         const CeedScalar *q_weight, CeedBasis *basis) {
866   int ierr;
867   CeedInt Q = num_qpts, P = num_nodes, dim = 0;
868   ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr);
869   if (!ceed->BasisCreateHdiv) {
870     Ceed delegate;
871     ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr);
872 
873     if (!delegate)
874       // LCOV_EXCL_START
875       return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
876                        "Backend does not implement BasisCreateHdiv");
877     // LCOV_EXCL_STOP
878 
879     ierr = CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes,
880                                num_qpts, interp, div, q_ref,
881                                q_weight, basis); CeedChk(ierr);
882     return CEED_ERROR_SUCCESS;
883   }
884 
885   if (num_comp < 1)
886     // LCOV_EXCL_START
887     return CeedError(ceed, CEED_ERROR_DIMENSION,
888                      "Basis must have at least 1 component");
889   // LCOV_EXCL_STOP
890 
891   if (num_nodes < 1)
892     // LCOV_EXCL_START
893     return CeedError(ceed, CEED_ERROR_DIMENSION,
894                      "Basis must have at least 1 node");
895   // LCOV_EXCL_STOP
896 
897   if (num_qpts < 1)
898     // LCOV_EXCL_START
899     return CeedError(ceed, CEED_ERROR_DIMENSION,
900                      "Basis must have at least 1 quadrature point");
901   // LCOV_EXCL_STOP
902 
903   ierr = CeedCalloc(1, basis); CeedChk(ierr);
904 
905   (*basis)->ceed = ceed;
906   ierr = CeedReference(ceed); CeedChk(ierr);
907   (*basis)->ref_count = 1;
908   (*basis)->tensor_basis = 0;
909   (*basis)->dim = dim;
910   (*basis)->topo = topo;
911   (*basis)->num_comp = num_comp;
912   (*basis)->P = P;
913   (*basis)->Q = Q;
914   (*basis)->Q_comp = dim;
915   (*basis)->basis_space = 2; // 2 for H(div) space
916   ierr = CeedMalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr);
917   ierr = CeedMalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr);
918   if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0]));
919   if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0]));
920   ierr = CeedMalloc(dim*Q*P, &(*basis)->interp); CeedChk(ierr);
921   ierr = CeedMalloc(Q*P, &(*basis)->div); CeedChk(ierr);
922   if (interp) memcpy((*basis)->interp, interp, dim*Q*P*sizeof(interp[0]));
923   if (div) memcpy((*basis)->div, div, Q*P*sizeof(div[0]));
924   ierr = ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref,
925                                q_weight, *basis); CeedChk(ierr);
926   return CEED_ERROR_SUCCESS;
927 }
928 
929 /**
930   @brief Create a CeedBasis for projection from the nodes of `basis_from`
931            to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` will be
932            valid for the new basis, `basis_project`. This projection is
933            given by `interp_project = interp_to^+ * interp_from`, where
934            the pesudoinverse `interp_to^+` is given by QR factorization.
935          Note: `basis_from` and `basis_to` must have compatible quadrature
936            spaces.
937          Note: `basis_project` will have the same number of components as
938            `basis_from`, regardless of the number of components that
939            `basis_to` has. If `basis_from` has 3 components and `basis_to`
940            has 5 components, then `basis_project` will have 3 components.
941 
942   @param[in] basis_from      CeedBasis to prolong from
943   @param[in] basis_to        CeedBasis to prolong to
944   @param[out] basis_project  Address of the variable where the newly created
945                                CeedBasis will be stored.
946 
947   @return An error code: 0 - success, otherwise - failure
948 
949   @ref User
950 **/
951 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to,
952                               CeedBasis *basis_project) {
953   int ierr;
954   Ceed ceed;
955   ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr);
956 
957   // Create projectior matrix
958   CeedScalar *interp_project;
959   ierr = CeedBasisCreateProjectionMatrix(basis_from, basis_to,
960                                          &interp_project); CeedChk(ierr);
961 
962   // Build basis
963   bool is_tensor;
964   CeedInt dim, num_comp;
965   CeedScalar *q_ref, *q_weight, *grad;
966   ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr);
967   ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr);
968   ierr = CeedBasisGetNumComponents(basis_from, &num_comp); CeedChk(ierr);
969   if (is_tensor) {
970     CeedInt P_1d_to, P_1d_from;
971     ierr = CeedBasisGetNumNodes1D(basis_from, &P_1d_from); CeedChk(ierr);
972     ierr = CeedBasisGetNumNodes1D(basis_to, &P_1d_to); CeedChk(ierr);
973     ierr = CeedCalloc(P_1d_to, &q_ref); CeedChk(ierr);
974     ierr = CeedCalloc(P_1d_to, &q_weight); CeedChk(ierr);
975     ierr = CeedCalloc(P_1d_to * P_1d_from * dim, &grad); CeedChk(ierr);
976     ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to,
977                                    interp_project, grad, q_ref, q_weight, basis_project);
978     CeedChk(ierr);
979   } else {
980     CeedElemTopology topo;
981     ierr = CeedBasisGetTopology(basis_to, &topo); CeedChk(ierr);
982     CeedInt num_nodes_to, num_nodes_from;
983     ierr = CeedBasisGetNumNodes(basis_from, &num_nodes_from); CeedChk(ierr);
984     ierr = CeedBasisGetNumNodes(basis_to, &num_nodes_to); CeedChk(ierr);
985     ierr = CeedCalloc(num_nodes_to * dim, &q_ref); CeedChk(ierr);
986     ierr = CeedCalloc(num_nodes_to, &q_weight); CeedChk(ierr);
987     ierr = CeedCalloc(num_nodes_to * num_nodes_from * dim, &grad); CeedChk(ierr);
988     ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to,
989                              interp_project, grad, q_ref, q_weight, basis_project);
990     CeedChk(ierr);
991   }
992 
993   // Cleanup
994   ierr = CeedFree(&interp_project); CeedChk(ierr);
995   ierr = CeedFree(&q_ref); CeedChk(ierr);
996   ierr = CeedFree(&q_weight); CeedChk(ierr);
997   ierr = CeedFree(&grad); CeedChk(ierr);
998 
999   return CEED_ERROR_SUCCESS;
1000 }
1001 
1002 /**
1003   @brief Copy the pointer to a CeedBasis. Both pointers should
1004            be destroyed with `CeedBasisDestroy()`;
1005            Note: If `*basis_copy` is non-NULL, then it is assumed that
1006            `*basis_copy` is a pointer to a CeedBasis. This CeedBasis
1007            will be destroyed if `*basis_copy` is the only
1008            reference to this CeedBasis.
1009 
1010   @param basis            CeedBasis to copy reference to
1011   @param[out] basis_copy  Variable to store copied reference
1012 
1013   @return An error code: 0 - success, otherwise - failure
1014 
1015   @ref User
1016 **/
1017 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) {
1018   int ierr;
1019 
1020   ierr = CeedBasisReference(basis); CeedChk(ierr);
1021   ierr = CeedBasisDestroy(basis_copy); CeedChk(ierr);
1022   *basis_copy = basis;
1023   return CEED_ERROR_SUCCESS;
1024 }
1025 
1026 /**
1027   @brief View a CeedBasis
1028 
1029   @param basis   CeedBasis to view
1030   @param stream  Stream to view to, e.g., stdout
1031 
1032   @return An error code: 0 - success, otherwise - failure
1033 
1034   @ref User
1035 **/
1036 int CeedBasisView(CeedBasis basis, FILE *stream) {
1037   int ierr;
1038   CeedFESpace FE_space = basis->basis_space;
1039   CeedElemTopology topo = basis->topo;
1040   // Print FE space and element topology of the basis
1041   if (basis->tensor_basis) {
1042     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%"
1043             CeedInt_FMT " Q=%" CeedInt_FMT "\n",
1044             CeedFESpaces[FE_space], CeedElemTopologies[topo],
1045             basis->dim, basis->P_1d, basis->Q_1d);
1046   } else {
1047     fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%"
1048             CeedInt_FMT " Q=%" CeedInt_FMT "\n",
1049             CeedFESpaces[FE_space], CeedElemTopologies[topo],
1050             basis->dim, basis->P, basis->Q);
1051   }
1052   // Print quadrature data, interpolation/gradient/divergene/curl of the basis
1053   if (basis->tensor_basis) { // tensor basis
1054     ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d,
1055                           stream); CeedChk(ierr);
1056     ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d,
1057                           basis->q_weight_1d, stream); CeedChk(ierr);
1058     ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
1059                           basis->interp_1d, stream); CeedChk(ierr);
1060     ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d,
1061                           basis->grad_1d, stream); CeedChk(ierr);
1062   } else { // non-tensor basis
1063     ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim,
1064                           basis->q_ref_1d,
1065                           stream); CeedChk(ierr);
1066     ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d,
1067                           stream); CeedChk(ierr);
1068     ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q_comp*basis->Q, basis->P,
1069                           basis->interp, stream); CeedChk(ierr);
1070     if (basis->grad) {
1071       ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P,
1072                             basis->grad, stream); CeedChk(ierr);
1073     }
1074     if (basis->div) {
1075       ierr = CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P,
1076                             basis->div, stream); CeedChk(ierr);
1077     }
1078   }
1079   return CEED_ERROR_SUCCESS;
1080 }
1081 
1082 /**
1083   @brief Apply basis evaluation from nodes to quadrature points or vice versa
1084 
1085   @param basis     CeedBasis to evaluate
1086   @param num_elem  The number of elements to apply the basis evaluation to;
1087                      the backend will specify the ordering in
1088                      CeedElemRestrictionCreateBlocked()
1089   @param t_mode    \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature
1090                      points, \ref CEED_TRANSPOSE to apply the transpose, mapping
1091                      from quadrature points to nodes
1092   @param eval_mode \ref CEED_EVAL_NONE to use values directly,
1093                      \ref CEED_EVAL_INTERP to use interpolated values,
1094                      \ref CEED_EVAL_GRAD to use gradients,
1095                      \ref CEED_EVAL_WEIGHT to use quadrature weights.
1096   @param[in] u     Input CeedVector
1097   @param[out] v    Output CeedVector
1098 
1099   @return An error code: 0 - success, otherwise - failure
1100 
1101   @ref User
1102 **/
1103 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode,
1104                    CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
1105   int ierr;
1106   CeedSize u_length = 0, v_length;
1107   CeedInt dim, num_comp, num_nodes, num_qpts;
1108   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1109   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1110   ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr);
1111   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1112   ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr);
1113   if (u) {
1114     ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr);
1115   }
1116 
1117   if (!basis->Apply)
1118     // LCOV_EXCL_START
1119     return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED,
1120                      "Backend does not support BasisApply");
1121   // LCOV_EXCL_STOP
1122 
1123   // Check compatibility of topological and geometrical dimensions
1124   if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 ||
1125                                     u_length%num_qpts != 0)) ||
1126       (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 ||
1127                                       v_length%num_qpts != 0)))
1128     // LCOV_EXCL_START
1129     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
1130                      "Length of input/output vectors "
1131                      "incompatible with basis dimensions");
1132   // LCOV_EXCL_STOP
1133 
1134   // Check vector lengths to prevent out of bounds issues
1135   bool bad_dims = false;
1136   switch (eval_mode) {
1137   case CEED_EVAL_NONE:
1138   case CEED_EVAL_INTERP: bad_dims =
1139       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1140                                      v_length < num_elem*num_comp*num_nodes)) ||
1141        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1142                                        u_length < num_elem*num_comp*num_nodes)));
1143     break;
1144   case CEED_EVAL_GRAD: bad_dims =
1145       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim ||
1146                                      v_length < num_elem*num_comp*num_nodes)) ||
1147        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim ||
1148                                        u_length < num_elem*num_comp*num_nodes)));
1149     break;
1150   case CEED_EVAL_WEIGHT:
1151     bad_dims = v_length < num_elem*num_qpts;
1152     break;
1153   // LCOV_EXCL_START
1154   case CEED_EVAL_DIV: bad_dims =
1155       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1156                                      v_length < num_elem*num_comp*num_nodes)) ||
1157        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1158                                        u_length < num_elem*num_comp*num_nodes)));
1159     break;
1160   case CEED_EVAL_CURL: bad_dims =
1161       ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts ||
1162                                      v_length < num_elem*num_comp*num_nodes)) ||
1163        (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp ||
1164                                        u_length < num_elem*num_comp*num_nodes)));
1165     break;
1166     // LCOV_EXCL_STOP
1167   }
1168   if (bad_dims)
1169     // LCOV_EXCL_START
1170     return CeedError(basis->ceed, CEED_ERROR_DIMENSION,
1171                      "Input/output vectors too short for basis and evaluation mode");
1172   // LCOV_EXCL_STOP
1173 
1174   ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr);
1175   return CEED_ERROR_SUCCESS;
1176 }
1177 
1178 /**
1179   @brief Get Ceed associated with a CeedBasis
1180 
1181   @param basis      CeedBasis
1182   @param[out] ceed  Variable to store Ceed
1183 
1184   @return An error code: 0 - success, otherwise - failure
1185 
1186   @ref Advanced
1187 **/
1188 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) {
1189   *ceed = basis->ceed;
1190   return CEED_ERROR_SUCCESS;
1191 }
1192 
1193 /**
1194   @brief Get dimension for given CeedBasis
1195 
1196   @param basis     CeedBasis
1197   @param[out] dim  Variable to store dimension of basis
1198 
1199   @return An error code: 0 - success, otherwise - failure
1200 
1201   @ref Advanced
1202 **/
1203 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) {
1204   *dim = basis->dim;
1205   return CEED_ERROR_SUCCESS;
1206 }
1207 
1208 /**
1209   @brief Get topology for given CeedBasis
1210 
1211   @param basis      CeedBasis
1212   @param[out] topo  Variable to store topology of basis
1213 
1214   @return An error code: 0 - success, otherwise - failure
1215 
1216   @ref Advanced
1217 **/
1218 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) {
1219   *topo = basis->topo;
1220   return CEED_ERROR_SUCCESS;
1221 }
1222 
1223 /**
1224   @brief Get number of Q-vector components for given CeedBasis
1225 
1226   @param basis          CeedBasis
1227   @param[out] Q_comp  Variable to store number of Q-vector components of basis
1228 
1229   @return An error code: 0 - success, otherwise - failure
1230 
1231   @ref Advanced
1232 **/
1233 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) {
1234   *Q_comp = basis->Q_comp;
1235   return CEED_ERROR_SUCCESS;
1236 }
1237 
1238 /**
1239   @brief Get number of components for given CeedBasis
1240 
1241   @param basis          CeedBasis
1242   @param[out] num_comp  Variable to store number of components of basis
1243 
1244   @return An error code: 0 - success, otherwise - failure
1245 
1246   @ref Advanced
1247 **/
1248 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) {
1249   *num_comp = basis->num_comp;
1250   return CEED_ERROR_SUCCESS;
1251 }
1252 
1253 /**
1254   @brief Get total number of nodes (in dim dimensions) of a CeedBasis
1255 
1256   @param basis   CeedBasis
1257   @param[out] P  Variable to store number of nodes
1258 
1259   @return An error code: 0 - success, otherwise - failure
1260 
1261   @ref Utility
1262 **/
1263 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) {
1264   *P = basis->P;
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Get total number of nodes (in 1 dimension) of a CeedBasis
1270 
1271   @param basis     CeedBasis
1272   @param[out] P_1d  Variable to store number of nodes
1273 
1274   @return An error code: 0 - success, otherwise - failure
1275 
1276   @ref Advanced
1277 **/
1278 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) {
1279   if (!basis->tensor_basis)
1280     // LCOV_EXCL_START
1281     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1282                      "Cannot supply P_1d for non-tensor basis");
1283   // LCOV_EXCL_STOP
1284 
1285   *P_1d = basis->P_1d;
1286   return CEED_ERROR_SUCCESS;
1287 }
1288 
1289 /**
1290   @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis
1291 
1292   @param basis   CeedBasis
1293   @param[out] Q  Variable to store number of quadrature points
1294 
1295   @return An error code: 0 - success, otherwise - failure
1296 
1297   @ref Utility
1298 **/
1299 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) {
1300   *Q = basis->Q;
1301   return CEED_ERROR_SUCCESS;
1302 }
1303 
1304 /**
1305   @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis
1306 
1307   @param basis      CeedBasis
1308   @param[out] Q_1d  Variable to store number of quadrature points
1309 
1310   @return An error code: 0 - success, otherwise - failure
1311 
1312   @ref Advanced
1313 **/
1314 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) {
1315   if (!basis->tensor_basis)
1316     // LCOV_EXCL_START
1317     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1318                      "Cannot supply Q_1d for non-tensor basis");
1319   // LCOV_EXCL_STOP
1320 
1321   *Q_1d = basis->Q_1d;
1322   return CEED_ERROR_SUCCESS;
1323 }
1324 
1325 /**
1326   @brief Get reference coordinates of quadrature points (in dim dimensions)
1327          of a CeedBasis
1328 
1329   @param basis       CeedBasis
1330   @param[out] q_ref  Variable to store reference coordinates of quadrature points
1331 
1332   @return An error code: 0 - success, otherwise - failure
1333 
1334   @ref Advanced
1335 **/
1336 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) {
1337   *q_ref = basis->q_ref_1d;
1338   return CEED_ERROR_SUCCESS;
1339 }
1340 
1341 /**
1342   @brief Get quadrature weights of quadrature points (in dim dimensions)
1343          of a CeedBasis
1344 
1345   @param basis          CeedBasis
1346   @param[out] q_weight  Variable to store quadrature weights
1347 
1348   @return An error code: 0 - success, otherwise - failure
1349 
1350   @ref Advanced
1351 **/
1352 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) {
1353   *q_weight = basis->q_weight_1d;
1354   return CEED_ERROR_SUCCESS;
1355 }
1356 
1357 /**
1358   @brief Get interpolation matrix of a CeedBasis
1359 
1360   @param basis        CeedBasis
1361   @param[out] interp  Variable to store interpolation matrix
1362 
1363   @return An error code: 0 - success, otherwise - failure
1364 
1365   @ref Advanced
1366 **/
1367 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) {
1368   if (!basis->interp && basis->tensor_basis) {
1369     // Allocate
1370     int ierr;
1371     ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr);
1372 
1373     // Initialize
1374     for (CeedInt i=0; i<basis->Q*basis->P; i++)
1375       basis->interp[i] = 1.0;
1376 
1377     // Calculate
1378     for (CeedInt d=0; d<basis->dim; d++)
1379       for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1380         for (CeedInt node=0; node<basis->P; node++) {
1381           CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1382           CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1383           basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p];
1384         }
1385   }
1386   *interp = basis->interp;
1387   return CEED_ERROR_SUCCESS;
1388 }
1389 
1390 /**
1391   @brief Get 1D interpolation matrix of a tensor product CeedBasis
1392 
1393   @param basis           CeedBasis
1394   @param[out] interp_1d  Variable to store interpolation matrix
1395 
1396   @return An error code: 0 - success, otherwise - failure
1397 
1398   @ref Backend
1399 **/
1400 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) {
1401   if (!basis->tensor_basis)
1402     // LCOV_EXCL_START
1403     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1404                      "CeedBasis is not a tensor product basis.");
1405   // LCOV_EXCL_STOP
1406 
1407   *interp_1d = basis->interp_1d;
1408   return CEED_ERROR_SUCCESS;
1409 }
1410 
1411 /**
1412   @brief Get gradient matrix of a CeedBasis
1413 
1414   @param basis      CeedBasis
1415   @param[out] grad  Variable to store gradient matrix
1416 
1417   @return An error code: 0 - success, otherwise - failure
1418 
1419   @ref Advanced
1420 **/
1421 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) {
1422   if (!basis->grad && basis->tensor_basis) {
1423     // Allocate
1424     int ierr;
1425     ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad);
1426     CeedChk(ierr);
1427 
1428     // Initialize
1429     for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++)
1430       basis->grad[i] = 1.0;
1431 
1432     // Calculate
1433     for (CeedInt d=0; d<basis->dim; d++)
1434       for (CeedInt i=0; i<basis->dim; i++)
1435         for (CeedInt qpt=0; qpt<basis->Q; qpt++)
1436           for (CeedInt node=0; node<basis->P; node++) {
1437             CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d;
1438             CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d;
1439             if (i == d)
1440               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1441                 basis->grad_1d[q*basis->P_1d+p];
1442             else
1443               basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *=
1444                 basis->interp_1d[q*basis->P_1d+p];
1445           }
1446   }
1447   *grad = basis->grad;
1448   return CEED_ERROR_SUCCESS;
1449 }
1450 
1451 /**
1452   @brief Get 1D gradient matrix of a tensor product CeedBasis
1453 
1454   @param basis         CeedBasis
1455   @param[out] grad_1d  Variable to store gradient matrix
1456 
1457   @return An error code: 0 - success, otherwise - failure
1458 
1459   @ref Advanced
1460 **/
1461 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) {
1462   if (!basis->tensor_basis)
1463     // LCOV_EXCL_START
1464     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1465                      "CeedBasis is not a tensor product basis.");
1466   // LCOV_EXCL_STOP
1467 
1468   *grad_1d = basis->grad_1d;
1469   return CEED_ERROR_SUCCESS;
1470 }
1471 
1472 /**
1473   @brief Get divergence matrix of a CeedBasis
1474 
1475   @param basis     CeedBasis
1476   @param[out] div  Variable to store divergence matrix
1477 
1478   @return An error code: 0 - success, otherwise - failure
1479 
1480   @ref Advanced
1481 **/
1482 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) {
1483   if (!basis->div)
1484     // LCOV_EXCL_START
1485     return CeedError(basis->ceed, CEED_ERROR_MINOR,
1486                      "CeedBasis does not have divergence matrix.");
1487   // LCOV_EXCL_STOP
1488 
1489   *div = basis->div;
1490   return CEED_ERROR_SUCCESS;
1491 }
1492 
1493 /**
1494   @brief Destroy a CeedBasis
1495 
1496   @param basis CeedBasis to destroy
1497 
1498   @return An error code: 0 - success, otherwise - failure
1499 
1500   @ref User
1501 **/
1502 int CeedBasisDestroy(CeedBasis *basis) {
1503   int ierr;
1504 
1505   if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS;
1506   if ((*basis)->Destroy) {
1507     ierr = (*basis)->Destroy(*basis); CeedChk(ierr);
1508   }
1509   if ((*basis)->contract) {
1510     ierr = CeedTensorContractDestroy(&(*basis)->contract); CeedChk(ierr);
1511   }
1512   ierr = CeedFree(&(*basis)->interp); CeedChk(ierr);
1513   ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr);
1514   ierr = CeedFree(&(*basis)->grad); CeedChk(ierr);
1515   ierr = CeedFree(&(*basis)->div); CeedChk(ierr);
1516   ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr);
1517   ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr);
1518   ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr);
1519   ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr);
1520   ierr = CeedFree(basis); CeedChk(ierr);
1521   return CEED_ERROR_SUCCESS;
1522 }
1523 
1524 /**
1525   @brief Construct a Gauss-Legendre quadrature
1526 
1527   @param Q                 Number of quadrature points (integrates polynomials of
1528                              degree 2*Q-1 exactly)
1529   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1530   @param[out] q_weight_1d  Array of length Q to hold the weights
1531 
1532   @return An error code: 0 - success, otherwise - failure
1533 
1534   @ref Utility
1535 **/
1536 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1537                         CeedScalar *q_weight_1d) {
1538   // Allocate
1539   CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0);
1540   // Build q_ref_1d, q_weight_1d
1541   for (CeedInt i = 0; i <= Q/2; i++) {
1542     // Guess
1543     xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q)));
1544     // Pn(xi)
1545     P0 = 1.0;
1546     P1 = xi;
1547     P2 = 0.0;
1548     for (CeedInt j = 2; j <= Q; j++) {
1549       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1550       P0 = P1;
1551       P1 = P2;
1552     }
1553     // First Newton Step
1554     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1555     xi = xi-P2/dP2;
1556     // Newton to convergence
1557     for (CeedInt k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) {
1558       P0 = 1.0;
1559       P1 = xi;
1560       for (CeedInt j = 2; j <= Q; j++) {
1561         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1562         P0 = P1;
1563         P1 = P2;
1564       }
1565       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1566       xi = xi-P2/dP2;
1567     }
1568     // Save xi, wi
1569     wi = 2.0/((1.0-xi*xi)*dP2*dP2);
1570     q_weight_1d[i] = wi;
1571     q_weight_1d[Q-1-i] = wi;
1572     q_ref_1d[i] = -xi;
1573     q_ref_1d[Q-1-i]= xi;
1574   }
1575   return CEED_ERROR_SUCCESS;
1576 }
1577 
1578 /**
1579   @brief Construct a Gauss-Legendre-Lobatto quadrature
1580 
1581   @param Q                 Number of quadrature points (integrates polynomials of
1582                              degree 2*Q-3 exactly)
1583   @param[out] q_ref_1d     Array of length Q to hold the abscissa on [-1, 1]
1584   @param[out] q_weight_1d  Array of length Q to hold the weights
1585 
1586   @return An error code: 0 - success, otherwise - failure
1587 
1588   @ref Utility
1589 **/
1590 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d,
1591                           CeedScalar *q_weight_1d) {
1592   // Allocate
1593   CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0);
1594   // Build q_ref_1d, q_weight_1d
1595   // Set endpoints
1596   if (Q < 2)
1597     // LCOV_EXCL_START
1598     return CeedError(NULL, CEED_ERROR_DIMENSION,
1599                      "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q);
1600   // LCOV_EXCL_STOP
1601   wi = 2.0/((CeedScalar)(Q*(Q-1)));
1602   if (q_weight_1d) {
1603     q_weight_1d[0] = wi;
1604     q_weight_1d[Q-1] = wi;
1605   }
1606   q_ref_1d[0] = -1.0;
1607   q_ref_1d[Q-1] = 1.0;
1608   // Interior
1609   for (CeedInt i = 1; i <= (Q-1)/2; i++) {
1610     // Guess
1611     xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1));
1612     // Pn(xi)
1613     P0 = 1.0;
1614     P1 = xi;
1615     P2 = 0.0;
1616     for (CeedInt j = 2; j < Q; j++) {
1617       P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1618       P0 = P1;
1619       P1 = P2;
1620     }
1621     // First Newton step
1622     dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1623     d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1624     xi = xi-dP2/d2P2;
1625     // Newton to convergence
1626     for (CeedInt k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) {
1627       P0 = 1.0;
1628       P1 = xi;
1629       for (CeedInt j = 2; j < Q; j++) {
1630         P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j));
1631         P0 = P1;
1632         P1 = P2;
1633       }
1634       dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0);
1635       d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi);
1636       xi = xi-dP2/d2P2;
1637     }
1638     // Save xi, wi
1639     wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2);
1640     if (q_weight_1d) {
1641       q_weight_1d[i] = wi;
1642       q_weight_1d[Q-1-i] = wi;
1643     }
1644     q_ref_1d[i] = -xi;
1645     q_ref_1d[Q-1-i]= xi;
1646   }
1647   return CEED_ERROR_SUCCESS;
1648 }
1649 
1650 /**
1651   @brief Return QR Factorization of a matrix
1652 
1653   @param ceed         A Ceed context for error handling
1654   @param[in,out] mat  Row-major matrix to be factorized in place
1655   @param[in,out] tau  Vector of length m of scaling factors
1656   @param m            Number of rows
1657   @param n            Number of columns
1658 
1659   @return An error code: 0 - success, otherwise - failure
1660 
1661   @ref Utility
1662 **/
1663 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau,
1664                         CeedInt m, CeedInt n) {
1665   CeedScalar v[m];
1666 
1667   // Check m >= n
1668   if (n > m)
1669     // LCOV_EXCL_START
1670     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1671                      "Cannot compute QR factorization with n > m");
1672   // LCOV_EXCL_STOP
1673 
1674   for (CeedInt i=0; i<n; i++) {
1675     if (i >= m-1) { // last row of matrix, no reflection needed
1676       tau[i] = 0.;
1677       break;
1678     }
1679     // Calculate Householder vector, magnitude
1680     CeedScalar sigma = 0.0;
1681     v[i] = mat[i+n*i];
1682     for (CeedInt j=i+1; j<m; j++) {
1683       v[j] = mat[i+n*j];
1684       sigma += v[j] * v[j];
1685     }
1686     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m]
1687     CeedScalar R_ii = -copysign(norm, v[i]);
1688     v[i] -= R_ii;
1689     // norm of v[i:m] after modification above and scaling below
1690     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1691     //   tau = 2 / (norm*norm)
1692     tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1693     for (CeedInt j=i+1; j<m; j++)
1694       v[j] /= v[i];
1695 
1696     // Apply Householder reflector to lower right panel
1697     CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1);
1698     // Save v
1699     mat[i+n*i] = R_ii;
1700     for (CeedInt j=i+1; j<m; j++)
1701       mat[i+n*j] = v[j];
1702   }
1703   return CEED_ERROR_SUCCESS;
1704 }
1705 
1706 /**
1707   @brief Return symmetric Schur decomposition of the symmetric matrix mat via
1708            symmetric QR factorization
1709 
1710   @param ceed         A Ceed context for error handling
1711   @param[in,out] mat  Row-major matrix to be factorized in place
1712   @param[out] lambda  Vector of length n of eigenvalues
1713   @param n            Number of rows/columns
1714 
1715   @return An error code: 0 - success, otherwise - failure
1716 
1717   @ref Utility
1718 **/
1719 CeedPragmaOptimizeOff
1720 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat,
1721                                     CeedScalar *lambda, CeedInt n) {
1722   // Check bounds for clang-tidy
1723   if (n<2)
1724     // LCOV_EXCL_START
1725     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
1726                      "Cannot compute symmetric Schur decomposition of scalars");
1727   // LCOV_EXCL_STOP
1728 
1729   CeedScalar v[n-1], tau[n-1], mat_T[n*n];
1730 
1731   // Copy mat to mat_T and set mat to I
1732   memcpy(mat_T, mat, n*n*sizeof(mat[0]));
1733   for (CeedInt i=0; i<n; i++)
1734     for (CeedInt j=0; j<n; j++)
1735       mat[j+n*i] = (i==j) ? 1 : 0;
1736 
1737   // Reduce to tridiagonal
1738   for (CeedInt i=0; i<n-1; i++) {
1739     // Calculate Householder vector, magnitude
1740     CeedScalar sigma = 0.0;
1741     v[i] = mat_T[i+n*(i+1)];
1742     for (CeedInt j=i+1; j<n-1; j++) {
1743       v[j] = mat_T[i+n*(j+1)];
1744       sigma += v[j] * v[j];
1745     }
1746     CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1]
1747     CeedScalar R_ii = -copysign(norm, v[i]);
1748     v[i] -= R_ii;
1749     // norm of v[i:m] after modification above and scaling below
1750     //   norm = sqrt(v[i]*v[i] + sigma) / v[i];
1751     //   tau = 2 / (norm*norm)
1752     tau[i] = i == n - 2 ? 2 : 2 * v[i]*v[i] / (v[i]*v[i] + sigma);
1753     for (CeedInt j=i+1; j<n-1; j++)
1754       v[j] /= v[i];
1755 
1756     // Update sub and super diagonal
1757     for (CeedInt j=i+2; j<n; j++) {
1758       mat_T[i+n*j] = 0; mat_T[j+n*i] = 0;
1759     }
1760     // Apply symmetric Householder reflector to lower right panel
1761     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1762                            n-(i+1), n-(i+1), n, 1);
1763     CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i],
1764                            n-(i+1), n-(i+1), 1, n);
1765 
1766     // Save v
1767     mat_T[i+n*(i+1)] = R_ii;
1768     mat_T[(i+1)+n*i] = R_ii;
1769     for (CeedInt j=i+1; j<n-1; j++) {
1770       mat_T[i+n*(j+1)] = v[j];
1771     }
1772   }
1773   // Backwards accumulation of Q
1774   for (CeedInt i=n-2; i>=0; i--) {
1775     if (tau[i] > 0.0) {
1776       v[i] = 1;
1777       for (CeedInt j=i+1; j<n-1; j++) {
1778         v[j] = mat_T[i+n*(j+1)];
1779         mat_T[i+n*(j+1)] = 0;
1780       }
1781       CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i],
1782                              n-(i+1), n-(i+1), n, 1);
1783     }
1784   }
1785 
1786   // Reduce sub and super diagonal
1787   CeedInt p = 0, q = 0, itr = 0, max_itr = n*n*n*n;
1788   CeedScalar tol = CEED_EPSILON;
1789 
1790   while (itr < max_itr) {
1791     // Update p, q, size of reduced portions of diagonal
1792     p = 0; q = 0;
1793     for (CeedInt i=n-2; i>=0; i--) {
1794       if (fabs(mat_T[i+n*(i+1)]) < tol)
1795         q += 1;
1796       else
1797         break;
1798     }
1799     for (CeedInt i=0; i<n-q-1; i++) {
1800       if (fabs(mat_T[i+n*(i+1)]) < tol)
1801         p += 1;
1802       else
1803         break;
1804     }
1805     if (q == n-1) break; // Finished reducing
1806 
1807     // Reduce tridiagonal portion
1808     CeedScalar t_nn = mat_T[(n-1-q)+n*(n-1-q)],
1809                t_nnm1 = mat_T[(n-2-q)+n*(n-1-q)];
1810     CeedScalar d = (mat_T[(n-2-q)+n*(n-2-q)] - t_nn)/2;
1811     CeedScalar mu = t_nn - t_nnm1*t_nnm1 /
1812                     (d + copysign(sqrt(d*d + t_nnm1*t_nnm1), d));
1813     CeedScalar x = mat_T[p+n*p] - mu;
1814     CeedScalar z = mat_T[p+n*(p+1)];
1815     for (CeedInt k=p; k<n-q-1; k++) {
1816       // Compute Givens rotation
1817       CeedScalar c = 1, s = 0;
1818       if (fabs(z) > tol) {
1819         if (fabs(z) > fabs(x)) {
1820           CeedScalar tau = -x/z;
1821           s = 1/sqrt(1+tau*tau), c = s*tau;
1822         } else {
1823           CeedScalar tau = -z/x;
1824           c = 1/sqrt(1+tau*tau), s = c*tau;
1825         }
1826       }
1827 
1828       // Apply Givens rotation to T
1829       CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1830       CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k+1, n, n);
1831 
1832       // Apply Givens rotation to Q
1833       CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n);
1834 
1835       // Update x, z
1836       if (k < n-q-2) {
1837         x = mat_T[k+n*(k+1)];
1838         z = mat_T[k+n*(k+2)];
1839       }
1840     }
1841     itr++;
1842   }
1843 
1844   // Save eigenvalues
1845   for (CeedInt i=0; i<n; i++)
1846     lambda[i] = mat_T[i+n*i];
1847 
1848   // Check convergence
1849   if (itr == max_itr && q < n-1)
1850     // LCOV_EXCL_START
1851     return CeedError(ceed, CEED_ERROR_MINOR,
1852                      "Symmetric QR failed to converge");
1853   // LCOV_EXCL_STOP
1854   return CEED_ERROR_SUCCESS;
1855 }
1856 CeedPragmaOptimizeOn
1857 
1858 /**
1859   @brief Return Simultaneous Diagonalization of two matrices. This solves the
1860            generalized eigenvalue problem A x = lambda B x, where A and B
1861            are symmetric and B is positive definite. We generate the matrix X
1862            and vector Lambda such that X^T A X = Lambda and X^T B X = I. This
1863            is equivalent to the LAPACK routine 'sygv' with TYPE = 1.
1864 
1865   @param ceed         A Ceed context for error handling
1866   @param[in] mat_A    Row-major matrix to be factorized with eigenvalues
1867   @param[in] mat_B    Row-major matrix to be factorized to identity
1868   @param[out] mat_X   Row-major orthogonal matrix
1869   @param[out] lambda  Vector of length n of generalized eigenvalues
1870   @param n            Number of rows/columns
1871 
1872   @return An error code: 0 - success, otherwise - failure
1873 
1874   @ref Utility
1875 **/
1876 CeedPragmaOptimizeOff
1877 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A,
1878                                     CeedScalar *mat_B, CeedScalar *mat_X,
1879                                     CeedScalar *lambda, CeedInt n) {
1880   int ierr;
1881   CeedScalar *mat_C, *mat_G, *vec_D;
1882   ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr);
1883   ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr);
1884   ierr = CeedCalloc(n, &vec_D); CeedChk(ierr);
1885 
1886   // Compute B = G D G^T
1887   memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0]));
1888   ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n); CeedChk(ierr);
1889 
1890   // Sort eigenvalues
1891   for (CeedInt i=n-1; i>=0; i--)
1892     for (CeedInt j=0; j<i; j++) {
1893       if (fabs(vec_D[j]) > fabs(vec_D[j+1])) {
1894         CeedScalar temp;
1895         temp = vec_D[j]; vec_D[j] = vec_D[j+1]; vec_D[j+1] = temp;
1896         for (CeedInt k=0; k<n; k++) {
1897           temp = mat_G[k*n+j]; mat_G[k*n+j] = mat_G[k*n+j+1]; mat_G[k*n+j+1] = temp;
1898         }
1899       }
1900     }
1901 
1902   // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T
1903   //           = D^-1/2 G^T A G D^-1/2
1904   // -- D = D^-1/2
1905   for (CeedInt i=0; i<n; i++)
1906     vec_D[i] = 1./sqrt(vec_D[i]);
1907   // -- G = G D^-1/2
1908   // -- C = D^-1/2 G^T
1909   for (CeedInt i=0; i<n; i++)
1910     for (CeedInt j=0; j<n; j++) {
1911       mat_G[i*n+j] *= vec_D[j];
1912       mat_C[j*n+i]  = mat_G[i*n+j];
1913     }
1914   // -- X = (D^-1/2 G^T) A
1915   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C,
1916                                   (const CeedScalar *)mat_A, mat_X, n, n, n);
1917   CeedChk(ierr);
1918   // -- C = (D^-1/2 G^T A) (G D^-1/2)
1919   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X,
1920                                   (const CeedScalar *)mat_G, mat_C, n, n, n);
1921   CeedChk(ierr);
1922 
1923   // Compute Q^T C Q = lambda
1924   ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr);
1925 
1926   // Sort eigenvalues
1927   for (CeedInt i=n-1; i>=0; i--)
1928     for (CeedInt j=0; j<i; j++) {
1929       if (fabs(lambda[j]) > fabs(lambda[j+1])) {
1930         CeedScalar temp;
1931         temp = lambda[j]; lambda[j] = lambda[j+1]; lambda[j+1] = temp;
1932         for (CeedInt k=0; k<n; k++) {
1933           temp = mat_C[k*n+j]; mat_C[k*n+j] = mat_C[k*n+j+1]; mat_C[k*n+j+1] = temp;
1934         }
1935       }
1936     }
1937 
1938   // Set X = (G D^1/2)^-T Q
1939   //       = G D^-1/2 Q
1940   ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G,
1941                                   (const CeedScalar *)mat_C, mat_X, n, n, n);
1942   CeedChk(ierr);
1943 
1944   // Cleanup
1945   ierr = CeedFree(&mat_C); CeedChk(ierr);
1946   ierr = CeedFree(&mat_G); CeedChk(ierr);
1947   ierr = CeedFree(&vec_D); CeedChk(ierr);
1948   return CEED_ERROR_SUCCESS;
1949 }
1950 CeedPragmaOptimizeOn
1951 
1952 /// @}
1953