xref: /petsc/src/dm/dt/interface/dt.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 /* Discretization tools */
2 
3 #include <petscdt.h> /*I "petscdt.h" I*/
4 #include <petscblaslapack.h>
5 #include <petsc/private/petscimpl.h>
6 #include <petsc/private/dtimpl.h>
7 #include <petscviewer.h>
8 #include <petscdmplex.h>
9 #include <petscdmshell.h>
10 
11 #if defined(PETSC_HAVE_MPFR)
12   #include <mpfr.h>
13 #endif
14 
15 const char *const        PetscDTNodeTypes_shifted[] = {"default", "gaussjacobi", "equispaced", "tanhsinh", "PETSCDTNODES_", NULL};
16 const char *const *const PetscDTNodeTypes           = PetscDTNodeTypes_shifted + 1;
17 
18 const char *const        PetscDTSimplexQuadratureTypes_shifted[] = {"default", "conic", "minsym", "PETSCDTSIMPLEXQUAD_", NULL};
19 const char *const *const PetscDTSimplexQuadratureTypes           = PetscDTSimplexQuadratureTypes_shifted + 1;
20 
21 static PetscBool GolubWelschCite       = PETSC_FALSE;
22 const char       GolubWelschCitation[] = "@article{GolubWelsch1969,\n"
23                                          "  author  = {Golub and Welsch},\n"
24                                          "  title   = {Calculation of Quadrature Rules},\n"
25                                          "  journal = {Math. Comp.},\n"
26                                          "  volume  = {23},\n"
27                                          "  number  = {106},\n"
28                                          "  pages   = {221--230},\n"
29                                          "  year    = {1969}\n}\n";
30 
31 /* Numerical tests in src/dm/dt/tests/ex1.c show that when computing the nodes and weights of Gauss-Jacobi
32    quadrature rules:
33 
34    - in double precision, Newton's method and Golub & Welsch both work for moderate degrees (< 100),
35    - in single precision, Newton's method starts producing incorrect roots around n = 15, but
36      the weights from Golub & Welsch become a problem before then: they produces errors
37      in computing the Jacobi-polynomial Gram matrix around n = 6.
38 
39    So we default to Newton's method (required fewer dependencies) */
40 PetscBool PetscDTGaussQuadratureNewton_Internal = PETSC_TRUE;
41 
42 PetscClassId PETSCQUADRATURE_CLASSID = 0;
43 
44 /*@
45   PetscQuadratureCreate - Create a `PetscQuadrature` object
46 
47   Collective
48 
49   Input Parameter:
50 . comm - The communicator for the `PetscQuadrature` object
51 
52   Output Parameter:
53 . q  - The PetscQuadrature object
54 
55   Level: beginner
56 
57 .seealso: `PetscQuadrature`, `Petscquadraturedestroy()`, `PetscQuadratureGetData()`
58 @*/
59 PetscErrorCode PetscQuadratureCreate(MPI_Comm comm, PetscQuadrature *q)
60 {
61   PetscFunctionBegin;
62   PetscValidPointer(q, 2);
63   PetscCall(DMInitializePackage());
64   PetscCall(PetscHeaderCreate(*q, PETSCQUADRATURE_CLASSID, "PetscQuadrature", "Quadrature", "DT", comm, PetscQuadratureDestroy, PetscQuadratureView));
65   (*q)->dim       = -1;
66   (*q)->Nc        = 1;
67   (*q)->order     = -1;
68   (*q)->numPoints = 0;
69   (*q)->points    = NULL;
70   (*q)->weights   = NULL;
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 /*@
75   PetscQuadratureDuplicate - Create a deep copy of the `PetscQuadrature` object
76 
77   Collective on q
78 
79   Input Parameter:
80 . q  - The `PetscQuadrature` object
81 
82   Output Parameter:
83 . r  - The new `PetscQuadrature` object
84 
85   Level: beginner
86 
87 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureDestroy()`, `PetscQuadratureGetData()`
88 @*/
89 PetscErrorCode PetscQuadratureDuplicate(PetscQuadrature q, PetscQuadrature *r)
90 {
91   PetscInt         order, dim, Nc, Nq;
92   const PetscReal *points, *weights;
93   PetscReal       *p, *w;
94 
95   PetscFunctionBegin;
96   PetscValidPointer(q, 1);
97   PetscCall(PetscQuadratureCreate(PetscObjectComm((PetscObject)q), r));
98   PetscCall(PetscQuadratureGetOrder(q, &order));
99   PetscCall(PetscQuadratureSetOrder(*r, order));
100   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &Nq, &points, &weights));
101   PetscCall(PetscMalloc1(Nq * dim, &p));
102   PetscCall(PetscMalloc1(Nq * Nc, &w));
103   PetscCall(PetscArraycpy(p, points, Nq * dim));
104   PetscCall(PetscArraycpy(w, weights, Nc * Nq));
105   PetscCall(PetscQuadratureSetData(*r, dim, Nc, Nq, p, w));
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108 
109 /*@
110   PetscQuadratureDestroy - Destroys a `PetscQuadrature` object
111 
112   Collective on q
113 
114   Input Parameter:
115 . q  - The `PetscQuadrature` object
116 
117   Level: beginner
118 
119 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
120 @*/
121 PetscErrorCode PetscQuadratureDestroy(PetscQuadrature *q)
122 {
123   PetscFunctionBegin;
124   if (!*q) PetscFunctionReturn(PETSC_SUCCESS);
125   PetscValidHeaderSpecific((*q), PETSCQUADRATURE_CLASSID, 1);
126   if (--((PetscObject)(*q))->refct > 0) {
127     *q = NULL;
128     PetscFunctionReturn(PETSC_SUCCESS);
129   }
130   PetscCall(PetscFree((*q)->points));
131   PetscCall(PetscFree((*q)->weights));
132   PetscCall(PetscHeaderDestroy(q));
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 /*@
137   PetscQuadratureGetOrder - Return the order of the method in the `PetscQuadrature`
138 
139   Not collective
140 
141   Input Parameter:
142 . q - The `PetscQuadrature` object
143 
144   Output Parameter:
145 . order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated
146 
147   Level: intermediate
148 
149 .seealso: `PetscQuadrature`, `PetscQuadratureSetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
150 @*/
151 PetscErrorCode PetscQuadratureGetOrder(PetscQuadrature q, PetscInt *order)
152 {
153   PetscFunctionBegin;
154   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
155   PetscValidIntPointer(order, 2);
156   *order = q->order;
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 /*@
161   PetscQuadratureSetOrder - Set the order of the method in the `PetscQuadrature`
162 
163   Not collective
164 
165   Input Parameters:
166 + q - The `PetscQuadrature` object
167 - order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated
168 
169   Level: intermediate
170 
171 .seealso: `PetscQuadrature`, `PetscQuadratureGetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
172 @*/
173 PetscErrorCode PetscQuadratureSetOrder(PetscQuadrature q, PetscInt order)
174 {
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
177   q->order = order;
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 /*@
182   PetscQuadratureGetNumComponents - Return the number of components for functions to be integrated
183 
184   Not collective
185 
186   Input Parameter:
187 . q - The `PetscQuadrature` object
188 
189   Output Parameter:
190 . Nc - The number of components
191 
192   Note:
193   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.
194 
195   Level: intermediate
196 
197 .seealso: `PetscQuadrature`, `PetscQuadratureSetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
198 @*/
199 PetscErrorCode PetscQuadratureGetNumComponents(PetscQuadrature q, PetscInt *Nc)
200 {
201   PetscFunctionBegin;
202   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
203   PetscValidIntPointer(Nc, 2);
204   *Nc = q->Nc;
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 /*@
209   PetscQuadratureSetNumComponents - Return the number of components for functions to be integrated
210 
211   Not collective
212 
213   Input Parameters:
214 + q  - The PetscQuadrature object
215 - Nc - The number of components
216 
217   Note:
218   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.
219 
220   Level: intermediate
221 
222 .seealso: `PetscQuadrature`, `PetscQuadratureGetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
223 @*/
224 PetscErrorCode PetscQuadratureSetNumComponents(PetscQuadrature q, PetscInt Nc)
225 {
226   PetscFunctionBegin;
227   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
228   q->Nc = Nc;
229   PetscFunctionReturn(PETSC_SUCCESS);
230 }
231 
232 /*@C
233   PetscQuadratureGetData - Returns the data defining the `PetscQuadrature`
234 
235   Not collective
236 
237   Input Parameter:
238 . q  - The `PetscQuadrature` object
239 
240   Output Parameters:
241 + dim - The spatial dimension
242 . Nc - The number of components
243 . npoints - The number of quadrature points
244 . points - The coordinates of each quadrature point
245 - weights - The weight of each quadrature point
246 
247   Level: intermediate
248 
249   Fortran Note:
250   From Fortran you must call `PetscQuadratureRestoreData()` when you are done with the data
251 
252 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureSetData()`
253 @*/
254 PetscErrorCode PetscQuadratureGetData(PetscQuadrature q, PetscInt *dim, PetscInt *Nc, PetscInt *npoints, const PetscReal *points[], const PetscReal *weights[])
255 {
256   PetscFunctionBegin;
257   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
258   if (dim) {
259     PetscValidIntPointer(dim, 2);
260     *dim = q->dim;
261   }
262   if (Nc) {
263     PetscValidIntPointer(Nc, 3);
264     *Nc = q->Nc;
265   }
266   if (npoints) {
267     PetscValidIntPointer(npoints, 4);
268     *npoints = q->numPoints;
269   }
270   if (points) {
271     PetscValidPointer(points, 5);
272     *points = q->points;
273   }
274   if (weights) {
275     PetscValidPointer(weights, 6);
276     *weights = q->weights;
277   }
278   PetscFunctionReturn(PETSC_SUCCESS);
279 }
280 
281 /*@
282   PetscQuadratureEqual - determine whether two quadratures are equivalent
283 
284   Input Parameters:
285 + A - A `PetscQuadrature` object
286 - B - Another `PetscQuadrature` object
287 
288   Output Parameters:
289 . equal - `PETSC_TRUE` if the quadratures are the same
290 
291   Level: intermediate
292 
293 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`
294 @*/
295 PetscErrorCode PetscQuadratureEqual(PetscQuadrature A, PetscQuadrature B, PetscBool *equal)
296 {
297   PetscFunctionBegin;
298   PetscValidHeaderSpecific(A, PETSCQUADRATURE_CLASSID, 1);
299   PetscValidHeaderSpecific(B, PETSCQUADRATURE_CLASSID, 2);
300   PetscValidBoolPointer(equal, 3);
301   *equal = PETSC_FALSE;
302   if (A->dim != B->dim || A->Nc != B->Nc || A->order != B->order || A->numPoints != B->numPoints) PetscFunctionReturn(PETSC_SUCCESS);
303   for (PetscInt i = 0; i < A->numPoints * A->dim; i++) {
304     if (PetscAbsReal(A->points[i] - B->points[i]) > PETSC_SMALL) PetscFunctionReturn(PETSC_SUCCESS);
305   }
306   if (!A->weights && !B->weights) {
307     *equal = PETSC_TRUE;
308     PetscFunctionReturn(PETSC_SUCCESS);
309   }
310   if (A->weights && B->weights) {
311     for (PetscInt i = 0; i < A->numPoints; i++) {
312       if (PetscAbsReal(A->weights[i] - B->weights[i]) > PETSC_SMALL) PetscFunctionReturn(PETSC_SUCCESS);
313     }
314     *equal = PETSC_TRUE;
315   }
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode PetscDTJacobianInverse_Internal(PetscInt m, PetscInt n, const PetscReal J[], PetscReal Jinv[])
320 {
321   PetscScalar *Js, *Jinvs;
322   PetscInt     i, j, k;
323   PetscBLASInt bm, bn, info;
324 
325   PetscFunctionBegin;
326   if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS);
327   PetscCall(PetscBLASIntCast(m, &bm));
328   PetscCall(PetscBLASIntCast(n, &bn));
329 #if defined(PETSC_USE_COMPLEX)
330   PetscCall(PetscMalloc2(m * n, &Js, m * n, &Jinvs));
331   for (i = 0; i < m * n; i++) Js[i] = J[i];
332 #else
333   Js    = (PetscReal *)J;
334   Jinvs = Jinv;
335 #endif
336   if (m == n) {
337     PetscBLASInt *pivots;
338     PetscScalar  *W;
339 
340     PetscCall(PetscMalloc2(m, &pivots, m, &W));
341 
342     PetscCall(PetscArraycpy(Jinvs, Js, m * m));
343     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, Jinvs, &bm, pivots, &info));
344     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
345     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, Jinvs, &bm, pivots, W, &bm, &info));
346     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
347     PetscCall(PetscFree2(pivots, W));
348   } else if (m < n) {
349     PetscScalar  *JJT;
350     PetscBLASInt *pivots;
351     PetscScalar  *W;
352 
353     PetscCall(PetscMalloc1(m * m, &JJT));
354     PetscCall(PetscMalloc2(m, &pivots, m, &W));
355     for (i = 0; i < m; i++) {
356       for (j = 0; j < m; j++) {
357         PetscScalar val = 0.;
358 
359         for (k = 0; k < n; k++) val += Js[i * n + k] * Js[j * n + k];
360         JJT[i * m + j] = val;
361       }
362     }
363 
364     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, JJT, &bm, pivots, &info));
365     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
366     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, JJT, &bm, pivots, W, &bm, &info));
367     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
368     for (i = 0; i < n; i++) {
369       for (j = 0; j < m; j++) {
370         PetscScalar val = 0.;
371 
372         for (k = 0; k < m; k++) val += Js[k * n + i] * JJT[k * m + j];
373         Jinvs[i * m + j] = val;
374       }
375     }
376     PetscCall(PetscFree2(pivots, W));
377     PetscCall(PetscFree(JJT));
378   } else {
379     PetscScalar  *JTJ;
380     PetscBLASInt *pivots;
381     PetscScalar  *W;
382 
383     PetscCall(PetscMalloc1(n * n, &JTJ));
384     PetscCall(PetscMalloc2(n, &pivots, n, &W));
385     for (i = 0; i < n; i++) {
386       for (j = 0; j < n; j++) {
387         PetscScalar val = 0.;
388 
389         for (k = 0; k < m; k++) val += Js[k * n + i] * Js[k * n + j];
390         JTJ[i * n + j] = val;
391       }
392     }
393 
394     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, JTJ, &bn, pivots, &info));
395     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
396     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, JTJ, &bn, pivots, W, &bn, &info));
397     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
398     for (i = 0; i < n; i++) {
399       for (j = 0; j < m; j++) {
400         PetscScalar val = 0.;
401 
402         for (k = 0; k < n; k++) val += JTJ[i * n + k] * Js[j * n + k];
403         Jinvs[i * m + j] = val;
404       }
405     }
406     PetscCall(PetscFree2(pivots, W));
407     PetscCall(PetscFree(JTJ));
408   }
409 #if defined(PETSC_USE_COMPLEX)
410   for (i = 0; i < m * n; i++) Jinv[i] = PetscRealPart(Jinvs[i]);
411   PetscCall(PetscFree2(Js, Jinvs));
412 #endif
413   PetscFunctionReturn(PETSC_SUCCESS);
414 }
415 
416 /*@
417    PetscQuadraturePushForward - Push forward a quadrature functional under an affine transformation.
418 
419    Collecive on `PetscQuadrature`
420 
421    Input Parameters:
422 +  q - the quadrature functional
423 .  imageDim - the dimension of the image of the transformation
424 .  origin - a point in the original space
425 .  originImage - the image of the origin under the transformation
426 .  J - the Jacobian of the image: an [imageDim x dim] matrix in row major order
427 -  formDegree - transform the quadrature weights as k-forms of this form degree (if the number of components is a multiple of (dim choose formDegree), it is assumed that they represent multiple k-forms) [see `PetscDTAltVPullback()` for interpretation of formDegree]
428 
429    Output Parameters:
430 .  Jinvstarq - a quadrature rule where each point is the image of a point in the original quadrature rule, and where the k-form weights have been pulled-back by the pseudoinverse of J to the k-form weights in the image space.
431 
432    Level: intermediate
433 
434    Note:
435    The new quadrature rule will have a different number of components if spaces have different dimensions.  For example, pushing a 2-form forward from a two dimensional space to a three dimensional space changes the number of components from 1 to 3.
436 
437 .seealso: `PetscQuadrature`, `PetscDTAltVPullback()`, `PetscDTAltVPullbackMatrix()`
438 @*/
439 PetscErrorCode PetscQuadraturePushForward(PetscQuadrature q, PetscInt imageDim, const PetscReal origin[], const PetscReal originImage[], const PetscReal J[], PetscInt formDegree, PetscQuadrature *Jinvstarq)
440 {
441   PetscInt         dim, Nc, imageNc, formSize, Ncopies, imageFormSize, Npoints, pt, i, j, c;
442   const PetscReal *points;
443   const PetscReal *weights;
444   PetscReal       *imagePoints, *imageWeights;
445   PetscReal       *Jinv;
446   PetscReal       *Jinvstar;
447 
448   PetscFunctionBegin;
449   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
450   PetscCheck(imageDim >= PetscAbsInt(formDegree), PetscObjectComm((PetscObject)q), PETSC_ERR_ARG_INCOMP, "Cannot represent a %" PetscInt_FMT "-form in %" PetscInt_FMT " dimensions", PetscAbsInt(formDegree), imageDim);
451   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &Npoints, &points, &weights));
452   PetscCall(PetscDTBinomialInt(dim, PetscAbsInt(formDegree), &formSize));
453   PetscCheck(Nc % formSize == 0, PetscObjectComm((PetscObject)q), PETSC_ERR_ARG_INCOMP, "Number of components %" PetscInt_FMT " is not a multiple of formSize %" PetscInt_FMT, Nc, formSize);
454   Ncopies = Nc / formSize;
455   PetscCall(PetscDTBinomialInt(imageDim, PetscAbsInt(formDegree), &imageFormSize));
456   imageNc = Ncopies * imageFormSize;
457   PetscCall(PetscMalloc1(Npoints * imageDim, &imagePoints));
458   PetscCall(PetscMalloc1(Npoints * imageNc, &imageWeights));
459   PetscCall(PetscMalloc2(imageDim * dim, &Jinv, formSize * imageFormSize, &Jinvstar));
460   PetscCall(PetscDTJacobianInverse_Internal(imageDim, dim, J, Jinv));
461   PetscCall(PetscDTAltVPullbackMatrix(imageDim, dim, Jinv, formDegree, Jinvstar));
462   for (pt = 0; pt < Npoints; pt++) {
463     const PetscReal *point      = &points[pt * dim];
464     PetscReal       *imagePoint = &imagePoints[pt * imageDim];
465 
466     for (i = 0; i < imageDim; i++) {
467       PetscReal val = originImage[i];
468 
469       for (j = 0; j < dim; j++) val += J[i * dim + j] * (point[j] - origin[j]);
470       imagePoint[i] = val;
471     }
472     for (c = 0; c < Ncopies; c++) {
473       const PetscReal *form      = &weights[pt * Nc + c * formSize];
474       PetscReal       *imageForm = &imageWeights[pt * imageNc + c * imageFormSize];
475 
476       for (i = 0; i < imageFormSize; i++) {
477         PetscReal val = 0.;
478 
479         for (j = 0; j < formSize; j++) val += Jinvstar[i * formSize + j] * form[j];
480         imageForm[i] = val;
481       }
482     }
483   }
484   PetscCall(PetscQuadratureCreate(PetscObjectComm((PetscObject)q), Jinvstarq));
485   PetscCall(PetscQuadratureSetData(*Jinvstarq, imageDim, imageNc, Npoints, imagePoints, imageWeights));
486   PetscCall(PetscFree2(Jinv, Jinvstar));
487   PetscFunctionReturn(PETSC_SUCCESS);
488 }
489 
490 /*@C
491   PetscQuadratureSetData - Sets the data defining the quadrature
492 
493   Not collective
494 
495   Input Parameters:
496 + q  - The `PetscQuadrature` object
497 . dim - The spatial dimension
498 . Nc - The number of components
499 . npoints - The number of quadrature points
500 . points - The coordinates of each quadrature point
501 - weights - The weight of each quadrature point
502 
503   Level: intermediate
504 
505   Note:
506   This routine owns the references to points and weights, so they must be allocated using `PetscMalloc()` and the user should not free them.
507 
508 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
509 @*/
510 PetscErrorCode PetscQuadratureSetData(PetscQuadrature q, PetscInt dim, PetscInt Nc, PetscInt npoints, const PetscReal points[], const PetscReal weights[])
511 {
512   PetscFunctionBegin;
513   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
514   if (dim >= 0) q->dim = dim;
515   if (Nc >= 0) q->Nc = Nc;
516   if (npoints >= 0) q->numPoints = npoints;
517   if (points) {
518     PetscValidRealPointer(points, 5);
519     q->points = points;
520   }
521   if (weights) {
522     PetscValidRealPointer(weights, 6);
523     q->weights = weights;
524   }
525   PetscFunctionReturn(PETSC_SUCCESS);
526 }
527 
528 static PetscErrorCode PetscQuadratureView_Ascii(PetscQuadrature quad, PetscViewer v)
529 {
530   PetscInt          q, d, c;
531   PetscViewerFormat format;
532 
533   PetscFunctionBegin;
534   if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ") with %" PetscInt_FMT " components\n", quad->order, quad->numPoints, quad->dim, quad->Nc));
535   else PetscCall(PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ")\n", quad->order, quad->numPoints, quad->dim));
536   PetscCall(PetscViewerGetFormat(v, &format));
537   if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
538   for (q = 0; q < quad->numPoints; ++q) {
539     PetscCall(PetscViewerASCIIPrintf(v, "p%" PetscInt_FMT " (", q));
540     PetscCall(PetscViewerASCIIUseTabs(v, PETSC_FALSE));
541     for (d = 0; d < quad->dim; ++d) {
542       if (d) PetscCall(PetscViewerASCIIPrintf(v, ", "));
543       PetscCall(PetscViewerASCIIPrintf(v, "%+g", (double)quad->points[q * quad->dim + d]));
544     }
545     PetscCall(PetscViewerASCIIPrintf(v, ") "));
546     if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, "w%" PetscInt_FMT " (", q));
547     for (c = 0; c < quad->Nc; ++c) {
548       if (c) PetscCall(PetscViewerASCIIPrintf(v, ", "));
549       PetscCall(PetscViewerASCIIPrintf(v, "%+g", (double)quad->weights[q * quad->Nc + c]));
550     }
551     if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, ")"));
552     PetscCall(PetscViewerASCIIPrintf(v, "\n"));
553     PetscCall(PetscViewerASCIIUseTabs(v, PETSC_TRUE));
554   }
555   PetscFunctionReturn(PETSC_SUCCESS);
556 }
557 
558 /*@C
559   PetscQuadratureView - View a `PetscQuadrature` object
560 
561   Collective on quad
562 
563   Input Parameters:
564 + quad  - The `PetscQuadrature` object
565 - viewer - The `PetscViewer` object
566 
567   Level: beginner
568 
569 .seealso: `PetscQuadrature`, `PetscViewer`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
570 @*/
571 PetscErrorCode PetscQuadratureView(PetscQuadrature quad, PetscViewer viewer)
572 {
573   PetscBool iascii;
574 
575   PetscFunctionBegin;
576   PetscValidHeader(quad, 1);
577   if (viewer) PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
578   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)quad), &viewer));
579   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
580   PetscCall(PetscViewerASCIIPushTab(viewer));
581   if (iascii) PetscCall(PetscQuadratureView_Ascii(quad, viewer));
582   PetscCall(PetscViewerASCIIPopTab(viewer));
583   PetscFunctionReturn(PETSC_SUCCESS);
584 }
585 
586 /*@C
587   PetscQuadratureExpandComposite - Return a quadrature over the composite element, which has the original quadrature in each subelement
588 
589   Not collective
590 
591   Input Parameters:
592 + q - The original `PetscQuadrature`
593 . numSubelements - The number of subelements the original element is divided into
594 . v0 - An array of the initial points for each subelement
595 - jac - An array of the Jacobian mappings from the reference to each subelement
596 
597   Output Parameters:
598 . dim - The dimension
599 
600   Note:
601   Together v0 and jac define an affine mapping from the original reference element to each subelement
602 
603   Fortran Note:
604   Not available from Fortran
605 
606   Level: intermediate
607 
608 .seealso: `PetscQuadrature`, `PetscFECreate()`, `PetscSpaceGetDimension()`, `PetscDualSpaceGetDimension()`
609 @*/
610 PetscErrorCode PetscQuadratureExpandComposite(PetscQuadrature q, PetscInt numSubelements, const PetscReal v0[], const PetscReal jac[], PetscQuadrature *qref)
611 {
612   const PetscReal *points, *weights;
613   PetscReal       *pointsRef, *weightsRef;
614   PetscInt         dim, Nc, order, npoints, npointsRef, c, p, cp, d, e;
615 
616   PetscFunctionBegin;
617   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
618   PetscValidRealPointer(v0, 3);
619   PetscValidRealPointer(jac, 4);
620   PetscValidPointer(qref, 5);
621   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, qref));
622   PetscCall(PetscQuadratureGetOrder(q, &order));
623   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &npoints, &points, &weights));
624   npointsRef = npoints * numSubelements;
625   PetscCall(PetscMalloc1(npointsRef * dim, &pointsRef));
626   PetscCall(PetscMalloc1(npointsRef * Nc, &weightsRef));
627   for (c = 0; c < numSubelements; ++c) {
628     for (p = 0; p < npoints; ++p) {
629       for (d = 0; d < dim; ++d) {
630         pointsRef[(c * npoints + p) * dim + d] = v0[c * dim + d];
631         for (e = 0; e < dim; ++e) pointsRef[(c * npoints + p) * dim + d] += jac[(c * dim + d) * dim + e] * (points[p * dim + e] + 1.0);
632       }
633       /* Could also use detJ here */
634       for (cp = 0; cp < Nc; ++cp) weightsRef[(c * npoints + p) * Nc + cp] = weights[p * Nc + cp] / numSubelements;
635     }
636   }
637   PetscCall(PetscQuadratureSetOrder(*qref, order));
638   PetscCall(PetscQuadratureSetData(*qref, dim, Nc, npointsRef, pointsRef, weightsRef));
639   PetscFunctionReturn(PETSC_SUCCESS);
640 }
641 
642 /* Compute the coefficients for the Jacobi polynomial recurrence,
643  *
644  * J^{a,b}_n(x) = (cnm1 + cnm1x * x) * J^{a,b}_{n-1}(x) - cnm2 * J^{a,b}_{n-2}(x).
645  */
646 #define PetscDTJacobiRecurrence_Internal(n, a, b, cnm1, cnm1x, cnm2) \
647   do { \
648     PetscReal _a = (a); \
649     PetscReal _b = (b); \
650     PetscReal _n = (n); \
651     if (n == 1) { \
652       (cnm1)  = (_a - _b) * 0.5; \
653       (cnm1x) = (_a + _b + 2.) * 0.5; \
654       (cnm2)  = 0.; \
655     } else { \
656       PetscReal _2n  = _n + _n; \
657       PetscReal _d   = (_2n * (_n + _a + _b) * (_2n + _a + _b - 2)); \
658       PetscReal _n1  = (_2n + _a + _b - 1.) * (_a * _a - _b * _b); \
659       PetscReal _n1x = (_2n + _a + _b - 1.) * (_2n + _a + _b) * (_2n + _a + _b - 2); \
660       PetscReal _n2  = 2. * ((_n + _a - 1.) * (_n + _b - 1.) * (_2n + _a + _b)); \
661       (cnm1)         = _n1 / _d; \
662       (cnm1x)        = _n1x / _d; \
663       (cnm2)         = _n2 / _d; \
664     } \
665   } while (0)
666 
667 /*@
668   PetscDTJacobiNorm - Compute the weighted L2 norm of a Jacobi polynomial.
669 
670   $\| P^{\alpha,\beta}_n \|_{\alpha,\beta}^2 = \int_{-1}^1 (1 + x)^{\alpha} (1 - x)^{\beta} P^{\alpha,\beta}_n (x)^2 dx.$
671 
672   Input Parameters:
673 - alpha - the left exponent > -1
674 . beta - the right exponent > -1
675 + n - the polynomial degree
676 
677   Output Parameter:
678 . norm - the weighted L2 norm
679 
680   Level: beginner
681 
682 .seealso: `PetscQuadrature`, `PetscDTJacobiEval()`
683 @*/
684 PetscErrorCode PetscDTJacobiNorm(PetscReal alpha, PetscReal beta, PetscInt n, PetscReal *norm)
685 {
686   PetscReal twoab1;
687   PetscReal gr;
688 
689   PetscFunctionBegin;
690   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Exponent alpha %g <= -1. invalid", (double)alpha);
691   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Exponent beta %g <= -1. invalid", (double)beta);
692   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "n %" PetscInt_FMT " < 0 invalid", n);
693   twoab1 = PetscPowReal(2., alpha + beta + 1.);
694 #if defined(PETSC_HAVE_LGAMMA)
695   if (!n) {
696     gr = PetscExpReal(PetscLGamma(alpha + 1.) + PetscLGamma(beta + 1.) - PetscLGamma(alpha + beta + 2.));
697   } else {
698     gr = PetscExpReal(PetscLGamma(n + alpha + 1.) + PetscLGamma(n + beta + 1.) - (PetscLGamma(n + 1.) + PetscLGamma(n + alpha + beta + 1.))) / (n + n + alpha + beta + 1.);
699   }
700 #else
701   {
702     PetscInt alphai = (PetscInt)alpha;
703     PetscInt betai  = (PetscInt)beta;
704     PetscInt i;
705 
706     gr = n ? (1. / (n + n + alpha + beta + 1.)) : 1.;
707     if ((PetscReal)alphai == alpha) {
708       if (!n) {
709         for (i = 0; i < alphai; i++) gr *= (i + 1.) / (beta + i + 1.);
710         gr /= (alpha + beta + 1.);
711       } else {
712         for (i = 0; i < alphai; i++) gr *= (n + i + 1.) / (n + beta + i + 1.);
713       }
714     } else if ((PetscReal)betai == beta) {
715       if (!n) {
716         for (i = 0; i < betai; i++) gr *= (i + 1.) / (alpha + i + 2.);
717         gr /= (alpha + beta + 1.);
718       } else {
719         for (i = 0; i < betai; i++) gr *= (n + i + 1.) / (n + alpha + i + 1.);
720       }
721     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
722   }
723 #endif
724   *norm = PetscSqrtReal(twoab1 * gr);
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 static PetscErrorCode PetscDTJacobiEval_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscInt k, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *p)
729 {
730   PetscReal ak, bk;
731   PetscReal abk1;
732   PetscInt  i, l, maxdegree;
733 
734   PetscFunctionBegin;
735   maxdegree = degrees[ndegree - 1] - k;
736   ak        = a + k;
737   bk        = b + k;
738   abk1      = a + b + k + 1.;
739   if (maxdegree < 0) {
740     for (i = 0; i < npoints; i++)
741       for (l = 0; l < ndegree; l++) p[i * ndegree + l] = 0.;
742     PetscFunctionReturn(PETSC_SUCCESS);
743   }
744   for (i = 0; i < npoints; i++) {
745     PetscReal pm1, pm2, x;
746     PetscReal cnm1, cnm1x, cnm2;
747     PetscInt  j, m;
748 
749     x   = points[i];
750     pm2 = 1.;
751     PetscDTJacobiRecurrence_Internal(1, ak, bk, cnm1, cnm1x, cnm2);
752     pm1 = (cnm1 + cnm1x * x);
753     l   = 0;
754     while (l < ndegree && degrees[l] - k < 0) p[l++] = 0.;
755     while (l < ndegree && degrees[l] - k == 0) {
756       p[l] = pm2;
757       for (m = 0; m < k; m++) p[l] *= (abk1 + m) * 0.5;
758       l++;
759     }
760     while (l < ndegree && degrees[l] - k == 1) {
761       p[l] = pm1;
762       for (m = 0; m < k; m++) p[l] *= (abk1 + 1 + m) * 0.5;
763       l++;
764     }
765     for (j = 2; j <= maxdegree; j++) {
766       PetscReal pp;
767 
768       PetscDTJacobiRecurrence_Internal(j, ak, bk, cnm1, cnm1x, cnm2);
769       pp  = (cnm1 + cnm1x * x) * pm1 - cnm2 * pm2;
770       pm2 = pm1;
771       pm1 = pp;
772       while (l < ndegree && degrees[l] - k == j) {
773         p[l] = pp;
774         for (m = 0; m < k; m++) p[l] *= (abk1 + j + m) * 0.5;
775         l++;
776       }
777     }
778     p += ndegree;
779   }
780   PetscFunctionReturn(PETSC_SUCCESS);
781 }
782 
783 /*@
784   PetscDTJacobiEvalJet - Evaluate the jet (function and derivatives) of the Jacobi polynomials basis up to a given degree.
785   The Jacobi polynomials with indices $\alpha$ and $\beta$ are orthogonal with respect to the weighted inner product
786   $\langle f, g \rangle = \int_{-1}^1 (1+x)^{\alpha} (1-x)^{\beta} f(x) g(x) dx$.
787 
788   Input Parameters:
789 + alpha - the left exponent of the weight
790 . beta - the right exponetn of the weight
791 . npoints - the number of points to evaluate the polynomials at
792 . points - [npoints] array of point coordinates
793 . degree - the maximm degree polynomial space to evaluate, (degree + 1) will be evaluated total.
794 - k - the maximum derivative to evaluate in the jet, (k + 1) will be evaluated total.
795 
796   Output Parameters:
797 - p - an array containing the evaluations of the Jacobi polynomials's jets on the points.  the size is (degree + 1) x
798   (k + 1) x npoints, which also describes the order of the dimensions of this three-dimensional array: the first
799   (slowest varying) dimension is polynomial degree; the second dimension is derivative order; the third (fastest
800   varying) dimension is the index of the evaluation point.
801 
802   Level: advanced
803 
804 .seealso: `PetscDTJacobiEval()`, `PetscDTPKDEvalJet()`
805 @*/
806 PetscErrorCode PetscDTJacobiEvalJet(PetscReal alpha, PetscReal beta, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
807 {
808   PetscInt   i, j, l;
809   PetscInt  *degrees;
810   PetscReal *psingle;
811 
812   PetscFunctionBegin;
813   if (degree == 0) {
814     PetscInt zero = 0;
815 
816     for (i = 0; i <= k; i++) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, 1, &zero, &p[i * npoints]));
817     PetscFunctionReturn(PETSC_SUCCESS);
818   }
819   PetscCall(PetscMalloc1(degree + 1, &degrees));
820   PetscCall(PetscMalloc1((degree + 1) * npoints, &psingle));
821   for (i = 0; i <= degree; i++) degrees[i] = i;
822   for (i = 0; i <= k; i++) {
823     PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, degree + 1, degrees, psingle));
824     for (j = 0; j <= degree; j++) {
825       for (l = 0; l < npoints; l++) p[(j * (k + 1) + i) * npoints + l] = psingle[l * (degree + 1) + j];
826     }
827   }
828   PetscCall(PetscFree(psingle));
829   PetscCall(PetscFree(degrees));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 /*@
834    PetscDTJacobiEval - evaluate Jacobi polynomials for the weight function $(1.+x)^{\alpha} (1.-x)^{\beta}$ at a set of points
835                        at points
836 
837    Not Collective
838 
839    Input Parameters:
840 +  npoints - number of spatial points to evaluate at
841 .  alpha - the left exponent > -1
842 .  beta - the right exponent > -1
843 .  points - array of locations to evaluate at
844 .  ndegree - number of basis degrees to evaluate
845 -  degrees - sorted array of degrees to evaluate
846 
847    Output Parameters:
848 +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
849 .  D - row-oriented derivative evaluation matrix (or NULL)
850 -  D2 - row-oriented second derivative evaluation matrix (or NULL)
851 
852    Level: intermediate
853 
854 .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
855 @*/
856 PetscErrorCode PetscDTJacobiEval(PetscInt npoints, PetscReal alpha, PetscReal beta, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
857 {
858   PetscFunctionBegin;
859   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
860   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
861   if (!npoints || !ndegree) PetscFunctionReturn(PETSC_SUCCESS);
862   if (B) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 0, points, ndegree, degrees, B));
863   if (D) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 1, points, ndegree, degrees, D));
864   if (D2) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 2, points, ndegree, degrees, D2));
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 /*@
869    PetscDTLegendreEval - evaluate Legendre polynomials at points
870 
871    Not Collective
872 
873    Input Parameters:
874 +  npoints - number of spatial points to evaluate at
875 .  points - array of locations to evaluate at
876 .  ndegree - number of basis degrees to evaluate
877 -  degrees - sorted array of degrees to evaluate
878 
879    Output Parameters:
880 +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
881 .  D - row-oriented derivative evaluation matrix (or NULL)
882 -  D2 - row-oriented second derivative evaluation matrix (or NULL)
883 
884    Level: intermediate
885 
886 .seealso: `PetscDTGaussQuadrature()`
887 @*/
888 PetscErrorCode PetscDTLegendreEval(PetscInt npoints, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
889 {
890   PetscFunctionBegin;
891   PetscCall(PetscDTJacobiEval(npoints, 0., 0., points, ndegree, degrees, B, D, D2));
892   PetscFunctionReturn(PETSC_SUCCESS);
893 }
894 
895 /*@
896   PetscDTIndexToGradedOrder - convert an index into a tuple of monomial degrees in a graded order (that is, if the degree sum of tuple x is less than the degree sum of tuple y, then the index of x is smaller than the index of y)
897 
898   Input Parameters:
899 + len - the desired length of the degree tuple
900 - index - the index to convert: should be >= 0
901 
902   Output Parameter:
903 . degtup - will be filled with a tuple of degrees
904 
905   Level: beginner
906 
907   Note:
908   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
909   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
910   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).
911 
912 .seealso: `PetscDTGradedOrderToIndex()`
913 @*/
914 PetscErrorCode PetscDTIndexToGradedOrder(PetscInt len, PetscInt index, PetscInt degtup[])
915 {
916   PetscInt i, total;
917   PetscInt sum;
918 
919   PetscFunctionBeginHot;
920   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
921   PetscCheck(index >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index must be non-negative");
922   total = 1;
923   sum   = 0;
924   while (index >= total) {
925     index -= total;
926     total = (total * (len + sum)) / (sum + 1);
927     sum++;
928   }
929   for (i = 0; i < len; i++) {
930     PetscInt c;
931 
932     degtup[i] = sum;
933     for (c = 0, total = 1; c < sum; c++) {
934       /* going into the loop, total is the number of way to have a tuple of sum exactly c with length len - 1 - i */
935       if (index < total) break;
936       index -= total;
937       total = (total * (len - 1 - i + c)) / (c + 1);
938       degtup[i]--;
939     }
940     sum -= degtup[i];
941   }
942   PetscFunctionReturn(PETSC_SUCCESS);
943 }
944 
945 /*@
946   PetscDTGradedOrderToIndex - convert a tuple into an index in a graded order, the inverse of `PetscDTIndexToGradedOrder()`.
947 
948   Input Parameters:
949 + len - the length of the degree tuple
950 - degtup - tuple with this length
951 
952   Output Parameter:
953 . index - index in graded order: >= 0
954 
955   Level: Beginner
956 
957   Note:
958   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
959   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
960   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).
961 
962 .seealso: `PetscDTIndexToGradedOrder()`
963 @*/
964 PetscErrorCode PetscDTGradedOrderToIndex(PetscInt len, const PetscInt degtup[], PetscInt *index)
965 {
966   PetscInt i, idx, sum, total;
967 
968   PetscFunctionBeginHot;
969   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
970   for (i = 0, sum = 0; i < len; i++) sum += degtup[i];
971   idx   = 0;
972   total = 1;
973   for (i = 0; i < sum; i++) {
974     idx += total;
975     total = (total * (len + i)) / (i + 1);
976   }
977   for (i = 0; i < len - 1; i++) {
978     PetscInt c;
979 
980     total = 1;
981     sum -= degtup[i];
982     for (c = 0; c < sum; c++) {
983       idx += total;
984       total = (total * (len - 1 - i + c)) / (c + 1);
985     }
986   }
987   *index = idx;
988   PetscFunctionReturn(PETSC_SUCCESS);
989 }
990 
991 static PetscBool PKDCite       = PETSC_FALSE;
992 const char       PKDCitation[] = "@article{Kirby2010,\n"
993                                  "  title={Singularity-free evaluation of collapsed-coordinate orthogonal polynomials},\n"
994                                  "  author={Kirby, Robert C},\n"
995                                  "  journal={ACM Transactions on Mathematical Software (TOMS)},\n"
996                                  "  volume={37},\n"
997                                  "  number={1},\n"
998                                  "  pages={1--16},\n"
999                                  "  year={2010},\n"
1000                                  "  publisher={ACM New York, NY, USA}\n}\n";
1001 
1002 /*@
1003   PetscDTPKDEvalJet - Evaluate the jet (function and derivatives) of the Proriol-Koornwinder-Dubiner (PKD) basis for
1004   the space of polynomials up to a given degree.  The PKD basis is L2-orthonormal on the biunit simplex (which is used
1005   as the reference element for finite elements in PETSc), which makes it a stable basis to use for evaluating
1006   polynomials in that domain.
1007 
1008   Input Parameters:
1009 + dim - the number of variables in the multivariate polynomials
1010 . npoints - the number of points to evaluate the polynomials at
1011 . points - [npoints x dim] array of point coordinates
1012 . degree - the degree (sum of degrees on the variables in a monomial) of the polynomial space to evaluate.  There are ((dim + degree) choose dim) polynomials in this space.
1013 - k - the maximum order partial derivative to evaluate in the jet.  There are (dim + k choose dim) partial derivatives
1014   in the jet.  Choosing k = 0 means to evaluate just the function and no derivatives
1015 
1016   Output Parameters:
1017 - p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is ((dim + degree)
1018   choose dim) x ((dim + k) choose dim) x npoints, which also describes the order of the dimensions of this
1019   three-dimensional array: the first (slowest varying) dimension is basis function index; the second dimension is jet
1020   index; the third (fastest varying) dimension is the index of the evaluation point.
1021 
1022   Level: advanced
1023 
1024   Notes:
1025   The ordering of the basis functions, and the ordering of the derivatives in the jet, both follow the graded
1026   ordering of `PetscDTIndexToGradedOrder()` and `PetscDTGradedOrderToIndex()`.  For example, in 3D, the polynomial with
1027   leading monomial x^2,y^0,z^1, which has degree tuple (2,0,1), which by `PetscDTGradedOrderToIndex()` has index 12 (it is the 13th basis function in the space);
1028   the partial derivative $\partial_x \partial_z$ has order tuple (1,0,1), appears at index 6 in the jet (it is the 7th partial derivative in the jet).
1029 
1030   The implementation uses Kirby's singularity-free evaluation algorithm, https://doi.org/10.1145/1644001.1644006.
1031 
1032 .seealso: `PetscDTGradedOrderToIndex()`, `PetscDTIndexToGradedOrder()`, `PetscDTJacobiEvalJet()`
1033 @*/
1034 PetscErrorCode PetscDTPKDEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
1035 {
1036   PetscInt   degidx, kidx, d, pt;
1037   PetscInt   Nk, Ndeg;
1038   PetscInt  *ktup, *degtup;
1039   PetscReal *scales, initscale, scaleexp;
1040 
1041   PetscFunctionBegin;
1042   PetscCall(PetscCitationsRegister(PKDCitation, &PKDCite));
1043   PetscCall(PetscDTBinomialInt(dim + k, k, &Nk));
1044   PetscCall(PetscDTBinomialInt(degree + dim, degree, &Ndeg));
1045   PetscCall(PetscMalloc2(dim, &degtup, dim, &ktup));
1046   PetscCall(PetscMalloc1(Ndeg, &scales));
1047   initscale = 1.;
1048   if (dim > 1) {
1049     PetscCall(PetscDTBinomial(dim, 2, &scaleexp));
1050     initscale = PetscPowReal(2., scaleexp * 0.5);
1051   }
1052   for (degidx = 0; degidx < Ndeg; degidx++) {
1053     PetscInt  e, i;
1054     PetscInt  m1idx = -1, m2idx = -1;
1055     PetscInt  n;
1056     PetscInt  degsum;
1057     PetscReal alpha;
1058     PetscReal cnm1, cnm1x, cnm2;
1059     PetscReal norm;
1060 
1061     PetscCall(PetscDTIndexToGradedOrder(dim, degidx, degtup));
1062     for (d = dim - 1; d >= 0; d--)
1063       if (degtup[d]) break;
1064     if (d < 0) { /* constant is 1 everywhere, all derivatives are zero */
1065       scales[degidx] = initscale;
1066       for (e = 0; e < dim; e++) {
1067         PetscCall(PetscDTJacobiNorm(e, 0., 0, &norm));
1068         scales[degidx] /= norm;
1069       }
1070       for (i = 0; i < npoints; i++) p[degidx * Nk * npoints + i] = 1.;
1071       for (i = 0; i < (Nk - 1) * npoints; i++) p[(degidx * Nk + 1) * npoints + i] = 0.;
1072       continue;
1073     }
1074     n = degtup[d];
1075     degtup[d]--;
1076     PetscCall(PetscDTGradedOrderToIndex(dim, degtup, &m1idx));
1077     if (degtup[d] > 0) {
1078       degtup[d]--;
1079       PetscCall(PetscDTGradedOrderToIndex(dim, degtup, &m2idx));
1080       degtup[d]++;
1081     }
1082     degtup[d]++;
1083     for (e = 0, degsum = 0; e < d; e++) degsum += degtup[e];
1084     alpha = 2 * degsum + d;
1085     PetscDTJacobiRecurrence_Internal(n, alpha, 0., cnm1, cnm1x, cnm2);
1086 
1087     scales[degidx] = initscale;
1088     for (e = 0, degsum = 0; e < dim; e++) {
1089       PetscInt  f;
1090       PetscReal ealpha;
1091       PetscReal enorm;
1092 
1093       ealpha = 2 * degsum + e;
1094       for (f = 0; f < degsum; f++) scales[degidx] *= 2.;
1095       PetscCall(PetscDTJacobiNorm(ealpha, 0., degtup[e], &enorm));
1096       scales[degidx] /= enorm;
1097       degsum += degtup[e];
1098     }
1099 
1100     for (pt = 0; pt < npoints; pt++) {
1101       /* compute the multipliers */
1102       PetscReal thetanm1, thetanm1x, thetanm2;
1103 
1104       thetanm1x = dim - (d + 1) + 2. * points[pt * dim + d];
1105       for (e = d + 1; e < dim; e++) thetanm1x += points[pt * dim + e];
1106       thetanm1x *= 0.5;
1107       thetanm1 = (2. - (dim - (d + 1)));
1108       for (e = d + 1; e < dim; e++) thetanm1 -= points[pt * dim + e];
1109       thetanm1 *= 0.5;
1110       thetanm2 = thetanm1 * thetanm1;
1111 
1112       for (kidx = 0; kidx < Nk; kidx++) {
1113         PetscInt f;
1114 
1115         PetscCall(PetscDTIndexToGradedOrder(dim, kidx, ktup));
1116         /* first sum in the same derivative terms */
1117         p[(degidx * Nk + kidx) * npoints + pt] = (cnm1 * thetanm1 + cnm1x * thetanm1x) * p[(m1idx * Nk + kidx) * npoints + pt];
1118         if (m2idx >= 0) p[(degidx * Nk + kidx) * npoints + pt] -= cnm2 * thetanm2 * p[(m2idx * Nk + kidx) * npoints + pt];
1119 
1120         for (f = d; f < dim; f++) {
1121           PetscInt km1idx, mplty = ktup[f];
1122 
1123           if (!mplty) continue;
1124           ktup[f]--;
1125           PetscCall(PetscDTGradedOrderToIndex(dim, ktup, &km1idx));
1126 
1127           /* the derivative of  cnm1x * thetanm1x  wrt x variable f is 0.5 * cnm1x if f > d otherwise it is cnm1x */
1128           /* the derivative of  cnm1  * thetanm1   wrt x variable f is 0 if f == d, otherwise it is -0.5 * cnm1 */
1129           /* the derivative of -cnm2  * thetanm2   wrt x variable f is 0 if f == d, otherwise it is cnm2 * thetanm1 */
1130           if (f > d) {
1131             PetscInt f2;
1132 
1133             p[(degidx * Nk + kidx) * npoints + pt] += mplty * 0.5 * (cnm1x - cnm1) * p[(m1idx * Nk + km1idx) * npoints + pt];
1134             if (m2idx >= 0) {
1135               p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm2 * thetanm1 * p[(m2idx * Nk + km1idx) * npoints + pt];
1136               /* second derivatives of -cnm2  * thetanm2   wrt x variable f,f2 is like - 0.5 * cnm2 */
1137               for (f2 = f; f2 < dim; f2++) {
1138                 PetscInt km2idx, mplty2 = ktup[f2];
1139                 PetscInt factor;
1140 
1141                 if (!mplty2) continue;
1142                 ktup[f2]--;
1143                 PetscCall(PetscDTGradedOrderToIndex(dim, ktup, &km2idx));
1144 
1145                 factor = mplty * mplty2;
1146                 if (f == f2) factor /= 2;
1147                 p[(degidx * Nk + kidx) * npoints + pt] -= 0.5 * factor * cnm2 * p[(m2idx * Nk + km2idx) * npoints + pt];
1148                 ktup[f2]++;
1149               }
1150             }
1151           } else {
1152             p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm1x * p[(m1idx * Nk + km1idx) * npoints + pt];
1153           }
1154           ktup[f]++;
1155         }
1156       }
1157     }
1158   }
1159   for (degidx = 0; degidx < Ndeg; degidx++) {
1160     PetscReal scale = scales[degidx];
1161     PetscInt  i;
1162 
1163     for (i = 0; i < Nk * npoints; i++) p[degidx * Nk * npoints + i] *= scale;
1164   }
1165   PetscCall(PetscFree(scales));
1166   PetscCall(PetscFree2(degtup, ktup));
1167   PetscFunctionReturn(PETSC_SUCCESS);
1168 }
1169 
1170 /*@
1171   PetscDTPTrimmedSize - The size of the trimmed polynomial space of k-forms with a given degree and form degree,
1172   which can be evaluated in `PetscDTPTrimmedEvalJet()`.
1173 
1174   Input Parameters:
1175 + dim - the number of variables in the multivariate polynomials
1176 . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space.
1177 - formDegree - the degree of the form
1178 
1179   Output Parameters:
1180 - size - The number ((dim + degree) choose (dim + formDegree)) x ((degree + formDegree - 1) choose (formDegree))
1181 
1182   Level: advanced
1183 
1184 .seealso: `PetscDTPTrimmedEvalJet()`
1185 @*/
1186 PetscErrorCode PetscDTPTrimmedSize(PetscInt dim, PetscInt degree, PetscInt formDegree, PetscInt *size)
1187 {
1188   PetscInt Nrk, Nbpt; // number of trimmed polynomials
1189 
1190   PetscFunctionBegin;
1191   formDegree = PetscAbsInt(formDegree);
1192   PetscCall(PetscDTBinomialInt(degree + dim, degree + formDegree, &Nbpt));
1193   PetscCall(PetscDTBinomialInt(degree + formDegree - 1, formDegree, &Nrk));
1194   Nbpt *= Nrk;
1195   *size = Nbpt;
1196   PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198 
1199 /* there was a reference implementation based on section 4.4 of Arnold, Falk & Winther (acta numerica, 2006), but it
1200  * was inferior to this implementation */
1201 static PetscErrorCode PetscDTPTrimmedEvalJet_Internal(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1202 {
1203   PetscInt  formDegreeOrig = formDegree;
1204   PetscBool formNegative   = (formDegreeOrig < 0) ? PETSC_TRUE : PETSC_FALSE;
1205 
1206   PetscFunctionBegin;
1207   formDegree = PetscAbsInt(formDegreeOrig);
1208   if (formDegree == 0) {
1209     PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree, jetDegree, p));
1210     PetscFunctionReturn(PETSC_SUCCESS);
1211   }
1212   if (formDegree == dim) {
1213     PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p));
1214     PetscFunctionReturn(PETSC_SUCCESS);
1215   }
1216   PetscInt Nbpt;
1217   PetscCall(PetscDTPTrimmedSize(dim, degree, formDegree, &Nbpt));
1218   PetscInt Nf;
1219   PetscCall(PetscDTBinomialInt(dim, formDegree, &Nf));
1220   PetscInt Nk;
1221   PetscCall(PetscDTBinomialInt(dim + jetDegree, dim, &Nk));
1222   PetscCall(PetscArrayzero(p, Nbpt * Nf * Nk * npoints));
1223 
1224   PetscInt Nbpm1; // number of scalar polynomials up to degree - 1;
1225   PetscCall(PetscDTBinomialInt(dim + degree - 1, dim, &Nbpm1));
1226   PetscReal *p_scalar;
1227   PetscCall(PetscMalloc1(Nbpm1 * Nk * npoints, &p_scalar));
1228   PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p_scalar));
1229   PetscInt total = 0;
1230   // First add the full polynomials up to degree - 1 into the basis: take the scalar
1231   // and copy one for each form component
1232   for (PetscInt i = 0; i < Nbpm1; i++) {
1233     const PetscReal *src = &p_scalar[i * Nk * npoints];
1234     for (PetscInt f = 0; f < Nf; f++) {
1235       PetscReal *dest = &p[(total++ * Nf + f) * Nk * npoints];
1236       PetscCall(PetscArraycpy(dest, src, Nk * npoints));
1237     }
1238   }
1239   PetscInt *form_atoms;
1240   PetscCall(PetscMalloc1(formDegree + 1, &form_atoms));
1241   // construct the interior product pattern
1242   PetscInt(*pattern)[3];
1243   PetscInt Nf1; // number of formDegree + 1 forms
1244   PetscCall(PetscDTBinomialInt(dim, formDegree + 1, &Nf1));
1245   PetscInt nnz = Nf1 * (formDegree + 1);
1246   PetscCall(PetscMalloc1(Nf1 * (formDegree + 1), &pattern));
1247   PetscCall(PetscDTAltVInteriorPattern(dim, formDegree + 1, pattern));
1248   PetscReal centroid = (1. - dim) / (dim + 1.);
1249   PetscInt *deriv;
1250   PetscCall(PetscMalloc1(dim, &deriv));
1251   for (PetscInt d = dim; d >= formDegree + 1; d--) {
1252     PetscInt Nfd1; // number of formDegree + 1 forms in dimension d that include dx_0
1253                    // (equal to the number of formDegree forms in dimension d-1)
1254     PetscCall(PetscDTBinomialInt(d - 1, formDegree, &Nfd1));
1255     // The number of homogeneous (degree-1) scalar polynomials in d variables
1256     PetscInt Nh;
1257     PetscCall(PetscDTBinomialInt(d - 1 + degree - 1, d - 1, &Nh));
1258     const PetscReal *h_scalar = &p_scalar[(Nbpm1 - Nh) * Nk * npoints];
1259     for (PetscInt b = 0; b < Nh; b++) {
1260       const PetscReal *h_s = &h_scalar[b * Nk * npoints];
1261       for (PetscInt f = 0; f < Nfd1; f++) {
1262         // construct all formDegree+1 forms that start with dx_(dim - d) /\ ...
1263         form_atoms[0] = dim - d;
1264         PetscCall(PetscDTEnumSubset(d - 1, formDegree, f, &form_atoms[1]));
1265         for (PetscInt i = 0; i < formDegree; i++) form_atoms[1 + i] += form_atoms[0] + 1;
1266         PetscInt f_ind; // index of the resulting form
1267         PetscCall(PetscDTSubsetIndex(dim, formDegree + 1, form_atoms, &f_ind));
1268         PetscReal *p_f = &p[total++ * Nf * Nk * npoints];
1269         for (PetscInt nz = 0; nz < nnz; nz++) {
1270           PetscInt  i     = pattern[nz][0]; // formDegree component
1271           PetscInt  j     = pattern[nz][1]; // (formDegree + 1) component
1272           PetscInt  v     = pattern[nz][2]; // coordinate component
1273           PetscReal scale = v < 0 ? -1. : 1.;
1274 
1275           i     = formNegative ? (Nf - 1 - i) : i;
1276           scale = (formNegative && (i & 1)) ? -scale : scale;
1277           v     = v < 0 ? -(v + 1) : v;
1278           if (j != f_ind) continue;
1279           PetscReal *p_i = &p_f[i * Nk * npoints];
1280           for (PetscInt jet = 0; jet < Nk; jet++) {
1281             const PetscReal *h_jet = &h_s[jet * npoints];
1282             PetscReal       *p_jet = &p_i[jet * npoints];
1283 
1284             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * h_jet[pt] * (points[pt * dim + v] - centroid);
1285             PetscCall(PetscDTIndexToGradedOrder(dim, jet, deriv));
1286             deriv[v]++;
1287             PetscReal mult = deriv[v];
1288             PetscInt  l;
1289             PetscCall(PetscDTGradedOrderToIndex(dim, deriv, &l));
1290             if (l >= Nk) continue;
1291             p_jet = &p_i[l * npoints];
1292             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * mult * h_jet[pt];
1293             deriv[v]--;
1294           }
1295         }
1296       }
1297     }
1298   }
1299   PetscCheck(total == Nbpt, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrectly counted P trimmed polynomials");
1300   PetscCall(PetscFree(deriv));
1301   PetscCall(PetscFree(pattern));
1302   PetscCall(PetscFree(form_atoms));
1303   PetscCall(PetscFree(p_scalar));
1304   PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306 
1307 /*@
1308   PetscDTPTrimmedEvalJet - Evaluate the jet (function and derivatives) of a basis of the trimmed polynomial k-forms up to
1309   a given degree.
1310 
1311   Input Parameters:
1312 + dim - the number of variables in the multivariate polynomials
1313 . npoints - the number of points to evaluate the polynomials at
1314 . points - [npoints x dim] array of point coordinates
1315 . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space to evaluate.
1316            There are ((dim + degree) choose (dim + formDegree)) x ((degree + formDegree - 1) choose (formDegree)) polynomials in this space.
1317            (You can use `PetscDTPTrimmedSize()` to compute this size.)
1318 . formDegree - the degree of the form
1319 - jetDegree - the maximum order partial derivative to evaluate in the jet.  There are ((dim + jetDegree) choose dim) partial derivatives
1320               in the jet.  Choosing jetDegree = 0 means to evaluate just the function and no derivatives
1321 
1322   Output Parameters:
1323 - p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is
1324       `PetscDTPTrimmedSize()` x ((dim + formDegree) choose dim) x ((dim + k) choose dim) x npoints,
1325       which also describes the order of the dimensions of this
1326       four-dimensional array:
1327         the first (slowest varying) dimension is basis function index;
1328         the second dimension is component of the form;
1329         the third dimension is jet index;
1330         the fourth (fastest varying) dimension is the index of the evaluation point.
1331 
1332   Level: advanced
1333 
1334   Notes:
1335   The ordering of the basis functions is not graded, so the basis functions are not nested by degree like `PetscDTPKDEvalJet()`.
1336   The basis functions are not an L2-orthonormal basis on any particular domain.
1337 
1338   The implementation is based on the description of the trimmed polynomials up to degree r as
1339   the direct sum of polynomials up to degree (r-1) and the Koszul differential applied to
1340   homogeneous polynomials of degree (r-1).
1341 
1342 .seealso: `PetscDTPKDEvalJet()`, `PetscDTPTrimmedSize()`
1343 @*/
1344 PetscErrorCode PetscDTPTrimmedEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1345 {
1346   PetscFunctionBegin;
1347   PetscCall(PetscDTPTrimmedEvalJet_Internal(dim, npoints, points, degree, formDegree, jetDegree, p));
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350 
1351 /* solve the symmetric tridiagonal eigenvalue system, writing the eigenvalues into eigs and the eigenvectors into V
1352  * with lds n; diag and subdiag are overwritten */
1353 static PetscErrorCode PetscDTSymmetricTridiagonalEigensolve(PetscInt n, PetscReal diag[], PetscReal subdiag[], PetscReal eigs[], PetscScalar V[])
1354 {
1355   char          jobz   = 'V'; /* eigenvalues and eigenvectors */
1356   char          range  = 'A'; /* all eigenvalues will be found */
1357   PetscReal     VL     = 0.;  /* ignored because range is 'A' */
1358   PetscReal     VU     = 0.;  /* ignored because range is 'A' */
1359   PetscBLASInt  IL     = 0;   /* ignored because range is 'A' */
1360   PetscBLASInt  IU     = 0;   /* ignored because range is 'A' */
1361   PetscReal     abstol = 0.;  /* unused */
1362   PetscBLASInt  bn, bm, ldz;  /* bm will equal bn on exit */
1363   PetscBLASInt *isuppz;
1364   PetscBLASInt  lwork, liwork;
1365   PetscReal     workquery;
1366   PetscBLASInt  iworkquery;
1367   PetscBLASInt *iwork;
1368   PetscBLASInt  info;
1369   PetscReal    *work = NULL;
1370 
1371   PetscFunctionBegin;
1372 #if !defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1373   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1374 #endif
1375   PetscCall(PetscBLASIntCast(n, &bn));
1376   PetscCall(PetscBLASIntCast(n, &ldz));
1377 #if !defined(PETSC_MISSING_LAPACK_STEGR)
1378   PetscCall(PetscMalloc1(2 * n, &isuppz));
1379   lwork  = -1;
1380   liwork = -1;
1381   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, &workquery, &lwork, &iworkquery, &liwork, &info));
1382   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEGR error");
1383   lwork  = (PetscBLASInt)workquery;
1384   liwork = (PetscBLASInt)iworkquery;
1385   PetscCall(PetscMalloc2(lwork, &work, liwork, &iwork));
1386   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1387   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, work, &lwork, iwork, &liwork, &info));
1388   PetscCall(PetscFPTrapPop());
1389   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEGR error");
1390   PetscCall(PetscFree2(work, iwork));
1391   PetscCall(PetscFree(isuppz));
1392 #elif !defined(PETSC_MISSING_LAPACK_STEQR)
1393   jobz = 'I'; /* Compute eigenvalues and eigenvectors of the
1394                  tridiagonal matrix.  Z is initialized to the identity
1395                  matrix. */
1396   PetscCall(PetscMalloc1(PetscMax(1, 2 * n - 2), &work));
1397   PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("I", &bn, diag, subdiag, V, &ldz, work, &info));
1398   PetscCall(PetscFPTrapPop());
1399   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEQR error");
1400   PetscCall(PetscFree(work));
1401   PetscCall(PetscArraycpy(eigs, diag, n));
1402 #endif
1403   PetscFunctionReturn(PETSC_SUCCESS);
1404 }
1405 
1406 /* Formula for the weights at the endpoints (-1 and 1) of Gauss-Lobatto-Jacobi
1407  * quadrature rules on the interval [-1, 1] */
1408 static PetscErrorCode PetscDTGaussLobattoJacobiEndweights_Internal(PetscInt n, PetscReal alpha, PetscReal beta, PetscReal *leftw, PetscReal *rightw)
1409 {
1410   PetscReal twoab1;
1411   PetscInt  m = n - 2;
1412   PetscReal a = alpha + 1.;
1413   PetscReal b = beta + 1.;
1414   PetscReal gra, grb;
1415 
1416   PetscFunctionBegin;
1417   twoab1 = PetscPowReal(2., a + b - 1.);
1418 #if defined(PETSC_HAVE_LGAMMA)
1419   grb = PetscExpReal(2. * PetscLGamma(b + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + a + 1.) - (PetscLGamma(m + b + 1) + PetscLGamma(m + a + b + 1.)));
1420   gra = PetscExpReal(2. * PetscLGamma(a + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + b + 1.) - (PetscLGamma(m + a + 1) + PetscLGamma(m + a + b + 1.)));
1421 #else
1422   {
1423     PetscInt alphai = (PetscInt)alpha;
1424     PetscInt betai  = (PetscInt)beta;
1425 
1426     if ((PetscReal)alphai == alpha && (PetscReal)betai == beta) {
1427       PetscReal binom1, binom2;
1428 
1429       PetscCall(PetscDTBinomial(m + b, b, &binom1));
1430       PetscCall(PetscDTBinomial(m + a + b, b, &binom2));
1431       grb = 1. / (binom1 * binom2);
1432       PetscCall(PetscDTBinomial(m + a, a, &binom1));
1433       PetscCall(PetscDTBinomial(m + a + b, a, &binom2));
1434       gra = 1. / (binom1 * binom2);
1435     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1436   }
1437 #endif
1438   *leftw  = twoab1 * grb / b;
1439   *rightw = twoab1 * gra / a;
1440   PetscFunctionReturn(PETSC_SUCCESS);
1441 }
1442 
1443 /* Evaluates the nth jacobi polynomial with weight parameters a,b at a point x.
1444    Recurrence relations implemented from the pseudocode given in Karniadakis and Sherwin, Appendix B */
1445 static inline PetscErrorCode PetscDTComputeJacobi(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscReal *P)
1446 {
1447   PetscReal pn1, pn2;
1448   PetscReal cnm1, cnm1x, cnm2;
1449   PetscInt  k;
1450 
1451   PetscFunctionBegin;
1452   if (!n) {
1453     *P = 1.0;
1454     PetscFunctionReturn(PETSC_SUCCESS);
1455   }
1456   PetscDTJacobiRecurrence_Internal(1, a, b, cnm1, cnm1x, cnm2);
1457   pn2 = 1.;
1458   pn1 = cnm1 + cnm1x * x;
1459   if (n == 1) {
1460     *P = pn1;
1461     PetscFunctionReturn(PETSC_SUCCESS);
1462   }
1463   *P = 0.0;
1464   for (k = 2; k < n + 1; ++k) {
1465     PetscDTJacobiRecurrence_Internal(k, a, b, cnm1, cnm1x, cnm2);
1466 
1467     *P  = (cnm1 + cnm1x * x) * pn1 - cnm2 * pn2;
1468     pn2 = pn1;
1469     pn1 = *P;
1470   }
1471   PetscFunctionReturn(PETSC_SUCCESS);
1472 }
1473 
1474 /* Evaluates the first derivative of P_{n}^{a,b} at a point x. */
1475 static inline PetscErrorCode PetscDTComputeJacobiDerivative(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscInt k, PetscReal *P)
1476 {
1477   PetscReal nP;
1478   PetscInt  i;
1479 
1480   PetscFunctionBegin;
1481   *P = 0.0;
1482   if (k > n) PetscFunctionReturn(PETSC_SUCCESS);
1483   PetscCall(PetscDTComputeJacobi(a + k, b + k, n - k, x, &nP));
1484   for (i = 0; i < k; i++) nP *= (a + b + n + 1. + i) * 0.5;
1485   *P = nP;
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
1489 static PetscErrorCode PetscDTGaussJacobiQuadrature_Newton_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal x[], PetscReal w[])
1490 {
1491   PetscInt  maxIter = 100;
1492   PetscReal eps     = PetscExpReal(0.75 * PetscLogReal(PETSC_MACHINE_EPSILON));
1493   PetscReal a1, a6, gf;
1494   PetscInt  k;
1495 
1496   PetscFunctionBegin;
1497 
1498   a1 = PetscPowReal(2.0, a + b + 1);
1499 #if defined(PETSC_HAVE_LGAMMA)
1500   {
1501     PetscReal a2, a3, a4, a5;
1502     a2 = PetscLGamma(a + npoints + 1);
1503     a3 = PetscLGamma(b + npoints + 1);
1504     a4 = PetscLGamma(a + b + npoints + 1);
1505     a5 = PetscLGamma(npoints + 1);
1506     gf = PetscExpReal(a2 + a3 - (a4 + a5));
1507   }
1508 #else
1509   {
1510     PetscInt ia, ib;
1511 
1512     ia = (PetscInt)a;
1513     ib = (PetscInt)b;
1514     gf = 1.;
1515     if (ia == a && ia >= 0) { /* compute ratio of rising factorals wrt a */
1516       for (k = 0; k < ia; k++) gf *= (npoints + 1. + k) / (npoints + b + 1. + k);
1517     } else if (b == b && ib >= 0) { /* compute ratio of rising factorials wrt b */
1518       for (k = 0; k < ib; k++) gf *= (npoints + 1. + k) / (npoints + a + 1. + k);
1519     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1520   }
1521 #endif
1522 
1523   a6 = a1 * gf;
1524   /* Computes the m roots of P_{m}^{a,b} on [-1,1] by Newton's method with Chebyshev points as initial guesses.
1525    Algorithm implemented from the pseudocode given by Karniadakis and Sherwin and Python in FIAT */
1526   for (k = 0; k < npoints; ++k) {
1527     PetscReal r = PetscCosReal(PETSC_PI * (1. - (4. * k + 3. + 2. * b) / (4. * npoints + 2. * (a + b + 1.)))), dP;
1528     PetscInt  j;
1529 
1530     if (k > 0) r = 0.5 * (r + x[k - 1]);
1531     for (j = 0; j < maxIter; ++j) {
1532       PetscReal s = 0.0, delta, f, fp;
1533       PetscInt  i;
1534 
1535       for (i = 0; i < k; ++i) s = s + 1.0 / (r - x[i]);
1536       PetscCall(PetscDTComputeJacobi(a, b, npoints, r, &f));
1537       PetscCall(PetscDTComputeJacobiDerivative(a, b, npoints, r, 1, &fp));
1538       delta = f / (fp - f * s);
1539       r     = r - delta;
1540       if (PetscAbsReal(delta) < eps) break;
1541     }
1542     x[k] = r;
1543     PetscCall(PetscDTComputeJacobiDerivative(a, b, npoints, x[k], 1, &dP));
1544     w[k] = a6 / (1.0 - PetscSqr(x[k])) / PetscSqr(dP);
1545   }
1546   PetscFunctionReturn(PETSC_SUCCESS);
1547 }
1548 
1549 /* Compute the diagonals of the Jacobi matrix used in Golub & Welsch algorithms for Gauss-Jacobi
1550  * quadrature weight calculations on [-1,1] for exponents (1. + x)^a (1.-x)^b */
1551 static PetscErrorCode PetscDTJacobiMatrix_Internal(PetscInt nPoints, PetscReal a, PetscReal b, PetscReal *d, PetscReal *s)
1552 {
1553   PetscInt i;
1554 
1555   PetscFunctionBegin;
1556   for (i = 0; i < nPoints; i++) {
1557     PetscReal A, B, C;
1558 
1559     PetscDTJacobiRecurrence_Internal(i + 1, a, b, A, B, C);
1560     d[i] = -A / B;
1561     if (i) s[i - 1] *= C / B;
1562     if (i < nPoints - 1) s[i] = 1. / B;
1563   }
1564   PetscFunctionReturn(PETSC_SUCCESS);
1565 }
1566 
1567 static PetscErrorCode PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1568 {
1569   PetscReal mu0;
1570   PetscReal ga, gb, gab;
1571   PetscInt  i;
1572 
1573   PetscFunctionBegin;
1574   PetscCall(PetscCitationsRegister(GolubWelschCitation, &GolubWelschCite));
1575 
1576 #if defined(PETSC_HAVE_TGAMMA)
1577   ga  = PetscTGamma(a + 1);
1578   gb  = PetscTGamma(b + 1);
1579   gab = PetscTGamma(a + b + 2);
1580 #else
1581   {
1582     PetscInt ia, ib;
1583 
1584     ia = (PetscInt)a;
1585     ib = (PetscInt)b;
1586     if (ia == a && ib == b && ia + 1 > 0 && ib + 1 > 0 && ia + ib + 2 > 0) { /* All gamma(x) terms are (x-1)! terms */
1587       PetscCall(PetscDTFactorial(ia, &ga));
1588       PetscCall(PetscDTFactorial(ib, &gb));
1589       PetscCall(PetscDTFactorial(ia + ib + 1, &gb));
1590     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "tgamma() - math routine is unavailable.");
1591   }
1592 #endif
1593   mu0 = PetscPowReal(2., a + b + 1.) * ga * gb / gab;
1594 
1595 #if defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1596   {
1597     PetscReal   *diag, *subdiag;
1598     PetscScalar *V;
1599 
1600     PetscCall(PetscMalloc2(npoints, &diag, npoints, &subdiag));
1601     PetscCall(PetscMalloc1(npoints * npoints, &V));
1602     PetscCall(PetscDTJacobiMatrix_Internal(npoints, a, b, diag, subdiag));
1603     for (i = 0; i < npoints - 1; i++) subdiag[i] = PetscSqrtReal(subdiag[i]);
1604     PetscCall(PetscDTSymmetricTridiagonalEigensolve(npoints, diag, subdiag, x, V));
1605     for (i = 0; i < npoints; i++) w[i] = PetscSqr(PetscRealPart(V[i * npoints])) * mu0;
1606     PetscCall(PetscFree(V));
1607     PetscCall(PetscFree2(diag, subdiag));
1608   }
1609 #else
1610   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1611 #endif
1612   { /* As of March 2, 2020, The Sun Performance Library breaks the LAPACK contract for xstegr and xsteqr: the
1613        eigenvalues are not guaranteed to be in ascending order.  So we heave a passive aggressive sigh and check that
1614        the eigenvalues are sorted */
1615     PetscBool sorted;
1616 
1617     PetscCall(PetscSortedReal(npoints, x, &sorted));
1618     if (!sorted) {
1619       PetscInt  *order, i;
1620       PetscReal *tmp;
1621 
1622       PetscCall(PetscMalloc2(npoints, &order, npoints, &tmp));
1623       for (i = 0; i < npoints; i++) order[i] = i;
1624       PetscCall(PetscSortRealWithPermutation(npoints, x, order));
1625       PetscCall(PetscArraycpy(tmp, x, npoints));
1626       for (i = 0; i < npoints; i++) x[i] = tmp[order[i]];
1627       PetscCall(PetscArraycpy(tmp, w, npoints));
1628       for (i = 0; i < npoints; i++) w[i] = tmp[order[i]];
1629       PetscCall(PetscFree2(order, tmp));
1630     }
1631   }
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 static PetscErrorCode PetscDTGaussJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1636 {
1637   PetscFunctionBegin;
1638   PetscCheck(npoints >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of points must be positive");
1639   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */
1640   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
1641   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
1642 
1643   if (newton) PetscCall(PetscDTGaussJacobiQuadrature_Newton_Internal(npoints, alpha, beta, x, w));
1644   else PetscCall(PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(npoints, alpha, beta, x, w));
1645   if (alpha == beta) { /* symmetrize */
1646     PetscInt i;
1647     for (i = 0; i < (npoints + 1) / 2; i++) {
1648       PetscInt  j  = npoints - 1 - i;
1649       PetscReal xi = x[i];
1650       PetscReal xj = x[j];
1651       PetscReal wi = w[i];
1652       PetscReal wj = w[j];
1653 
1654       x[i] = (xi - xj) / 2.;
1655       x[j] = (xj - xi) / 2.;
1656       w[i] = w[j] = (wi + wj) / 2.;
1657     }
1658   }
1659   PetscFunctionReturn(PETSC_SUCCESS);
1660 }
1661 
1662 /*@
1663   PetscDTGaussJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1664   $(x-a)^\alpha (x-b)^\beta$.
1665 
1666   Not collective
1667 
1668   Input Parameters:
1669 + npoints - the number of points in the quadrature rule
1670 . a - the left endpoint of the interval
1671 . b - the right endpoint of the interval
1672 . alpha - the left exponent
1673 - beta - the right exponent
1674 
1675   Output Parameters:
1676 + x - array of length npoints, the locations of the quadrature points
1677 - w - array of length npoints, the weights of the quadrature points
1678 
1679   Level: intermediate
1680 
1681   Note:
1682   This quadrature rule is exact for polynomials up to degree 2*npoints - 1.
1683 
1684 .seealso: `PetscDTGaussQuadrature()`
1685 @*/
1686 PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1687 {
1688   PetscInt i;
1689 
1690   PetscFunctionBegin;
1691   PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal));
1692   if (a != -1. || b != 1.) { /* shift */
1693     for (i = 0; i < npoints; i++) {
1694       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1695       w[i] *= (b - a) / 2.;
1696     }
1697   }
1698   PetscFunctionReturn(PETSC_SUCCESS);
1699 }
1700 
1701 static PetscErrorCode PetscDTGaussLobattoJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1702 {
1703   PetscInt i;
1704 
1705   PetscFunctionBegin;
1706   PetscCheck(npoints >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of points must be positive");
1707   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */
1708   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
1709   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
1710 
1711   x[0]           = -1.;
1712   x[npoints - 1] = 1.;
1713   if (npoints > 2) PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints - 2, alpha + 1., beta + 1., &x[1], &w[1], newton));
1714   for (i = 1; i < npoints - 1; i++) w[i] /= (1. - x[i] * x[i]);
1715   PetscCall(PetscDTGaussLobattoJacobiEndweights_Internal(npoints, alpha, beta, &w[0], &w[npoints - 1]));
1716   PetscFunctionReturn(PETSC_SUCCESS);
1717 }
1718 
1719 /*@
1720   PetscDTGaussLobattoJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1721   $(x-a)^\alpha (x-b)^\beta$, with endpoints a and b included as quadrature points.
1722 
1723   Not collective
1724 
1725   Input Parameters:
1726 + npoints - the number of points in the quadrature rule
1727 . a - the left endpoint of the interval
1728 . b - the right endpoint of the interval
1729 . alpha - the left exponent
1730 - beta - the right exponent
1731 
1732   Output Parameters:
1733 + x - array of length npoints, the locations of the quadrature points
1734 - w - array of length npoints, the weights of the quadrature points
1735 
1736   Level: intermediate
1737 
1738   Note:
1739   This quadrature rule is exact for polynomials up to degree 2*npoints - 3.
1740 
1741 .seealso: `PetscDTGaussJacobiQuadrature()`
1742 @*/
1743 PetscErrorCode PetscDTGaussLobattoJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1744 {
1745   PetscInt i;
1746 
1747   PetscFunctionBegin;
1748   PetscCall(PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal));
1749   if (a != -1. || b != 1.) { /* shift */
1750     for (i = 0; i < npoints; i++) {
1751       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1752       w[i] *= (b - a) / 2.;
1753     }
1754   }
1755   PetscFunctionReturn(PETSC_SUCCESS);
1756 }
1757 
1758 /*@
1759    PetscDTGaussQuadrature - create Gauss-Legendre quadrature
1760 
1761    Not Collective
1762 
1763    Input Parameters:
1764 +  npoints - number of points
1765 .  a - left end of interval (often-1)
1766 -  b - right end of interval (often +1)
1767 
1768    Output Parameters:
1769 +  x - quadrature points
1770 -  w - quadrature weights
1771 
1772    Level: intermediate
1773 
1774    References:
1775 .  * - Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 1969.
1776 
1777 .seealso: `PetscDTLegendreEval()`, `PetscDTGaussJacobiQuadrature()`
1778 @*/
1779 PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1780 {
1781   PetscInt i;
1782 
1783   PetscFunctionBegin;
1784   PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints, 0., 0., x, w, PetscDTGaussQuadratureNewton_Internal));
1785   if (a != -1. || b != 1.) { /* shift */
1786     for (i = 0; i < npoints; i++) {
1787       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1788       w[i] *= (b - a) / 2.;
1789     }
1790   }
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 /*@C
1795    PetscDTGaussLobattoLegendreQuadrature - creates a set of the locations and weights of the Gauss-Lobatto-Legendre
1796                       nodes of a given size on the domain [-1,1]
1797 
1798    Not Collective
1799 
1800    Input Parameters:
1801 +  n - number of grid nodes
1802 -  type - `PETSCGAUSSLOBATTOLEGENDRE_VIA_LINEAR_ALGEBRA` or `PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON`
1803 
1804    Output Parameters:
1805 +  x - quadrature points
1806 -  w - quadrature weights
1807 
1808    Level: intermediate
1809 
1810    Notes:
1811     For n > 30  the Newton approach computes duplicate (incorrect) values for some nodes because the initial guess is apparently not
1812           close enough to the desired solution
1813 
1814    These are useful for implementing spectral methods based on Gauss-Lobatto-Legendre (GLL) nodes
1815 
1816    See  https://epubs.siam.org/doi/abs/10.1137/110855442  https://epubs.siam.org/doi/abs/10.1137/120889873 for better ways to compute GLL nodes
1817 
1818 .seealso: `PetscDTGaussQuadrature()`, `PetscGaussLobattoLegendreCreateType`
1819 
1820 @*/
1821 PetscErrorCode PetscDTGaussLobattoLegendreQuadrature(PetscInt npoints, PetscGaussLobattoLegendreCreateType type, PetscReal *x, PetscReal *w)
1822 {
1823   PetscBool newton;
1824 
1825   PetscFunctionBegin;
1826   PetscCheck(npoints >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must provide at least 2 grid points per element");
1827   newton = (PetscBool)(type == PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON);
1828   PetscCall(PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, 0., 0., x, w, newton));
1829   PetscFunctionReturn(PETSC_SUCCESS);
1830 }
1831 
1832 /*@
1833   PetscDTGaussTensorQuadrature - creates a tensor-product Gauss quadrature
1834 
1835   Not Collective
1836 
1837   Input Parameters:
1838 + dim     - The spatial dimension
1839 . Nc      - The number of components
1840 . npoints - number of points in one dimension
1841 . a       - left end of interval (often-1)
1842 - b       - right end of interval (often +1)
1843 
1844   Output Parameter:
1845 . q - A `PetscQuadrature` object
1846 
1847   Level: intermediate
1848 
1849 .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
1850 @*/
1851 PetscErrorCode PetscDTGaussTensorQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1852 {
1853   PetscInt   totpoints = dim > 1 ? dim > 2 ? npoints * PetscSqr(npoints) : PetscSqr(npoints) : npoints, i, j, k, c;
1854   PetscReal *x, *w, *xw, *ww;
1855 
1856   PetscFunctionBegin;
1857   PetscCall(PetscMalloc1(totpoints * dim, &x));
1858   PetscCall(PetscMalloc1(totpoints * Nc, &w));
1859   /* Set up the Golub-Welsch system */
1860   switch (dim) {
1861   case 0:
1862     PetscCall(PetscFree(x));
1863     PetscCall(PetscFree(w));
1864     PetscCall(PetscMalloc1(1, &x));
1865     PetscCall(PetscMalloc1(Nc, &w));
1866     x[0] = 0.0;
1867     for (c = 0; c < Nc; ++c) w[c] = 1.0;
1868     break;
1869   case 1:
1870     PetscCall(PetscMalloc1(npoints, &ww));
1871     PetscCall(PetscDTGaussQuadrature(npoints, a, b, x, ww));
1872     for (i = 0; i < npoints; ++i)
1873       for (c = 0; c < Nc; ++c) w[i * Nc + c] = ww[i];
1874     PetscCall(PetscFree(ww));
1875     break;
1876   case 2:
1877     PetscCall(PetscMalloc2(npoints, &xw, npoints, &ww));
1878     PetscCall(PetscDTGaussQuadrature(npoints, a, b, xw, ww));
1879     for (i = 0; i < npoints; ++i) {
1880       for (j = 0; j < npoints; ++j) {
1881         x[(i * npoints + j) * dim + 0] = xw[i];
1882         x[(i * npoints + j) * dim + 1] = xw[j];
1883         for (c = 0; c < Nc; ++c) w[(i * npoints + j) * Nc + c] = ww[i] * ww[j];
1884       }
1885     }
1886     PetscCall(PetscFree2(xw, ww));
1887     break;
1888   case 3:
1889     PetscCall(PetscMalloc2(npoints, &xw, npoints, &ww));
1890     PetscCall(PetscDTGaussQuadrature(npoints, a, b, xw, ww));
1891     for (i = 0; i < npoints; ++i) {
1892       for (j = 0; j < npoints; ++j) {
1893         for (k = 0; k < npoints; ++k) {
1894           x[((i * npoints + j) * npoints + k) * dim + 0] = xw[i];
1895           x[((i * npoints + j) * npoints + k) * dim + 1] = xw[j];
1896           x[((i * npoints + j) * npoints + k) * dim + 2] = xw[k];
1897           for (c = 0; c < Nc; ++c) w[((i * npoints + j) * npoints + k) * Nc + c] = ww[i] * ww[j] * ww[k];
1898         }
1899       }
1900     }
1901     PetscCall(PetscFree2(xw, ww));
1902     break;
1903   default:
1904     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %" PetscInt_FMT, dim);
1905   }
1906   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
1907   PetscCall(PetscQuadratureSetOrder(*q, 2 * npoints - 1));
1908   PetscCall(PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w));
1909   PetscCall(PetscObjectChangeTypeName((PetscObject)*q, "GaussTensor"));
1910   PetscFunctionReturn(PETSC_SUCCESS);
1911 }
1912 
1913 /*@
1914   PetscDTStroudConicalQuadrature - create Stroud conical quadrature for a simplex
1915 
1916   Not Collective
1917 
1918   Input Parameters:
1919 + dim     - The simplex dimension
1920 . Nc      - The number of components
1921 . npoints - The number of points in one dimension
1922 . a       - left end of interval (often-1)
1923 - b       - right end of interval (often +1)
1924 
1925   Output Parameter:
1926 . q - A PetscQuadrature object
1927 
1928   Level: intermediate
1929 
1930   Note:
1931   For dim == 1, this is Gauss-Legendre quadrature
1932 
1933   References:
1934 . * - Karniadakis and Sherwin.  FIAT
1935 
1936 .seealso: `PetscDTGaussTensorQuadrature()`, `PetscDTGaussQuadrature()`
1937 @*/
1938 PetscErrorCode PetscDTStroudConicalQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1939 {
1940   PetscInt   totprev, totrem;
1941   PetscInt   totpoints;
1942   PetscReal *p1, *w1;
1943   PetscReal *x, *w;
1944   PetscInt   i, j, k, l, m, pt, c;
1945 
1946   PetscFunctionBegin;
1947   PetscCheck(!(a != -1.0) && !(b != 1.0), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
1948   totpoints = 1;
1949   for (i = 0, totpoints = 1; i < dim; i++) totpoints *= npoints;
1950   PetscCall(PetscMalloc1(totpoints * dim, &x));
1951   PetscCall(PetscMalloc1(totpoints * Nc, &w));
1952   PetscCall(PetscMalloc2(npoints, &p1, npoints, &w1));
1953   for (i = 0; i < totpoints * Nc; i++) w[i] = 1.;
1954   for (i = 0, totprev = 1, totrem = totpoints / npoints; i < dim; i++) {
1955     PetscReal mul;
1956 
1957     mul = PetscPowReal(2., -i);
1958     PetscCall(PetscDTGaussJacobiQuadrature(npoints, -1., 1., i, 0.0, p1, w1));
1959     for (pt = 0, l = 0; l < totprev; l++) {
1960       for (j = 0; j < npoints; j++) {
1961         for (m = 0; m < totrem; m++, pt++) {
1962           for (k = 0; k < i; k++) x[pt * dim + k] = (x[pt * dim + k] + 1.) * (1. - p1[j]) * 0.5 - 1.;
1963           x[pt * dim + i] = p1[j];
1964           for (c = 0; c < Nc; c++) w[pt * Nc + c] *= mul * w1[j];
1965         }
1966       }
1967     }
1968     totprev *= npoints;
1969     totrem /= npoints;
1970   }
1971   PetscCall(PetscFree2(p1, w1));
1972   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
1973   PetscCall(PetscQuadratureSetOrder(*q, 2 * npoints - 1));
1974   PetscCall(PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w));
1975   PetscCall(PetscObjectChangeTypeName((PetscObject)*q, "StroudConical"));
1976   PetscFunctionReturn(PETSC_SUCCESS);
1977 }
1978 
1979 static PetscBool MinSymTriQuadCite       = PETSC_FALSE;
1980 const char       MinSymTriQuadCitation[] = "@article{WitherdenVincent2015,\n"
1981                                            "  title = {On the identification of symmetric quadrature rules for finite element methods},\n"
1982                                            "  journal = {Computers & Mathematics with Applications},\n"
1983                                            "  volume = {69},\n"
1984                                            "  number = {10},\n"
1985                                            "  pages = {1232-1241},\n"
1986                                            "  year = {2015},\n"
1987                                            "  issn = {0898-1221},\n"
1988                                            "  doi = {10.1016/j.camwa.2015.03.017},\n"
1989                                            "  url = {https://www.sciencedirect.com/science/article/pii/S0898122115001224},\n"
1990                                            "  author = {F.D. Witherden and P.E. Vincent},\n"
1991                                            "}\n";
1992 
1993 #include "petscdttriquadrules.h"
1994 
1995 static PetscBool MinSymTetQuadCite       = PETSC_FALSE;
1996 const char       MinSymTetQuadCitation[] = "@article{JaskowiecSukumar2021\n"
1997                                            "  author = {Jaskowiec, Jan and Sukumar, N.},\n"
1998                                            "  title = {High-order symmetric cubature rules for tetrahedra and pyramids},\n"
1999                                            "  journal = {International Journal for Numerical Methods in Engineering},\n"
2000                                            "  volume = {122},\n"
2001                                            "  number = {1},\n"
2002                                            "  pages = {148-171},\n"
2003                                            "  doi = {10.1002/nme.6528},\n"
2004                                            "  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nme.6528},\n"
2005                                            "  eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nme.6528},\n"
2006                                            "  year = {2021}\n"
2007                                            "}\n";
2008 
2009 #include "petscdttetquadrules.h"
2010 
2011 // https://en.wikipedia.org/wiki/Partition_(number_theory)
2012 static PetscErrorCode PetscDTPartitionNumber(PetscInt n, PetscInt *p)
2013 {
2014   // sequence A000041 in the OEIS
2015   const PetscInt partition[]   = {1, 1, 2, 3, 5, 7, 11, 15, 22, 30, 42, 56, 77, 101, 135, 176, 231, 297, 385, 490, 627, 792, 1002, 1255, 1575, 1958, 2436, 3010, 3718, 4565, 5604};
2016   PetscInt       tabulated_max = PETSC_STATIC_ARRAY_LENGTH(partition) - 1;
2017 
2018   PetscFunctionBegin;
2019   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Partition number not defined for negative number %" PetscInt_FMT, n);
2020   // not implementing the pentagonal number recurrence, we don't need partition numbers for n that high
2021   PetscCheck(n <= tabulated_max, PETSC_COMM_SELF, PETSC_ERR_SUP, "Partition numbers only tabulated up to %" PetscInt_FMT ", not computed for %" PetscInt_FMT, tabulated_max, n);
2022   *p = partition[n];
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 /*@
2027   PetscDTSimplexQuadrature - Create a quadrature rule for a simplex that exactly integrates polynomials up to a given degree.
2028 
2029   Not Collective
2030 
2031   Input Parameters:
2032 + dim     - The spatial dimension of the simplex (1 = segment, 2 = triangle, 3 = tetrahedron)
2033 . degree  - The largest polynomial degree that is required to be integrated exactly
2034 - type    - left end of interval (often-1)
2035 
2036   Output Parameter:
2037 . quad    - A `PetscQuadrature` object for integration over the biunit simplex
2038             (defined by the bounds $x_i >= -1$ and $\sum_i x_i <= 2 - d$) that is exact for
2039             polynomials up to the given degree
2040 
2041   Level: intermediate
2042 
2043 .seealso: `PetscDTSimplexQuadratureType`, `PetscDTGaussQuadrature()`, `PetscDTStroudCononicalQuadrature()`, `PetscQuadrature`
2044 @*/
2045 PetscErrorCode PetscDTSimplexQuadrature(PetscInt dim, PetscInt degree, PetscDTSimplexQuadratureType type, PetscQuadrature *quad)
2046 {
2047   PetscDTSimplexQuadratureType orig_type = type;
2048 
2049   PetscFunctionBegin;
2050   PetscCheck(dim >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative dimension %" PetscInt_FMT, dim);
2051   PetscCheck(degree >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative degree %" PetscInt_FMT, degree);
2052   if (type == PETSCDTSIMPLEXQUAD_DEFAULT) type = PETSCDTSIMPLEXQUAD_MINSYM;
2053   if (type == PETSCDTSIMPLEXQUAD_CONIC || dim < 2) {
2054     PetscInt points_per_dim = (degree + 2) / 2; // ceil((degree + 1) / 2);
2055     PetscCall(PetscDTStroudConicalQuadrature(dim, 1, points_per_dim, -1, 1, quad));
2056   } else {
2057     PetscInt          n    = dim + 1;
2058     PetscInt          fact = 1;
2059     PetscInt         *part, *perm;
2060     PetscInt          p = 0;
2061     PetscInt          max_degree;
2062     const PetscInt   *nodes_per_type     = NULL;
2063     const PetscInt   *all_num_full_nodes = NULL;
2064     const PetscReal **weights_list       = NULL;
2065     const PetscReal **compact_nodes_list = NULL;
2066     const char       *citation           = NULL;
2067     PetscBool        *cited              = NULL;
2068 
2069     switch (dim) {
2070     case 2:
2071       cited              = &MinSymTriQuadCite;
2072       citation           = MinSymTriQuadCitation;
2073       max_degree         = PetscDTWVTriQuad_max_degree;
2074       nodes_per_type     = PetscDTWVTriQuad_num_orbits;
2075       all_num_full_nodes = PetscDTWVTriQuad_num_nodes;
2076       weights_list       = PetscDTWVTriQuad_weights;
2077       compact_nodes_list = PetscDTWVTriQuad_orbits;
2078       break;
2079     case 3:
2080       cited              = &MinSymTetQuadCite;
2081       citation           = MinSymTetQuadCitation;
2082       max_degree         = PetscDTJSTetQuad_max_degree;
2083       nodes_per_type     = PetscDTJSTetQuad_num_orbits;
2084       all_num_full_nodes = PetscDTJSTetQuad_num_nodes;
2085       weights_list       = PetscDTJSTetQuad_weights;
2086       compact_nodes_list = PetscDTJSTetQuad_orbits;
2087       break;
2088     default:
2089       max_degree = -1;
2090       break;
2091     }
2092 
2093     if (degree > max_degree) {
2094       if (orig_type == PETSCDTSIMPLEXQUAD_DEFAULT) {
2095         // fall back to conic
2096         PetscCall(PetscDTSimplexQuadrature(dim, degree, PETSCDTSIMPLEXQUAD_CONIC, quad));
2097         PetscFunctionReturn(PETSC_SUCCESS);
2098       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Minimal symmetric quadrature for dim %" PetscInt_FMT ", degree %" PetscInt_FMT " unsupported", dim, degree);
2099     }
2100 
2101     PetscCall(PetscCitationsRegister(citation, cited));
2102 
2103     PetscCall(PetscDTPartitionNumber(n, &p));
2104     for (PetscInt d = 2; d <= n; d++) fact *= d;
2105 
2106     PetscInt         num_full_nodes      = all_num_full_nodes[degree];
2107     const PetscReal *all_compact_nodes   = compact_nodes_list[degree];
2108     const PetscReal *all_compact_weights = weights_list[degree];
2109     nodes_per_type                       = &nodes_per_type[p * degree];
2110 
2111     PetscReal      *points;
2112     PetscReal      *counts;
2113     PetscReal      *weights;
2114     PetscReal      *bary_to_biunit; // row-major transformation of barycentric coordinate to biunit
2115     PetscQuadrature q;
2116 
2117     // compute the transformation
2118     PetscCall(PetscMalloc1(n * dim, &bary_to_biunit));
2119     for (PetscInt d = 0; d < dim; d++) {
2120       for (PetscInt b = 0; b < n; b++) bary_to_biunit[d * n + b] = (d == b) ? 1.0 : -1.0;
2121     }
2122 
2123     PetscCall(PetscMalloc3(n, &part, n, &perm, n, &counts));
2124     PetscCall(PetscCalloc1(num_full_nodes * dim, &points));
2125     PetscCall(PetscMalloc1(num_full_nodes, &weights));
2126 
2127     // (0, 0, ...) is the first partition lexicographically
2128     PetscCall(PetscArrayzero(part, n));
2129     PetscCall(PetscArrayzero(counts, n));
2130     counts[0] = n;
2131 
2132     // for each partition
2133     for (PetscInt s = 0, node_offset = 0; s < p; s++) {
2134       PetscInt num_compact_coords = part[n - 1] + 1;
2135 
2136       const PetscReal *compact_nodes   = all_compact_nodes;
2137       const PetscReal *compact_weights = all_compact_weights;
2138       all_compact_nodes += num_compact_coords * nodes_per_type[s];
2139       all_compact_weights += nodes_per_type[s];
2140 
2141       // for every permutation of the vertices
2142       for (PetscInt f = 0; f < fact; f++) {
2143         PetscCall(PetscDTEnumPerm(n, f, perm, NULL));
2144 
2145         // check if it is a valid permutation
2146         PetscInt digit;
2147         for (digit = 1; digit < n; digit++) {
2148           // skip permutations that would duplicate a node because it has a smaller symmetry group
2149           if (part[digit - 1] == part[digit] && perm[digit - 1] > perm[digit]) break;
2150         }
2151         if (digit < n) continue;
2152 
2153         // create full nodes from this permutation of the compact nodes
2154         PetscReal *full_nodes   = &points[node_offset * dim];
2155         PetscReal *full_weights = &weights[node_offset];
2156 
2157         PetscCall(PetscArraycpy(full_weights, compact_weights, nodes_per_type[s]));
2158         for (PetscInt b = 0; b < n; b++) {
2159           for (PetscInt d = 0; d < dim; d++) {
2160             for (PetscInt node = 0; node < nodes_per_type[s]; node++) full_nodes[node * dim + d] += bary_to_biunit[d * n + perm[b]] * compact_nodes[node * num_compact_coords + part[b]];
2161           }
2162         }
2163         node_offset += nodes_per_type[s];
2164       }
2165 
2166       if (s < p - 1) { // Generate the next partition
2167         /* A partition is described by the number of coordinates that are in
2168          * each set of duplicates (counts) and redundantly by mapping each
2169          * index to its set of duplicates (part)
2170          *
2171          * Counts should always be in nonincreasing order
2172          *
2173          * We want to generate the partitions lexically by part, which means
2174          * finding the last index where count > 1 and reducing by 1.
2175          *
2176          * For the new counts beyond that index, we eagerly assign the remaining
2177          * capacity of the partition to smaller indices (ensures lexical ordering),
2178          * while respecting the nonincreasing invariant of the counts
2179          */
2180         PetscInt last_digit            = part[n - 1];
2181         PetscInt last_digit_with_extra = last_digit;
2182         while (counts[last_digit_with_extra] == 1) last_digit_with_extra--;
2183         PetscInt limit               = --counts[last_digit_with_extra];
2184         PetscInt total_to_distribute = last_digit - last_digit_with_extra + 1;
2185         for (PetscInt digit = last_digit_with_extra + 1; digit < n; digit++) {
2186           counts[digit] = PetscMin(limit, total_to_distribute);
2187           total_to_distribute -= PetscMin(limit, total_to_distribute);
2188         }
2189         for (PetscInt digit = 0, offset = 0; digit < n; digit++) {
2190           PetscInt count = counts[digit];
2191           for (PetscInt c = 0; c < count; c++) part[offset++] = digit;
2192         }
2193       }
2194     }
2195     PetscCall(PetscFree3(part, perm, counts));
2196     PetscCall(PetscFree(bary_to_biunit));
2197     PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, &q));
2198     PetscCall(PetscQuadratureSetOrder(q, degree));
2199     PetscCall(PetscQuadratureSetData(q, dim, 1, num_full_nodes, points, weights));
2200     *quad = q;
2201   }
2202   PetscFunctionReturn(PETSC_SUCCESS);
2203 }
2204 
2205 /*@
2206   PetscDTTanhSinhTensorQuadrature - create tanh-sinh quadrature for a tensor product cell
2207 
2208   Not Collective
2209 
2210   Input Parameters:
2211 + dim   - The cell dimension
2212 . level - The number of points in one dimension, 2^l
2213 . a     - left end of interval (often-1)
2214 - b     - right end of interval (often +1)
2215 
2216   Output Parameter:
2217 . q - A `PetscQuadrature` object
2218 
2219   Level: intermediate
2220 
2221 .seealso: `PetscDTGaussTensorQuadrature()`, `PetscQuadrature`
2222 @*/
2223 PetscErrorCode PetscDTTanhSinhTensorQuadrature(PetscInt dim, PetscInt level, PetscReal a, PetscReal b, PetscQuadrature *q)
2224 {
2225   const PetscInt  p     = 16;                        /* Digits of precision in the evaluation */
2226   const PetscReal alpha = (b - a) / 2.;              /* Half-width of the integration interval */
2227   const PetscReal beta  = (b + a) / 2.;              /* Center of the integration interval */
2228   const PetscReal h     = PetscPowReal(2.0, -level); /* Step size, length between x_k */
2229   PetscReal       xk;                                /* Quadrature point x_k on reference domain [-1, 1] */
2230   PetscReal       wk = 0.5 * PETSC_PI;               /* Quadrature weight at x_k */
2231   PetscReal      *x, *w;
2232   PetscInt        K, k, npoints;
2233 
2234   PetscFunctionBegin;
2235   PetscCheck(dim <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Dimension %" PetscInt_FMT " not yet implemented", dim);
2236   PetscCheck(level, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a number of significant digits");
2237   /* Find K such that the weights are < 32 digits of precision */
2238   for (K = 1; PetscAbsReal(PetscLog10Real(wk)) < 2 * p; ++K) wk = 0.5 * h * PETSC_PI * PetscCoshReal(K * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(K * h)));
2239   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
2240   PetscCall(PetscQuadratureSetOrder(*q, 2 * K + 1));
2241   npoints = 2 * K - 1;
2242   PetscCall(PetscMalloc1(npoints * dim, &x));
2243   PetscCall(PetscMalloc1(npoints, &w));
2244   /* Center term */
2245   x[0] = beta;
2246   w[0] = 0.5 * alpha * PETSC_PI;
2247   for (k = 1; k < K; ++k) {
2248     wk           = 0.5 * alpha * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2249     xk           = PetscTanhReal(0.5 * PETSC_PI * PetscSinhReal(k * h));
2250     x[2 * k - 1] = -alpha * xk + beta;
2251     w[2 * k - 1] = wk;
2252     x[2 * k + 0] = alpha * xk + beta;
2253     w[2 * k + 0] = wk;
2254   }
2255   PetscCall(PetscQuadratureSetData(*q, dim, 1, npoints, x, w));
2256   PetscFunctionReturn(PETSC_SUCCESS);
2257 }
2258 
2259 PetscErrorCode PetscDTTanhSinhIntegrate(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2260 {
2261   const PetscInt  p     = 16;           /* Digits of precision in the evaluation */
2262   const PetscReal alpha = (b - a) / 2.; /* Half-width of the integration interval */
2263   const PetscReal beta  = (b + a) / 2.; /* Center of the integration interval */
2264   PetscReal       h     = 1.0;          /* Step size, length between x_k */
2265   PetscInt        l     = 0;            /* Level of refinement, h = 2^{-l} */
2266   PetscReal       osum  = 0.0;          /* Integral on last level */
2267   PetscReal       psum  = 0.0;          /* Integral on the level before the last level */
2268   PetscReal       sum;                  /* Integral on current level */
2269   PetscReal       yk;                   /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2270   PetscReal       lx, rx;               /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2271   PetscReal       wk;                   /* Quadrature weight at x_k */
2272   PetscReal       lval, rval;           /* Terms in the quadature sum to the left and right of 0 */
2273   PetscInt        d;                    /* Digits of precision in the integral */
2274 
2275   PetscFunctionBegin;
2276   PetscCheck(digits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a positive number of significant digits");
2277   /* Center term */
2278   func(&beta, ctx, &lval);
2279   sum = 0.5 * alpha * PETSC_PI * lval;
2280   /* */
2281   do {
2282     PetscReal lterm, rterm, maxTerm = 0.0, d1, d2, d3, d4;
2283     PetscInt  k = 1;
2284 
2285     ++l;
2286     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2287     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2288     psum = osum;
2289     osum = sum;
2290     h *= 0.5;
2291     sum *= 0.5;
2292     do {
2293       wk = 0.5 * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2294       yk = 1.0 / (PetscExpReal(0.5 * PETSC_PI * PetscSinhReal(k * h)) * PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2295       lx = -alpha * (1.0 - yk) + beta;
2296       rx = alpha * (1.0 - yk) + beta;
2297       func(&lx, ctx, &lval);
2298       func(&rx, ctx, &rval);
2299       lterm   = alpha * wk * lval;
2300       maxTerm = PetscMax(PetscAbsReal(lterm), maxTerm);
2301       sum += lterm;
2302       rterm   = alpha * wk * rval;
2303       maxTerm = PetscMax(PetscAbsReal(rterm), maxTerm);
2304       sum += rterm;
2305       ++k;
2306       /* Only need to evaluate every other point on refined levels */
2307       if (l != 1) ++k;
2308     } while (PetscAbsReal(PetscLog10Real(wk)) < p); /* Only need to evaluate sum until weights are < 32 digits of precision */
2309 
2310     d1 = PetscLog10Real(PetscAbsReal(sum - osum));
2311     d2 = PetscLog10Real(PetscAbsReal(sum - psum));
2312     d3 = PetscLog10Real(maxTerm) - p;
2313     if (PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)) == 0.0) d4 = 0.0;
2314     else d4 = PetscLog10Real(PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)));
2315     d = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2316   } while (d < digits && l < 12);
2317   *sol = sum;
2318 
2319   PetscFunctionReturn(PETSC_SUCCESS);
2320 }
2321 
2322 #if defined(PETSC_HAVE_MPFR)
2323 PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2324 {
2325   const PetscInt safetyFactor = 2; /* Calculate abcissa until 2*p digits */
2326   PetscInt       l            = 0; /* Level of refinement, h = 2^{-l} */
2327   mpfr_t         alpha;            /* Half-width of the integration interval */
2328   mpfr_t         beta;             /* Center of the integration interval */
2329   mpfr_t         h;                /* Step size, length between x_k */
2330   mpfr_t         osum;             /* Integral on last level */
2331   mpfr_t         psum;             /* Integral on the level before the last level */
2332   mpfr_t         sum;              /* Integral on current level */
2333   mpfr_t         yk;               /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2334   mpfr_t         lx, rx;           /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2335   mpfr_t         wk;               /* Quadrature weight at x_k */
2336   PetscReal      lval, rval, rtmp; /* Terms in the quadature sum to the left and right of 0 */
2337   PetscInt       d;                /* Digits of precision in the integral */
2338   mpfr_t         pi2, kh, msinh, mcosh, maxTerm, curTerm, tmp;
2339 
2340   PetscFunctionBegin;
2341   PetscCheck(digits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a positive number of significant digits");
2342   /* Create high precision storage */
2343   mpfr_inits2(PetscCeilReal(safetyFactor * digits * PetscLogReal(10.) / PetscLogReal(2.)), alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2344   /* Initialization */
2345   mpfr_set_d(alpha, 0.5 * (b - a), MPFR_RNDN);
2346   mpfr_set_d(beta, 0.5 * (b + a), MPFR_RNDN);
2347   mpfr_set_d(osum, 0.0, MPFR_RNDN);
2348   mpfr_set_d(psum, 0.0, MPFR_RNDN);
2349   mpfr_set_d(h, 1.0, MPFR_RNDN);
2350   mpfr_const_pi(pi2, MPFR_RNDN);
2351   mpfr_mul_d(pi2, pi2, 0.5, MPFR_RNDN);
2352   /* Center term */
2353   rtmp = 0.5 * (b + a);
2354   func(&rtmp, ctx, &lval);
2355   mpfr_set(sum, pi2, MPFR_RNDN);
2356   mpfr_mul(sum, sum, alpha, MPFR_RNDN);
2357   mpfr_mul_d(sum, sum, lval, MPFR_RNDN);
2358   /* */
2359   do {
2360     PetscReal d1, d2, d3, d4;
2361     PetscInt  k = 1;
2362 
2363     ++l;
2364     mpfr_set_d(maxTerm, 0.0, MPFR_RNDN);
2365     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2366     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2367     mpfr_set(psum, osum, MPFR_RNDN);
2368     mpfr_set(osum, sum, MPFR_RNDN);
2369     mpfr_mul_d(h, h, 0.5, MPFR_RNDN);
2370     mpfr_mul_d(sum, sum, 0.5, MPFR_RNDN);
2371     do {
2372       mpfr_set_si(kh, k, MPFR_RNDN);
2373       mpfr_mul(kh, kh, h, MPFR_RNDN);
2374       /* Weight */
2375       mpfr_set(wk, h, MPFR_RNDN);
2376       mpfr_sinh_cosh(msinh, mcosh, kh, MPFR_RNDN);
2377       mpfr_mul(msinh, msinh, pi2, MPFR_RNDN);
2378       mpfr_mul(mcosh, mcosh, pi2, MPFR_RNDN);
2379       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2380       mpfr_sqr(tmp, tmp, MPFR_RNDN);
2381       mpfr_mul(wk, wk, mcosh, MPFR_RNDN);
2382       mpfr_div(wk, wk, tmp, MPFR_RNDN);
2383       /* Abscissa */
2384       mpfr_set_d(yk, 1.0, MPFR_RNDZ);
2385       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2386       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2387       mpfr_exp(tmp, msinh, MPFR_RNDN);
2388       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2389       /* Quadrature points */
2390       mpfr_sub_d(lx, yk, 1.0, MPFR_RNDZ);
2391       mpfr_mul(lx, lx, alpha, MPFR_RNDU);
2392       mpfr_add(lx, lx, beta, MPFR_RNDU);
2393       mpfr_d_sub(rx, 1.0, yk, MPFR_RNDZ);
2394       mpfr_mul(rx, rx, alpha, MPFR_RNDD);
2395       mpfr_add(rx, rx, beta, MPFR_RNDD);
2396       /* Evaluation */
2397       rtmp = mpfr_get_d(lx, MPFR_RNDU);
2398       func(&rtmp, ctx, &lval);
2399       rtmp = mpfr_get_d(rx, MPFR_RNDD);
2400       func(&rtmp, ctx, &rval);
2401       /* Update */
2402       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2403       mpfr_mul_d(tmp, tmp, lval, MPFR_RNDN);
2404       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2405       mpfr_abs(tmp, tmp, MPFR_RNDN);
2406       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2407       mpfr_set(curTerm, tmp, MPFR_RNDN);
2408       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2409       mpfr_mul_d(tmp, tmp, rval, MPFR_RNDN);
2410       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2411       mpfr_abs(tmp, tmp, MPFR_RNDN);
2412       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2413       mpfr_max(curTerm, curTerm, tmp, MPFR_RNDN);
2414       ++k;
2415       /* Only need to evaluate every other point on refined levels */
2416       if (l != 1) ++k;
2417       mpfr_log10(tmp, wk, MPFR_RNDN);
2418       mpfr_abs(tmp, tmp, MPFR_RNDN);
2419     } while (mpfr_get_d(tmp, MPFR_RNDN) < safetyFactor * digits); /* Only need to evaluate sum until weights are < 32 digits of precision */
2420     mpfr_sub(tmp, sum, osum, MPFR_RNDN);
2421     mpfr_abs(tmp, tmp, MPFR_RNDN);
2422     mpfr_log10(tmp, tmp, MPFR_RNDN);
2423     d1 = mpfr_get_d(tmp, MPFR_RNDN);
2424     mpfr_sub(tmp, sum, psum, MPFR_RNDN);
2425     mpfr_abs(tmp, tmp, MPFR_RNDN);
2426     mpfr_log10(tmp, tmp, MPFR_RNDN);
2427     d2 = mpfr_get_d(tmp, MPFR_RNDN);
2428     mpfr_log10(tmp, maxTerm, MPFR_RNDN);
2429     d3 = mpfr_get_d(tmp, MPFR_RNDN) - digits;
2430     mpfr_log10(tmp, curTerm, MPFR_RNDN);
2431     d4 = mpfr_get_d(tmp, MPFR_RNDN);
2432     d  = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2433   } while (d < digits && l < 8);
2434   *sol = mpfr_get_d(sum, MPFR_RNDN);
2435   /* Cleanup */
2436   mpfr_clears(alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2437   PetscFunctionReturn(PETSC_SUCCESS);
2438 }
2439 #else
2440 
2441 PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2442 {
2443   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "This method will not work without MPFR. Reconfigure using --download-mpfr --download-gmp");
2444 }
2445 #endif
2446 
2447 /*@
2448   PetscDTTensorQuadratureCreate - create the tensor product quadrature from two lower-dimensional quadratures
2449 
2450   Not Collective
2451 
2452   Input Parameters:
2453 + q1 - The first quadrature
2454 - q2 - The second quadrature
2455 
2456   Output Parameter:
2457 . q - A `PetscQuadrature` object
2458 
2459   Level: intermediate
2460 
2461 .seealso: `PetscQuadrature`, `PetscDTGaussTensorQuadrature()`
2462 @*/
2463 PetscErrorCode PetscDTTensorQuadratureCreate(PetscQuadrature q1, PetscQuadrature q2, PetscQuadrature *q)
2464 {
2465   const PetscReal *x1, *w1, *x2, *w2;
2466   PetscReal       *x, *w;
2467   PetscInt         dim1, Nc1, Np1, order1, qa, d1;
2468   PetscInt         dim2, Nc2, Np2, order2, qb, d2;
2469   PetscInt         dim, Nc, Np, order, qc, d;
2470 
2471   PetscFunctionBegin;
2472   PetscValidHeaderSpecific(q1, PETSCQUADRATURE_CLASSID, 1);
2473   PetscValidHeaderSpecific(q2, PETSCQUADRATURE_CLASSID, 2);
2474   PetscValidPointer(q, 3);
2475   PetscCall(PetscQuadratureGetOrder(q1, &order1));
2476   PetscCall(PetscQuadratureGetOrder(q2, &order2));
2477   PetscCheck(order1 == order2, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Order1 %" PetscInt_FMT " != %" PetscInt_FMT " Order2", order1, order2);
2478   PetscCall(PetscQuadratureGetData(q1, &dim1, &Nc1, &Np1, &x1, &w1));
2479   PetscCall(PetscQuadratureGetData(q2, &dim2, &Nc2, &Np2, &x2, &w2));
2480   PetscCheck(Nc1 == Nc2, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "NumComp1 %" PetscInt_FMT " != %" PetscInt_FMT " NumComp2", Nc1, Nc2);
2481 
2482   dim   = dim1 + dim2;
2483   Nc    = Nc1;
2484   Np    = Np1 * Np2;
2485   order = order1;
2486   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
2487   PetscCall(PetscQuadratureSetOrder(*q, order));
2488   PetscCall(PetscMalloc1(Np * dim, &x));
2489   PetscCall(PetscMalloc1(Np, &w));
2490   for (qa = 0, qc = 0; qa < Np1; ++qa) {
2491     for (qb = 0; qb < Np2; ++qb, ++qc) {
2492       for (d1 = 0, d = 0; d1 < dim1; ++d1, ++d) x[qc * dim + d] = x1[qa * dim1 + d1];
2493       for (d2 = 0; d2 < dim2; ++d2, ++d) x[qc * dim + d] = x2[qb * dim2 + d2];
2494       w[qc] = w1[qa] * w2[qb];
2495     }
2496   }
2497   PetscCall(PetscQuadratureSetData(*q, dim, Nc, Np, x, w));
2498   PetscFunctionReturn(PETSC_SUCCESS);
2499 }
2500 
2501 /* Overwrites A. Can only handle full-rank problems with m>=n
2502    A in column-major format
2503    Ainv in row-major format
2504    tau has length m
2505    worksize must be >= max(1,n)
2506  */
2507 static PetscErrorCode PetscDTPseudoInverseQR(PetscInt m, PetscInt mstride, PetscInt n, PetscReal *A_in, PetscReal *Ainv_out, PetscScalar *tau, PetscInt worksize, PetscScalar *work)
2508 {
2509   PetscBLASInt M, N, K, lda, ldb, ldwork, info;
2510   PetscScalar *A, *Ainv, *R, *Q, Alpha;
2511 
2512   PetscFunctionBegin;
2513 #if defined(PETSC_USE_COMPLEX)
2514   {
2515     PetscInt i, j;
2516     PetscCall(PetscMalloc2(m * n, &A, m * n, &Ainv));
2517     for (j = 0; j < n; j++) {
2518       for (i = 0; i < m; i++) A[i + m * j] = A_in[i + mstride * j];
2519     }
2520     mstride = m;
2521   }
2522 #else
2523   A = A_in;
2524   Ainv = Ainv_out;
2525 #endif
2526 
2527   PetscCall(PetscBLASIntCast(m, &M));
2528   PetscCall(PetscBLASIntCast(n, &N));
2529   PetscCall(PetscBLASIntCast(mstride, &lda));
2530   PetscCall(PetscBLASIntCast(worksize, &ldwork));
2531   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
2532   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&M, &N, A, &lda, tau, work, &ldwork, &info));
2533   PetscCall(PetscFPTrapPop());
2534   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "xGEQRF error");
2535   R = A; /* Upper triangular part of A now contains R, the rest contains the elementary reflectors */
2536 
2537   /* Extract an explicit representation of Q */
2538   Q = Ainv;
2539   PetscCall(PetscArraycpy(Q, A, mstride * n));
2540   K = N; /* full rank */
2541   PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&M, &N, &K, Q, &lda, tau, work, &ldwork, &info));
2542   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "xORGQR/xUNGQR error");
2543 
2544   /* Compute A^{-T} = (R^{-1} Q^T)^T = Q R^{-T} */
2545   Alpha = 1.0;
2546   ldb   = lda;
2547   PetscCallBLAS("BLAStrsm", BLAStrsm_("Right", "Upper", "ConjugateTranspose", "NotUnitTriangular", &M, &N, &Alpha, R, &lda, Q, &ldb));
2548   /* Ainv is Q, overwritten with inverse */
2549 
2550 #if defined(PETSC_USE_COMPLEX)
2551   {
2552     PetscInt i;
2553     for (i = 0; i < m * n; i++) Ainv_out[i] = PetscRealPart(Ainv[i]);
2554     PetscCall(PetscFree2(A, Ainv));
2555   }
2556 #endif
2557   PetscFunctionReturn(PETSC_SUCCESS);
2558 }
2559 
2560 /* Computes integral of L_p' over intervals {(x0,x1),(x1,x2),...} */
2561 static PetscErrorCode PetscDTLegendreIntegrate(PetscInt ninterval, const PetscReal *x, PetscInt ndegree, const PetscInt *degrees, PetscBool Transpose, PetscReal *B)
2562 {
2563   PetscReal *Bv;
2564   PetscInt   i, j;
2565 
2566   PetscFunctionBegin;
2567   PetscCall(PetscMalloc1((ninterval + 1) * ndegree, &Bv));
2568   /* Point evaluation of L_p on all the source vertices */
2569   PetscCall(PetscDTLegendreEval(ninterval + 1, x, ndegree, degrees, Bv, NULL, NULL));
2570   /* Integral over each interval: \int_a^b L_p' = L_p(b)-L_p(a) */
2571   for (i = 0; i < ninterval; i++) {
2572     for (j = 0; j < ndegree; j++) {
2573       if (Transpose) B[i + ninterval * j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2574       else B[i * ndegree + j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2575     }
2576   }
2577   PetscCall(PetscFree(Bv));
2578   PetscFunctionReturn(PETSC_SUCCESS);
2579 }
2580 
2581 /*@
2582    PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals
2583 
2584    Not Collective
2585 
2586    Input Parameters:
2587 +  degree - degree of reconstruction polynomial
2588 .  nsource - number of source intervals
2589 .  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
2590 .  ntarget - number of target intervals
2591 -  targetx - sorted coordinates of target cell boundaries (length ntarget+1)
2592 
2593    Output Parameter:
2594 .  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]
2595 
2596    Level: advanced
2597 
2598 .seealso: `PetscDTLegendreEval()`
2599 @*/
2600 PetscErrorCode PetscDTReconstructPoly(PetscInt degree, PetscInt nsource, const PetscReal *sourcex, PetscInt ntarget, const PetscReal *targetx, PetscReal *R)
2601 {
2602   PetscInt     i, j, k, *bdegrees, worksize;
2603   PetscReal    xmin, xmax, center, hscale, *sourcey, *targety, *Bsource, *Bsinv, *Btarget;
2604   PetscScalar *tau, *work;
2605 
2606   PetscFunctionBegin;
2607   PetscValidRealPointer(sourcex, 3);
2608   PetscValidRealPointer(targetx, 5);
2609   PetscValidRealPointer(R, 6);
2610   PetscCheck(degree < nsource, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Reconstruction degree %" PetscInt_FMT " must be less than number of source intervals %" PetscInt_FMT, degree, nsource);
2611   if (PetscDefined(USE_DEBUG)) {
2612     for (i = 0; i < nsource; i++) PetscCheck(sourcex[i] < sourcex[i + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Source interval %" PetscInt_FMT " has negative orientation (%g,%g)", i, (double)sourcex[i], (double)sourcex[i + 1]);
2613     for (i = 0; i < ntarget; i++) PetscCheck(targetx[i] < targetx[i + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Target interval %" PetscInt_FMT " has negative orientation (%g,%g)", i, (double)targetx[i], (double)targetx[i + 1]);
2614   }
2615   xmin     = PetscMin(sourcex[0], targetx[0]);
2616   xmax     = PetscMax(sourcex[nsource], targetx[ntarget]);
2617   center   = (xmin + xmax) / 2;
2618   hscale   = (xmax - xmin) / 2;
2619   worksize = nsource;
2620   PetscCall(PetscMalloc4(degree + 1, &bdegrees, nsource + 1, &sourcey, nsource * (degree + 1), &Bsource, worksize, &work));
2621   PetscCall(PetscMalloc4(nsource, &tau, nsource * (degree + 1), &Bsinv, ntarget + 1, &targety, ntarget * (degree + 1), &Btarget));
2622   for (i = 0; i <= nsource; i++) sourcey[i] = (sourcex[i] - center) / hscale;
2623   for (i = 0; i <= degree; i++) bdegrees[i] = i + 1;
2624   PetscCall(PetscDTLegendreIntegrate(nsource, sourcey, degree + 1, bdegrees, PETSC_TRUE, Bsource));
2625   PetscCall(PetscDTPseudoInverseQR(nsource, nsource, degree + 1, Bsource, Bsinv, tau, nsource, work));
2626   for (i = 0; i <= ntarget; i++) targety[i] = (targetx[i] - center) / hscale;
2627   PetscCall(PetscDTLegendreIntegrate(ntarget, targety, degree + 1, bdegrees, PETSC_FALSE, Btarget));
2628   for (i = 0; i < ntarget; i++) {
2629     PetscReal rowsum = 0;
2630     for (j = 0; j < nsource; j++) {
2631       PetscReal sum = 0;
2632       for (k = 0; k < degree + 1; k++) sum += Btarget[i * (degree + 1) + k] * Bsinv[k * nsource + j];
2633       R[i * nsource + j] = sum;
2634       rowsum += sum;
2635     }
2636     for (j = 0; j < nsource; j++) R[i * nsource + j] /= rowsum; /* normalize each row */
2637   }
2638   PetscCall(PetscFree4(bdegrees, sourcey, Bsource, work));
2639   PetscCall(PetscFree4(tau, Bsinv, targety, Btarget));
2640   PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642 
2643 /*@C
2644    PetscGaussLobattoLegendreIntegrate - Compute the L2 integral of a function on the GLL points
2645 
2646    Not Collective
2647 
2648    Input Parameters:
2649 +  n - the number of GLL nodes
2650 .  nodes - the GLL nodes
2651 .  weights - the GLL weights
2652 -  f - the function values at the nodes
2653 
2654    Output Parameter:
2655 .  in - the value of the integral
2656 
2657    Level: beginner
2658 
2659 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`
2660 @*/
2661 PetscErrorCode PetscGaussLobattoLegendreIntegrate(PetscInt n, PetscReal *nodes, PetscReal *weights, const PetscReal *f, PetscReal *in)
2662 {
2663   PetscInt i;
2664 
2665   PetscFunctionBegin;
2666   *in = 0.;
2667   for (i = 0; i < n; i++) *in += f[i] * f[i] * weights[i];
2668   PetscFunctionReturn(PETSC_SUCCESS);
2669 }
2670 
2671 /*@C
2672    PetscGaussLobattoLegendreElementLaplacianCreate - computes the Laplacian for a single 1d GLL element
2673 
2674    Not Collective
2675 
2676    Input Parameters:
2677 +  n - the number of GLL nodes
2678 .  nodes - the GLL nodes
2679 -  weights - the GLL weights
2680 
2681    Output Parameter:
2682 .  A - the stiffness element
2683 
2684    Level: beginner
2685 
2686    Notes:
2687    Destroy this with `PetscGaussLobattoLegendreElementLaplacianDestroy()`
2688 
2689    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented (the array is symmetric)
2690 
2691 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`
2692 @*/
2693 PetscErrorCode PetscGaussLobattoLegendreElementLaplacianCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2694 {
2695   PetscReal      **A;
2696   const PetscReal *gllnodes = nodes;
2697   const PetscInt   p        = n - 1;
2698   PetscReal        z0, z1, z2 = -1, x, Lpj, Lpr;
2699   PetscInt         i, j, nn, r;
2700 
2701   PetscFunctionBegin;
2702   PetscCall(PetscMalloc1(n, &A));
2703   PetscCall(PetscMalloc1(n * n, &A[0]));
2704   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;
2705 
2706   for (j = 1; j < p; j++) {
2707     x  = gllnodes[j];
2708     z0 = 1.;
2709     z1 = x;
2710     for (nn = 1; nn < p; nn++) {
2711       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2712       z0 = z1;
2713       z1 = z2;
2714     }
2715     Lpj = z2;
2716     for (r = 1; r < p; r++) {
2717       if (r == j) {
2718         A[j][j] = 2. / (3. * (1. - gllnodes[j] * gllnodes[j]) * Lpj * Lpj);
2719       } else {
2720         x  = gllnodes[r];
2721         z0 = 1.;
2722         z1 = x;
2723         for (nn = 1; nn < p; nn++) {
2724           z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2725           z0 = z1;
2726           z1 = z2;
2727         }
2728         Lpr     = z2;
2729         A[r][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * Lpr * (gllnodes[j] - gllnodes[r]) * (gllnodes[j] - gllnodes[r]));
2730       }
2731     }
2732   }
2733   for (j = 1; j < p + 1; j++) {
2734     x  = gllnodes[j];
2735     z0 = 1.;
2736     z1 = x;
2737     for (nn = 1; nn < p; nn++) {
2738       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2739       z0 = z1;
2740       z1 = z2;
2741     }
2742     Lpj     = z2;
2743     A[j][0] = 4. * PetscPowRealInt(-1., p) / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. + gllnodes[j]) * (1. + gllnodes[j]));
2744     A[0][j] = A[j][0];
2745   }
2746   for (j = 0; j < p; j++) {
2747     x  = gllnodes[j];
2748     z0 = 1.;
2749     z1 = x;
2750     for (nn = 1; nn < p; nn++) {
2751       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2752       z0 = z1;
2753       z1 = z2;
2754     }
2755     Lpj = z2;
2756 
2757     A[p][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. - gllnodes[j]) * (1. - gllnodes[j]));
2758     A[j][p] = A[p][j];
2759   }
2760   A[0][0] = 0.5 + (((PetscReal)p) * (((PetscReal)p) + 1.) - 2.) / 6.;
2761   A[p][p] = A[0][0];
2762   *AA     = A;
2763   PetscFunctionReturn(PETSC_SUCCESS);
2764 }
2765 
2766 /*@C
2767    PetscGaussLobattoLegendreElementLaplacianDestroy - frees the Laplacian for a single 1d GLL element created with `PetscGaussLobattoLegendreElementLaplacianCreate()`
2768 
2769    Not Collective
2770 
2771    Input Parameters:
2772 +  n - the number of GLL nodes
2773 .  nodes - the GLL nodes
2774 .  weights - the GLL weightss
2775 -  A - the stiffness element
2776 
2777    Level: beginner
2778 
2779 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`
2780 @*/
2781 PetscErrorCode PetscGaussLobattoLegendreElementLaplacianDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2782 {
2783   PetscFunctionBegin;
2784   PetscCall(PetscFree((*AA)[0]));
2785   PetscCall(PetscFree(*AA));
2786   *AA = NULL;
2787   PetscFunctionReturn(PETSC_SUCCESS);
2788 }
2789 
2790 /*@C
2791    PetscGaussLobattoLegendreElementGradientCreate - computes the gradient for a single 1d GLL element
2792 
2793    Not Collective
2794 
2795    Input Parameter:
2796 +  n - the number of GLL nodes
2797 .  nodes - the GLL nodes
2798 .  weights - the GLL weights
2799 
2800    Output Parameters:
2801 .  AA - the stiffness element
2802 -  AAT - the transpose of AA (pass in NULL if you do not need this array)
2803 
2804    Level: beginner
2805 
2806    Notes:
2807    Destroy this with `PetscGaussLobattoLegendreElementGradientDestroy()`
2808 
2809    You can access entries in these arrays with AA[i][j] but in memory it is stored in contiguous memory, row oriented
2810 
2811 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`, `PetscGaussLobattoLegendreElementGradientDestroy()`
2812 @*/
2813 PetscErrorCode PetscGaussLobattoLegendreElementGradientCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2814 {
2815   PetscReal      **A, **AT = NULL;
2816   const PetscReal *gllnodes = nodes;
2817   const PetscInt   p        = n - 1;
2818   PetscReal        Li, Lj, d0;
2819   PetscInt         i, j;
2820 
2821   PetscFunctionBegin;
2822   PetscCall(PetscMalloc1(n, &A));
2823   PetscCall(PetscMalloc1(n * n, &A[0]));
2824   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;
2825 
2826   if (AAT) {
2827     PetscCall(PetscMalloc1(n, &AT));
2828     PetscCall(PetscMalloc1(n * n, &AT[0]));
2829     for (i = 1; i < n; i++) AT[i] = AT[i - 1] + n;
2830   }
2831 
2832   if (n == 1) A[0][0] = 0.;
2833   d0 = (PetscReal)p * ((PetscReal)p + 1.) / 4.;
2834   for (i = 0; i < n; i++) {
2835     for (j = 0; j < n; j++) {
2836       A[i][j] = 0.;
2837       PetscCall(PetscDTComputeJacobi(0., 0., p, gllnodes[i], &Li));
2838       PetscCall(PetscDTComputeJacobi(0., 0., p, gllnodes[j], &Lj));
2839       if (i != j) A[i][j] = Li / (Lj * (gllnodes[i] - gllnodes[j]));
2840       if ((j == i) && (i == 0)) A[i][j] = -d0;
2841       if (j == i && i == p) A[i][j] = d0;
2842       if (AT) AT[j][i] = A[i][j];
2843     }
2844   }
2845   if (AAT) *AAT = AT;
2846   *AA = A;
2847   PetscFunctionReturn(PETSC_SUCCESS);
2848 }
2849 
2850 /*@C
2851    PetscGaussLobattoLegendreElementGradientDestroy - frees the gradient for a single 1d GLL element obtained with `PetscGaussLobattoLegendreElementGradientCreate()`
2852 
2853    Not Collective
2854 
2855    Input Parameters:
2856 +  n - the number of GLL nodes
2857 .  nodes - the GLL nodes
2858 .  weights - the GLL weights
2859 .  AA - the stiffness element
2860 -  AAT - the transpose of the element
2861 
2862    Level: beginner
2863 
2864 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2865 @*/
2866 PetscErrorCode PetscGaussLobattoLegendreElementGradientDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2867 {
2868   PetscFunctionBegin;
2869   PetscCall(PetscFree((*AA)[0]));
2870   PetscCall(PetscFree(*AA));
2871   *AA = NULL;
2872   if (*AAT) {
2873     PetscCall(PetscFree((*AAT)[0]));
2874     PetscCall(PetscFree(*AAT));
2875     *AAT = NULL;
2876   }
2877   PetscFunctionReturn(PETSC_SUCCESS);
2878 }
2879 
2880 /*@C
2881    PetscGaussLobattoLegendreElementAdvectionCreate - computes the advection operator for a single 1d GLL element
2882 
2883    Not Collective
2884 
2885    Input Parameters:
2886 +  n - the number of GLL nodes
2887 .  nodes - the GLL nodes
2888 -  weights - the GLL weightss
2889 
2890    Output Parameter:
2891 .  AA - the stiffness element
2892 
2893    Level: beginner
2894 
2895    Notes:
2896    Destroy this with `PetscGaussLobattoLegendreElementAdvectionDestroy()`
2897 
2898    This is the same as the Gradient operator multiplied by the diagonal mass matrix
2899 
2900    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented
2901 
2902 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionDestroy()`
2903 @*/
2904 PetscErrorCode PetscGaussLobattoLegendreElementAdvectionCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2905 {
2906   PetscReal      **D;
2907   const PetscReal *gllweights = weights;
2908   const PetscInt   glln       = n;
2909   PetscInt         i, j;
2910 
2911   PetscFunctionBegin;
2912   PetscCall(PetscGaussLobattoLegendreElementGradientCreate(n, nodes, weights, &D, NULL));
2913   for (i = 0; i < glln; i++) {
2914     for (j = 0; j < glln; j++) D[i][j] = gllweights[i] * D[i][j];
2915   }
2916   *AA = D;
2917   PetscFunctionReturn(PETSC_SUCCESS);
2918 }
2919 
2920 /*@C
2921    PetscGaussLobattoLegendreElementAdvectionDestroy - frees the advection stiffness for a single 1d GLL element created with `PetscGaussLobattoLegendreElementAdvectionCreate()`
2922 
2923    Not Collective
2924 
2925    Input Parameters:
2926 +  n - the number of GLL nodes
2927 .  nodes - the GLL nodes
2928 .  weights - the GLL weights
2929 -  A - advection
2930 
2931    Level: beginner
2932 
2933 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2934 @*/
2935 PetscErrorCode PetscGaussLobattoLegendreElementAdvectionDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2936 {
2937   PetscFunctionBegin;
2938   PetscCall(PetscFree((*AA)[0]));
2939   PetscCall(PetscFree(*AA));
2940   *AA = NULL;
2941   PetscFunctionReturn(PETSC_SUCCESS);
2942 }
2943 
2944 PetscErrorCode PetscGaussLobattoLegendreElementMassCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2945 {
2946   PetscReal      **A;
2947   const PetscReal *gllweights = weights;
2948   const PetscInt   glln       = n;
2949   PetscInt         i, j;
2950 
2951   PetscFunctionBegin;
2952   PetscCall(PetscMalloc1(glln, &A));
2953   PetscCall(PetscMalloc1(glln * glln, &A[0]));
2954   for (i = 1; i < glln; i++) A[i] = A[i - 1] + glln;
2955   if (glln == 1) A[0][0] = 0.;
2956   for (i = 0; i < glln; i++) {
2957     for (j = 0; j < glln; j++) {
2958       A[i][j] = 0.;
2959       if (j == i) A[i][j] = gllweights[i];
2960     }
2961   }
2962   *AA = A;
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 PetscErrorCode PetscGaussLobattoLegendreElementMassDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2967 {
2968   PetscFunctionBegin;
2969   PetscCall(PetscFree((*AA)[0]));
2970   PetscCall(PetscFree(*AA));
2971   *AA = NULL;
2972   PetscFunctionReturn(PETSC_SUCCESS);
2973 }
2974 
2975 /*@
2976   PetscDTIndexToBary - convert an index into a barycentric coordinate.
2977 
2978   Input Parameters:
2979 + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
2980 . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
2981 - index - the index to convert: should be >= 0 and < Binomial(len - 1 + sum, sum)
2982 
2983   Output Parameter:
2984 . coord - will be filled with the barycentric coordinate
2985 
2986   Level: beginner
2987 
2988   Note:
2989   The indices map to barycentric coordinates in lexicographic order, where the first index is the
2990   least significant and the last index is the most significant.
2991 
2992 .seealso: `PetscDTBaryToIndex()`
2993 @*/
2994 PetscErrorCode PetscDTIndexToBary(PetscInt len, PetscInt sum, PetscInt index, PetscInt coord[])
2995 {
2996   PetscInt c, d, s, total, subtotal, nexttotal;
2997 
2998   PetscFunctionBeginHot;
2999   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
3000   PetscCheck(index >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index must be non-negative");
3001   if (!len) {
3002     if (!sum && !index) PetscFunctionReturn(PETSC_SUCCESS);
3003     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
3004   }
3005   for (c = 1, total = 1; c <= len; c++) {
3006     /* total is the number of ways to have a tuple of length c with sum */
3007     if (index < total) break;
3008     total = (total * (sum + c)) / c;
3009   }
3010   PetscCheck(c <= len, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index out of range");
3011   for (d = c; d < len; d++) coord[d] = 0;
3012   for (s = 0, subtotal = 1, nexttotal = 1; c > 0;) {
3013     /* subtotal is the number of ways to have a tuple of length c with sum s */
3014     /* nexttotal is the number of ways to have a tuple of length c-1 with sum s */
3015     if ((index + subtotal) >= total) {
3016       coord[--c] = sum - s;
3017       index -= (total - subtotal);
3018       sum       = s;
3019       total     = nexttotal;
3020       subtotal  = 1;
3021       nexttotal = 1;
3022       s         = 0;
3023     } else {
3024       subtotal  = (subtotal * (c + s)) / (s + 1);
3025       nexttotal = (nexttotal * (c - 1 + s)) / (s + 1);
3026       s++;
3027     }
3028   }
3029   PetscFunctionReturn(PETSC_SUCCESS);
3030 }
3031 
3032 /*@
3033   PetscDTBaryToIndex - convert a barycentric coordinate to an index
3034 
3035   Input Parameters:
3036 + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
3037 . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
3038 - coord - a barycentric coordinate with the given length and sum
3039 
3040   Output Parameter:
3041 . index - the unique index for the coordinate, >= 0 and < Binomial(len - 1 + sum, sum)
3042 
3043   Level: beginner
3044 
3045   Note:
3046   The indices map to barycentric coordinates in lexicographic order, where the first index is the
3047   least significant and the last index is the most significant.
3048 
3049 .seealso: `PetscDTIndexToBary`
3050 @*/
3051 PetscErrorCode PetscDTBaryToIndex(PetscInt len, PetscInt sum, const PetscInt coord[], PetscInt *index)
3052 {
3053   PetscInt c;
3054   PetscInt i;
3055   PetscInt total;
3056 
3057   PetscFunctionBeginHot;
3058   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
3059   if (!len) {
3060     if (!sum) {
3061       *index = 0;
3062       PetscFunctionReturn(PETSC_SUCCESS);
3063     }
3064     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
3065   }
3066   for (c = 1, total = 1; c < len; c++) total = (total * (sum + c)) / c;
3067   i = total - 1;
3068   c = len - 1;
3069   sum -= coord[c];
3070   while (sum > 0) {
3071     PetscInt subtotal;
3072     PetscInt s;
3073 
3074     for (s = 1, subtotal = 1; s < sum; s++) subtotal = (subtotal * (c + s)) / s;
3075     i -= subtotal;
3076     sum -= coord[--c];
3077   }
3078   *index = i;
3079   PetscFunctionReturn(PETSC_SUCCESS);
3080 }
3081