1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed-impl.h> 9 #include <ceed/backend.h> 10 #include <ceed/ceed.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedBasis interfaces 18 19 /// @cond DOXYGEN_SKIP 20 static struct CeedBasis_private ceed_basis_collocated; 21 /// @endcond 22 23 /// @addtogroup CeedBasisUser 24 /// @{ 25 26 /// Indicate that the quadrature points are collocated with the nodes 27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated; 28 29 /// @} 30 31 /// ---------------------------------------------------------------------------- 32 /// CeedBasis Library Internal Functions 33 /// ---------------------------------------------------------------------------- 34 /// @addtogroup CeedBasisDeveloper 35 /// @{ 36 37 /** 38 @brief Compute Householder reflection 39 40 Computes A = (I - b v v^T) A 41 where A is an mxn matrix indexed as A[i*row + j*col] 42 43 @param[in,out] A Matrix to apply Householder reflection to, in place 44 @param v Householder vector 45 @param b Scaling factor 46 @param m Number of rows in A 47 @param n Number of columns in A 48 @param row Row stride 49 @param col Col stride 50 51 @return An error code: 0 - success, otherwise - failure 52 53 @ref Developer 54 **/ 55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar b, CeedInt m, CeedInt n, CeedInt row, CeedInt col) { 56 for (CeedInt j = 0; j < n; j++) { 57 CeedScalar w = A[0 * row + j * col]; 58 for (CeedInt i = 1; i < m; i++) w += v[i] * A[i * row + j * col]; 59 A[0 * row + j * col] -= b * w; 60 for (CeedInt i = 1; i < m; i++) A[i * row + j * col] -= b * w * v[i]; 61 } 62 return CEED_ERROR_SUCCESS; 63 } 64 65 /** 66 @brief Apply Householder Q matrix 67 68 Compute A = Q A where Q is mxm and A is mxn. 69 70 @param[in,out] A Matrix to apply Householder Q to, in place 71 @param Q Householder Q matrix 72 @param tau Householder scaling factors 73 @param t_mode Transpose mode for application 74 @param m Number of rows in A 75 @param n Number of columns in A 76 @param k Number of elementary reflectors in Q, k<m 77 @param row Row stride in A 78 @param col Col stride in A 79 80 @return An error code: 0 - success, otherwise - failure 81 82 @ref Developer 83 **/ 84 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n, CeedInt k, 85 CeedInt row, CeedInt col) { 86 CeedScalar *v; 87 CeedCall(CeedMalloc(m, &v)); 88 for (CeedInt ii = 0; ii < k; ii++) { 89 CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k - 1 - ii; 90 for (CeedInt j = i + 1; j < m; j++) v[j] = Q[j * k + i]; 91 // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T 92 CeedCall(CeedHouseholderReflect(&A[i * row], &v[i], tau[i], m - i, n, row, col)); 93 } 94 CeedCall(CeedFree(&v)); 95 return CEED_ERROR_SUCCESS; 96 } 97 98 /** 99 @brief Compute Givens rotation 100 101 Computes A = G A (or G^T A in transpose mode) 102 where A is an mxn matrix indexed as A[i*n + j*m] 103 104 @param[in,out] A Row major matrix to apply Givens rotation to, in place 105 @param c Cosine factor 106 @param s Sine factor 107 @param t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, 108 which has the effect of rotating columns of A clockwise; 109 @ref CEED_TRANSPOSE for the opposite rotation 110 @param i First row/column to apply rotation 111 @param k Second row/column to apply rotation 112 @param m Number of rows in A 113 @param n Number of columns in A 114 115 @return An error code: 0 - success, otherwise - failure 116 117 @ref Developer 118 **/ 119 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, CeedTransposeMode t_mode, CeedInt i, CeedInt k, CeedInt m, CeedInt n) { 120 CeedInt stride_j = 1, stride_ik = m, num_its = n; 121 if (t_mode == CEED_NOTRANSPOSE) { 122 stride_j = n; 123 stride_ik = 1; 124 num_its = m; 125 } 126 127 // Apply rotation 128 for (CeedInt j = 0; j < num_its; j++) { 129 CeedScalar tau1 = A[i * stride_ik + j * stride_j], tau2 = A[k * stride_ik + j * stride_j]; 130 A[i * stride_ik + j * stride_j] = c * tau1 - s * tau2; 131 A[k * stride_ik + j * stride_j] = s * tau1 + c * tau2; 132 } 133 return CEED_ERROR_SUCCESS; 134 } 135 136 /** 137 @brief View an array stored in a CeedBasis 138 139 @param[in] name Name of array 140 @param[in] fp_fmt Printing format 141 @param[in] m Number of rows in array 142 @param[in] n Number of columns in array 143 @param[in] a Array to be viewed 144 @param[in] stream Stream to view to, e.g., stdout 145 146 @return An error code: 0 - success, otherwise - failure 147 148 @ref Developer 149 **/ 150 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, CeedInt n, const CeedScalar *a, FILE *stream) { 151 for (CeedInt i = 0; i < m; i++) { 152 if (m > 1) fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i); 153 else fprintf(stream, "%12s:", name); 154 for (CeedInt j = 0; j < n; j++) fprintf(stream, fp_fmt, fabs(a[i * n + j]) > 1E-14 ? a[i * n + j] : 0); 155 fputs("\n", stream); 156 } 157 return CEED_ERROR_SUCCESS; 158 } 159 160 /** 161 @brief Create the interpolation and gradient matrices for projection from 162 the nodes of `basis_from` to the nodes of `basis_to`. 163 The interpolation is given by `interp_project = interp_to^+ * interp_from`, 164 where the pesudoinverse `interp_to^+` is given by QR factorization. 165 The gradient is given by `grad_project = interp_to^+ * grad_from`. 166 Note: `basis_from` and `basis_to` must have compatible quadrature 167 spaces. 168 169 @param[in] basis_from CeedBasis to project from 170 @param[in] basis_to CeedBasis to project to 171 @param[out] interp_project Address of the variable where the newly created 172 interpolation matrix will be stored. 173 @param[out] grad_project Address of the variable where the newly created 174 gradient matrix will be stored. 175 176 @return An error code: 0 - success, otherwise - failure 177 178 @ref Developer 179 **/ 180 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) { 181 Ceed ceed; 182 CeedCall(CeedBasisGetCeed(basis_to, &ceed)); 183 184 // Check for compatible quadrature spaces 185 CeedInt Q_to, Q_from; 186 CeedCall(CeedBasisGetNumQuadraturePoints(basis_to, &Q_to)); 187 CeedCall(CeedBasisGetNumQuadraturePoints(basis_from, &Q_from)); 188 if (Q_to != Q_from) { 189 // LCOV_EXCL_START 190 return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces"); 191 // LCOV_EXCL_STOP 192 } 193 194 // Check for matching tensor or non-tensor 195 CeedInt P_to, P_from, Q = Q_to; 196 bool is_tensor_to, is_tensor_from; 197 CeedCall(CeedBasisIsTensor(basis_to, &is_tensor_to)); 198 CeedCall(CeedBasisIsTensor(basis_from, &is_tensor_from)); 199 if (is_tensor_to && is_tensor_from) { 200 CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_to)); 201 CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_from)); 202 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis_from, &Q)); 203 } else if (!is_tensor_to && !is_tensor_from) { 204 CeedCall(CeedBasisGetNumNodes(basis_to, &P_to)); 205 CeedCall(CeedBasisGetNumNodes(basis_from, &P_from)); 206 } else { 207 // LCOV_EXCL_START 208 return CeedError(ceed, CEED_ERROR_MINOR, "Bases must both be tensor or non-tensor"); 209 // LCOV_EXCL_STOP 210 } 211 212 // Get source matrices 213 CeedInt dim; 214 CeedScalar *interp_to, *interp_from, *tau; 215 CeedCall(CeedBasisGetDimension(basis_to, &dim)); 216 CeedCall(CeedMalloc(Q * P_from, &interp_from)); 217 CeedCall(CeedMalloc(Q * P_to, &interp_to)); 218 CeedCall(CeedCalloc(P_to * P_from, interp_project)); 219 CeedCall(CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project)); 220 CeedCall(CeedMalloc(Q, &tau)); 221 const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, *grad_from_source; 222 if (is_tensor_to) { 223 CeedCall(CeedBasisGetInterp1D(basis_to, &interp_to_source)); 224 CeedCall(CeedBasisGetInterp1D(basis_from, &interp_from_source)); 225 CeedCall(CeedBasisGetGrad1D(basis_from, &grad_from_source)); 226 } else { 227 CeedCall(CeedBasisGetInterp(basis_to, &interp_to_source)); 228 CeedCall(CeedBasisGetInterp(basis_from, &interp_from_source)); 229 CeedCall(CeedBasisGetGrad(basis_from, &grad_from_source)); 230 } 231 232 // Build matrices 233 CeedInt num_matrices = 1 + (is_tensor_to ? 1 : dim); 234 CeedScalar *input_from[num_matrices], *output_project[num_matrices]; 235 input_from[0] = (CeedScalar *)interp_from_source; 236 output_project[0] = *interp_project; 237 for (CeedInt m = 1; m < num_matrices; m++) { 238 input_from[m] = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from]; 239 output_project[m] = &((*grad_project)[(m - 1) * P_to * P_from]); 240 } 241 for (CeedInt m = 0; m < num_matrices; m++) { 242 // -- QR Factorization, interp_to = Q R 243 memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); 244 CeedCall(CeedQRFactorization(ceed, interp_to, tau, Q, P_to)); 245 246 // -- Apply Qtranspose, interp_to = Qtranspose interp_from 247 memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0])); 248 CeedCall(CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, Q, P_from, P_to, P_from, 1)); 249 250 // -- Apply Rinv, interp_project = Rinv interp_c 251 for (CeedInt j = 0; j < P_from; j++) { // Column j 252 output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * (P_to - 1)] / interp_to[P_to * P_to - 1]; 253 for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i 254 output_project[m][j + P_from * i] = interp_from[j + P_from * i]; 255 for (CeedInt k = i + 1; k < P_to; k++) { 256 output_project[m][j + P_from * i] -= interp_to[k + P_to * i] * output_project[m][j + P_from * k]; 257 } 258 output_project[m][j + P_from * i] /= interp_to[i + P_to * i]; 259 } 260 } 261 } 262 263 // Cleanup 264 CeedCall(CeedFree(&tau)); 265 CeedCall(CeedFree(&interp_to)); 266 CeedCall(CeedFree(&interp_from)); 267 268 return CEED_ERROR_SUCCESS; 269 } 270 271 /// @} 272 273 /// ---------------------------------------------------------------------------- 274 /// Ceed Backend API 275 /// ---------------------------------------------------------------------------- 276 /// @addtogroup CeedBasisBackend 277 /// @{ 278 279 /** 280 @brief Return collocated grad matrix 281 282 @param basis CeedBasis 283 @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of 284 basis functions at quadrature points 285 286 @return An error code: 0 - success, otherwise - failure 287 288 @ref Backend 289 **/ 290 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) { 291 int i, j, k; 292 Ceed ceed; 293 CeedInt P_1d = (basis)->P_1d, Q_1d = (basis)->Q_1d; 294 CeedScalar *interp_1d, *grad_1d, *tau; 295 296 CeedCall(CeedMalloc(Q_1d * P_1d, &interp_1d)); 297 CeedCall(CeedMalloc(Q_1d * P_1d, &grad_1d)); 298 CeedCall(CeedMalloc(Q_1d, &tau)); 299 memcpy(interp_1d, (basis)->interp_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]); 300 memcpy(grad_1d, (basis)->grad_1d, Q_1d * P_1d * sizeof(basis)->interp_1d[0]); 301 302 // QR Factorization, interp_1d = Q R 303 CeedCall(CeedBasisGetCeed(basis, &ceed)); 304 CeedCall(CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d)); 305 // Note: This function is for backend use, so all errors are terminal 306 // and we do not need to clean up memory on failure. 307 308 // Apply Rinv, collo_grad_1d = grad_1d Rinv 309 for (i = 0; i < Q_1d; i++) { // Row i 310 collo_grad_1d[Q_1d * i] = grad_1d[P_1d * i] / interp_1d[0]; 311 for (j = 1; j < P_1d; j++) { // Column j 312 collo_grad_1d[j + Q_1d * i] = grad_1d[j + P_1d * i]; 313 for (k = 0; k < j; k++) collo_grad_1d[j + Q_1d * i] -= interp_1d[j + P_1d * k] * collo_grad_1d[k + Q_1d * i]; 314 collo_grad_1d[j + Q_1d * i] /= interp_1d[j + P_1d * j]; 315 } 316 for (j = P_1d; j < Q_1d; j++) collo_grad_1d[j + Q_1d * i] = 0; 317 } 318 319 // Apply Qtranspose, collo_grad = collo_grad Q_transpose 320 CeedCall(CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, Q_1d, Q_1d, P_1d, 1, Q_1d)); 321 322 CeedCall(CeedFree(&interp_1d)); 323 CeedCall(CeedFree(&grad_1d)); 324 CeedCall(CeedFree(&tau)); 325 return CEED_ERROR_SUCCESS; 326 } 327 328 /** 329 @brief Get tensor status for given CeedBasis 330 331 @param basis CeedBasis 332 @param[out] is_tensor Variable to store tensor status 333 334 @return An error code: 0 - success, otherwise - failure 335 336 @ref Backend 337 **/ 338 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) { 339 *is_tensor = basis->tensor_basis; 340 return CEED_ERROR_SUCCESS; 341 } 342 343 /** 344 @brief Get backend data of a CeedBasis 345 346 @param basis CeedBasis 347 @param[out] data Variable to store data 348 349 @return An error code: 0 - success, otherwise - failure 350 351 @ref Backend 352 **/ 353 int CeedBasisGetData(CeedBasis basis, void *data) { 354 *(void **)data = basis->data; 355 return CEED_ERROR_SUCCESS; 356 } 357 358 /** 359 @brief Set backend data of a CeedBasis 360 361 @param[out] basis CeedBasis 362 @param data Data to set 363 364 @return An error code: 0 - success, otherwise - failure 365 366 @ref Backend 367 **/ 368 int CeedBasisSetData(CeedBasis basis, void *data) { 369 basis->data = data; 370 return CEED_ERROR_SUCCESS; 371 } 372 373 /** 374 @brief Increment the reference counter for a CeedBasis 375 376 @param basis Basis to increment the reference counter 377 378 @return An error code: 0 - success, otherwise - failure 379 380 @ref Backend 381 **/ 382 int CeedBasisReference(CeedBasis basis) { 383 basis->ref_count++; 384 return CEED_ERROR_SUCCESS; 385 } 386 387 /** 388 @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode 389 390 @param basis Basis to estimate FLOPs for 391 @param t_mode Apply basis or transpose 392 @param eval_mode Basis evaluation mode 393 @param flops Address of variable to hold FLOPs estimate 394 395 @ref Backend 396 **/ 397 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedSize *flops) { 398 bool is_tensor; 399 400 CeedCall(CeedBasisIsTensor(basis, &is_tensor)); 401 if (is_tensor) { 402 CeedInt dim, num_comp, P_1d, Q_1d; 403 CeedCall(CeedBasisGetDimension(basis, &dim)); 404 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 405 CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d)); 406 CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 407 if (t_mode == CEED_TRANSPOSE) { 408 P_1d = Q_1d; 409 Q_1d = P_1d; 410 } 411 CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim - 1), post = 1; 412 for (CeedInt d = 0; d < dim; d++) { 413 tensor_flops += 2 * pre * P_1d * post * Q_1d; 414 pre /= P_1d; 415 post *= Q_1d; 416 } 417 switch (eval_mode) { 418 case CEED_EVAL_NONE: 419 *flops = 0; 420 break; 421 case CEED_EVAL_INTERP: 422 *flops = tensor_flops; 423 break; 424 case CEED_EVAL_GRAD: 425 *flops = tensor_flops * 2; 426 break; 427 case CEED_EVAL_DIV: 428 // LCOV_EXCL_START 429 return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_DIV not supported"); 430 break; 431 case CEED_EVAL_CURL: 432 return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, "Tensor CEED_EVAL_CURL not supported"); 433 break; 434 // LCOV_EXCL_STOP 435 case CEED_EVAL_WEIGHT: 436 *flops = dim * CeedIntPow(Q_1d, dim); 437 break; 438 } 439 } else { 440 CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp; 441 CeedCall(CeedBasisGetDimension(basis, &dim)); 442 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 443 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 444 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 445 CeedCall(CeedBasisGetNumQuadratureComponents(basis, &Q_comp)); 446 switch (eval_mode) { 447 case CEED_EVAL_NONE: 448 *flops = 0; 449 break; 450 case CEED_EVAL_INTERP: 451 *flops = num_nodes * num_qpts * num_comp; 452 break; 453 case CEED_EVAL_GRAD: 454 *flops = num_nodes * num_qpts * num_comp * dim; 455 break; 456 case CEED_EVAL_DIV: 457 *flops = num_nodes * num_qpts; 458 break; 459 case CEED_EVAL_CURL: 460 *flops = num_nodes * num_qpts * dim; 461 break; 462 case CEED_EVAL_WEIGHT: 463 *flops = 0; 464 break; 465 } 466 } 467 468 return CEED_ERROR_SUCCESS; 469 } 470 471 /** 472 @brief Get dimension for given CeedElemTopology 473 474 @param topo CeedElemTopology 475 @param[out] dim Variable to store dimension of topology 476 477 @return An error code: 0 - success, otherwise - failure 478 479 @ref Backend 480 **/ 481 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) { 482 *dim = (CeedInt)topo >> 16; 483 return CEED_ERROR_SUCCESS; 484 } 485 486 /** 487 @brief Get CeedTensorContract of a CeedBasis 488 489 @param basis CeedBasis 490 @param[out] contract Variable to store CeedTensorContract 491 492 @return An error code: 0 - success, otherwise - failure 493 494 @ref Backend 495 **/ 496 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) { 497 *contract = basis->contract; 498 return CEED_ERROR_SUCCESS; 499 } 500 501 /** 502 @brief Set CeedTensorContract of a CeedBasis 503 504 @param[out] basis CeedBasis 505 @param contract CeedTensorContract to set 506 507 @return An error code: 0 - success, otherwise - failure 508 509 @ref Backend 510 **/ 511 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) { 512 basis->contract = contract; 513 CeedCall(CeedTensorContractReference(contract)); 514 return CEED_ERROR_SUCCESS; 515 } 516 517 /** 518 @brief Return a reference implementation of matrix multiplication C = A B. 519 Note, this is a reference implementation for CPU CeedScalar pointers 520 that is not intended for high performance. 521 522 @param ceed A Ceed context for error handling 523 @param[in] mat_A Row-major matrix A 524 @param[in] mat_B Row-major matrix B 525 @param[out] mat_C Row-major output matrix C 526 @param m Number of rows of C 527 @param n Number of columns of C 528 @param kk Number of columns of A/rows of B 529 530 @return An error code: 0 - success, otherwise - failure 531 532 @ref Utility 533 **/ 534 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk) { 535 for (CeedInt i = 0; i < m; i++) { 536 for (CeedInt j = 0; j < n; j++) { 537 CeedScalar sum = 0; 538 for (CeedInt k = 0; k < kk; k++) sum += mat_A[k + i * kk] * mat_B[j + k * n]; 539 mat_C[j + i * n] = sum; 540 } 541 } 542 return CEED_ERROR_SUCCESS; 543 } 544 545 /// @} 546 547 /// ---------------------------------------------------------------------------- 548 /// CeedBasis Public API 549 /// ---------------------------------------------------------------------------- 550 /// @addtogroup CeedBasisUser 551 /// @{ 552 553 /** 554 @brief Create a tensor-product basis for H^1 discretizations 555 556 @param ceed A Ceed object where the CeedBasis will be created 557 @param dim Topological dimension 558 @param num_comp Number of field components (1 for scalar fields) 559 @param P_1d Number of nodes in one dimension 560 @param Q_1d Number of quadrature points in one dimension 561 @param interp_1d Row-major (Q_1d * P_1d) matrix expressing the values of nodal 562 basis functions at quadrature points 563 @param grad_1d Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal 564 basis functions at quadrature points 565 @param q_ref_1d Array of length Q_1d holding the locations of quadrature points 566 on the 1D reference element [-1, 1] 567 @param q_weight_1d Array of length Q_1d holding the quadrature weights on the 568 reference element 569 @param[out] basis Address of the variable where the newly created 570 CeedBasis will be stored. 571 572 @return An error code: 0 - success, otherwise - failure 573 574 @ref User 575 **/ 576 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, 577 const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) { 578 if (!ceed->BasisCreateTensorH1) { 579 Ceed delegate; 580 CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); 581 582 if (!delegate) { 583 // LCOV_EXCL_START 584 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1"); 585 // LCOV_EXCL_STOP 586 } 587 588 CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); 589 return CEED_ERROR_SUCCESS; 590 } 591 592 if (dim < 1) { 593 // LCOV_EXCL_START 594 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value"); 595 // LCOV_EXCL_STOP 596 } 597 598 if (num_comp < 1) { 599 // LCOV_EXCL_START 600 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); 601 // LCOV_EXCL_STOP 602 } 603 604 if (P_1d < 1) { 605 // LCOV_EXCL_START 606 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); 607 // LCOV_EXCL_STOP 608 } 609 610 if (Q_1d < 1) { 611 // LCOV_EXCL_START 612 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); 613 // LCOV_EXCL_STOP 614 } 615 616 CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX; 617 618 CeedCall(CeedCalloc(1, basis)); 619 (*basis)->ceed = ceed; 620 CeedCall(CeedReference(ceed)); 621 (*basis)->ref_count = 1; 622 (*basis)->tensor_basis = 1; 623 (*basis)->dim = dim; 624 (*basis)->topo = topo; 625 (*basis)->num_comp = num_comp; 626 (*basis)->P_1d = P_1d; 627 (*basis)->Q_1d = Q_1d; 628 (*basis)->P = CeedIntPow(P_1d, dim); 629 (*basis)->Q = CeedIntPow(Q_1d, dim); 630 (*basis)->Q_comp = 1; 631 (*basis)->basis_space = 1; // 1 for H^1 space 632 CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d)); 633 CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d)); 634 if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0])); 635 if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0])); 636 CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d)); 637 CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d)); 638 if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])); 639 if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])); 640 CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis)); 641 return CEED_ERROR_SUCCESS; 642 } 643 644 /** 645 @brief Create a tensor-product Lagrange basis 646 647 @param ceed A Ceed object where the CeedBasis will be created 648 @param dim Topological dimension of element 649 @param num_comp Number of field components (1 for scalar fields) 650 @param P Number of Gauss-Lobatto nodes in one dimension. The 651 polynomial degree of the resulting Q_k element is k=P-1. 652 @param Q Number of quadrature points in one dimension. 653 @param quad_mode Distribution of the Q quadrature points (affects order of 654 accuracy for the quadrature) 655 @param[out] basis Address of the variable where the newly created 656 CeedBasis will be stored. 657 658 @return An error code: 0 - success, otherwise - failure 659 660 @ref User 661 **/ 662 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) { 663 // Allocate 664 int ierr = CEED_ERROR_SUCCESS, i, j, k; 665 CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d; 666 667 if (dim < 1) { 668 // LCOV_EXCL_START 669 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value"); 670 // LCOV_EXCL_STOP 671 } 672 673 if (num_comp < 1) { 674 // LCOV_EXCL_START 675 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); 676 // LCOV_EXCL_STOP 677 } 678 679 if (P < 1) { 680 // LCOV_EXCL_START 681 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); 682 // LCOV_EXCL_STOP 683 } 684 685 if (Q < 1) { 686 // LCOV_EXCL_START 687 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); 688 // LCOV_EXCL_STOP 689 } 690 691 // Get Nodes and Weights 692 CeedCall(CeedCalloc(P * Q, &interp_1d)); 693 CeedCall(CeedCalloc(P * Q, &grad_1d)); 694 CeedCall(CeedCalloc(P, &nodes)); 695 CeedCall(CeedCalloc(Q, &q_ref_1d)); 696 CeedCall(CeedCalloc(Q, &q_weight_1d)); 697 if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup; 698 switch (quad_mode) { 699 case CEED_GAUSS: 700 ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d); 701 break; 702 case CEED_GAUSS_LOBATTO: 703 ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d); 704 break; 705 } 706 if (ierr != CEED_ERROR_SUCCESS) goto cleanup; 707 708 // Build B, D matrix 709 // Fornberg, 1998 710 for (i = 0; i < Q; i++) { 711 c1 = 1.0; 712 c3 = nodes[0] - q_ref_1d[i]; 713 interp_1d[i * P + 0] = 1.0; 714 for (j = 1; j < P; j++) { 715 c2 = 1.0; 716 c4 = c3; 717 c3 = nodes[j] - q_ref_1d[i]; 718 for (k = 0; k < j; k++) { 719 dx = nodes[j] - nodes[k]; 720 c2 *= dx; 721 if (k == j - 1) { 722 grad_1d[i * P + j] = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2; 723 interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2; 724 } 725 grad_1d[i * P + k] = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx; 726 interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx; 727 } 728 c1 = c2; 729 } 730 } 731 // Pass to CeedBasisCreateTensorH1 732 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); 733 cleanup: 734 CeedCall(CeedFree(&interp_1d)); 735 CeedCall(CeedFree(&grad_1d)); 736 CeedCall(CeedFree(&nodes)); 737 CeedCall(CeedFree(&q_ref_1d)); 738 CeedCall(CeedFree(&q_weight_1d)); 739 return CEED_ERROR_SUCCESS; 740 } 741 742 /** 743 @brief Create a non tensor-product basis for H^1 discretizations 744 745 @param ceed A Ceed object where the CeedBasis will be created 746 @param topo Topology of element, e.g. hypercube, simplex, ect 747 @param num_comp Number of field components (1 for scalar fields) 748 @param num_nodes Total number of nodes 749 @param num_qpts Total number of quadrature points 750 @param interp Row-major (num_qpts * num_nodes) matrix expressing the values of 751 nodal basis functions at quadrature points 752 @param grad Row-major (num_qpts * dim * num_nodes) matrix expressing 753 derivatives of nodal basis functions at quadrature points 754 @param q_ref Array of length num_qpts holding the locations of quadrature 755 points on the reference element 756 @param q_weight Array of length num_qpts holding the quadrature weights on the 757 reference element 758 @param[out] basis Address of the variable where the newly created 759 CeedBasis will be stored. 760 761 @return An error code: 0 - success, otherwise - failure 762 763 @ref User 764 **/ 765 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 766 const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) { 767 CeedInt P = num_nodes, Q = num_qpts, dim = 0; 768 769 if (!ceed->BasisCreateH1) { 770 Ceed delegate; 771 CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); 772 773 if (!delegate) { 774 // LCOV_EXCL_START 775 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1"); 776 // LCOV_EXCL_STOP 777 } 778 779 CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis)); 780 return CEED_ERROR_SUCCESS; 781 } 782 783 if (num_comp < 1) { 784 // LCOV_EXCL_START 785 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); 786 // LCOV_EXCL_STOP 787 } 788 789 if (num_nodes < 1) { 790 // LCOV_EXCL_START 791 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); 792 // LCOV_EXCL_STOP 793 } 794 795 if (num_qpts < 1) { 796 // LCOV_EXCL_START 797 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); 798 // LCOV_EXCL_STOP 799 } 800 801 CeedCall(CeedCalloc(1, basis)); 802 803 CeedCall(CeedBasisGetTopologyDimension(topo, &dim)); 804 805 (*basis)->ceed = ceed; 806 CeedCall(CeedReference(ceed)); 807 (*basis)->ref_count = 1; 808 (*basis)->tensor_basis = 0; 809 (*basis)->dim = dim; 810 (*basis)->topo = topo; 811 (*basis)->num_comp = num_comp; 812 (*basis)->P = P; 813 (*basis)->Q = Q; 814 (*basis)->Q_comp = 1; 815 (*basis)->basis_space = 1; // 1 for H^1 space 816 CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d)); 817 CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d)); 818 if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0])); 819 if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0])); 820 CeedCall(CeedCalloc(Q * P, &(*basis)->interp)); 821 CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad)); 822 if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0])); 823 if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0])); 824 CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis)); 825 return CEED_ERROR_SUCCESS; 826 } 827 828 /** 829 @brief Create a non tensor-product basis for H(div) discretizations 830 831 @param ceed A Ceed object where the CeedBasis will be created 832 @param topo Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), 833 dimension of which is used in some array sizes below 834 @param num_comp Number of components (usually 1 for vectors in H(div) bases) 835 @param num_nodes Total number of nodes (dofs per element) 836 @param num_qpts Total number of quadrature points 837 @param interp Row-major (dim*num_qpts * num_nodes) matrix expressing the values of 838 nodal basis functions at quadrature points 839 @param div Row-major (num_qpts * num_nodes) matrix expressing 840 divergence of nodal basis functions at quadrature points 841 @param q_ref Array of length num_qpts holding the locations of quadrature 842 points on the reference element 843 @param q_weight Array of length num_qpts holding the quadrature weights on the 844 reference element 845 @param[out] basis Address of the variable where the newly created 846 CeedBasis will be stored. 847 848 @return An error code: 0 - success, otherwise - failure 849 850 @ref User 851 **/ 852 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 853 const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) { 854 CeedInt Q = num_qpts, P = num_nodes, dim = 0; 855 CeedCall(CeedBasisGetTopologyDimension(topo, &dim)); 856 if (!ceed->BasisCreateHdiv) { 857 Ceed delegate; 858 CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); 859 860 if (!delegate) { 861 // LCOV_EXCL_START 862 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not implement BasisCreateHdiv"); 863 // LCOV_EXCL_STOP 864 } 865 866 CeedCall(CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, num_qpts, interp, div, q_ref, q_weight, basis)); 867 return CEED_ERROR_SUCCESS; 868 } 869 870 if (num_comp < 1) { 871 // LCOV_EXCL_START 872 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); 873 // LCOV_EXCL_STOP 874 } 875 876 if (num_nodes < 1) { 877 // LCOV_EXCL_START 878 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); 879 // LCOV_EXCL_STOP 880 } 881 882 if (num_qpts < 1) { 883 // LCOV_EXCL_START 884 return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); 885 // LCOV_EXCL_STOP 886 } 887 888 CeedCall(CeedCalloc(1, basis)); 889 890 (*basis)->ceed = ceed; 891 CeedCall(CeedReference(ceed)); 892 (*basis)->ref_count = 1; 893 (*basis)->tensor_basis = 0; 894 (*basis)->dim = dim; 895 (*basis)->topo = topo; 896 (*basis)->num_comp = num_comp; 897 (*basis)->P = P; 898 (*basis)->Q = Q; 899 (*basis)->Q_comp = dim; 900 (*basis)->basis_space = 2; // 2 for H(div) space 901 CeedCall(CeedMalloc(Q * dim, &(*basis)->q_ref_1d)); 902 CeedCall(CeedMalloc(Q, &(*basis)->q_weight_1d)); 903 if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0])); 904 if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0])); 905 CeedCall(CeedMalloc(dim * Q * P, &(*basis)->interp)); 906 CeedCall(CeedMalloc(Q * P, &(*basis)->div)); 907 if (interp) memcpy((*basis)->interp, interp, dim * Q * P * sizeof(interp[0])); 908 if (div) memcpy((*basis)->div, div, Q * P * sizeof(div[0])); 909 CeedCall(ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, q_weight, *basis)); 910 return CEED_ERROR_SUCCESS; 911 } 912 913 /** 914 @brief Create a CeedBasis for projection from the nodes of `basis_from` 915 to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` and 916 `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`. 917 The interpolation is given by `interp_project = interp_to^+ * interp_from`, 918 where the pesudoinverse `interp_to^+` is given by QR factorization. 919 The gradient is given by `grad_project = interp_to^+ * grad_from`. 920 Note: `basis_from` and `basis_to` must have compatible quadrature 921 spaces. 922 Note: `basis_project` will have the same number of components as 923 `basis_from`, regardless of the number of components that 924 `basis_to` has. If `basis_from` has 3 components and `basis_to` 925 has 5 components, then `basis_project` will have 3 components. 926 927 @param[in] basis_from CeedBasis to prolong from 928 @param[in] basis_to CeedBasis to prolong to 929 @param[out] basis_project Address of the variable where the newly created 930 CeedBasis will be stored. 931 932 @return An error code: 0 - success, otherwise - failure 933 934 @ref User 935 **/ 936 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project) { 937 Ceed ceed; 938 CeedCall(CeedBasisGetCeed(basis_to, &ceed)); 939 940 // Create projectior matrix 941 CeedScalar *interp_project, *grad_project; 942 CeedCall(CeedBasisCreateProjectionMatrices(basis_from, basis_to, &interp_project, &grad_project)); 943 944 // Build basis 945 bool is_tensor; 946 CeedInt dim, num_comp; 947 CeedScalar *q_ref, *q_weight; 948 CeedCall(CeedBasisIsTensor(basis_to, &is_tensor)); 949 CeedCall(CeedBasisGetDimension(basis_to, &dim)); 950 CeedCall(CeedBasisGetNumComponents(basis_from, &num_comp)); 951 if (is_tensor) { 952 CeedInt P_1d_to, P_1d_from; 953 CeedCall(CeedBasisGetNumNodes1D(basis_from, &P_1d_from)); 954 CeedCall(CeedBasisGetNumNodes1D(basis_to, &P_1d_to)); 955 CeedCall(CeedCalloc(P_1d_to, &q_ref)); 956 CeedCall(CeedCalloc(P_1d_to, &q_weight)); 957 CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, interp_project, grad_project, q_ref, q_weight, basis_project)); 958 } else { 959 CeedElemTopology topo; 960 CeedCall(CeedBasisGetTopology(basis_to, &topo)); 961 CeedInt num_nodes_to, num_nodes_from; 962 CeedCall(CeedBasisGetNumNodes(basis_from, &num_nodes_from)); 963 CeedCall(CeedBasisGetNumNodes(basis_to, &num_nodes_to)); 964 CeedCall(CeedCalloc(num_nodes_to * dim, &q_ref)); 965 CeedCall(CeedCalloc(num_nodes_to, &q_weight)); 966 CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, interp_project, grad_project, q_ref, q_weight, basis_project)); 967 } 968 969 // Cleanup 970 CeedCall(CeedFree(&interp_project)); 971 CeedCall(CeedFree(&grad_project)); 972 CeedCall(CeedFree(&q_ref)); 973 CeedCall(CeedFree(&q_weight)); 974 975 return CEED_ERROR_SUCCESS; 976 } 977 978 /** 979 @brief Copy the pointer to a CeedBasis. Both pointers should 980 be destroyed with `CeedBasisDestroy()`; 981 Note: If `*basis_copy` is non-NULL, then it is assumed that 982 `*basis_copy` is a pointer to a CeedBasis. This CeedBasis 983 will be destroyed if `*basis_copy` is the only 984 reference to this CeedBasis. 985 986 @param basis CeedBasis to copy reference to 987 @param[out] basis_copy Variable to store copied reference 988 989 @return An error code: 0 - success, otherwise - failure 990 991 @ref User 992 **/ 993 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) { 994 CeedCall(CeedBasisReference(basis)); 995 CeedCall(CeedBasisDestroy(basis_copy)); 996 *basis_copy = basis; 997 return CEED_ERROR_SUCCESS; 998 } 999 1000 /** 1001 @brief View a CeedBasis 1002 1003 @param basis CeedBasis to view 1004 @param stream Stream to view to, e.g., stdout 1005 1006 @return An error code: 0 - success, otherwise - failure 1007 1008 @ref User 1009 **/ 1010 int CeedBasisView(CeedBasis basis, FILE *stream) { 1011 CeedFESpace FE_space = basis->basis_space; 1012 CeedElemTopology topo = basis->topo; 1013 1014 // Print FE space and element topology of the basis 1015 if (basis->tensor_basis) { 1016 fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space], 1017 CeedElemTopologies[topo], basis->dim, basis->P_1d, basis->Q_1d); 1018 } else { 1019 fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" CeedInt_FMT " Q=%" CeedInt_FMT "\n", CeedFESpaces[FE_space], 1020 CeedElemTopologies[topo], basis->dim, basis->P, basis->Q); 1021 } 1022 // Print quadrature data, interpolation/gradient/divergene/curl of the basis 1023 if (basis->tensor_basis) { // tensor basis 1024 CeedCall(CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, stream)); 1025 CeedCall(CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_weight_1d, stream)); 1026 CeedCall(CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->interp_1d, stream)); 1027 CeedCall(CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, basis->grad_1d, stream)); 1028 } else { // non-tensor basis 1029 CeedCall(CeedScalarView("qref", "\t% 12.8f", 1, basis->Q * basis->dim, basis->q_ref_1d, stream)); 1030 CeedCall(CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, stream)); 1031 CeedCall(CeedScalarView("interp", "\t% 12.8f", basis->Q_comp * basis->Q, basis->P, basis->interp, stream)); 1032 if (basis->grad) { 1033 CeedCall(CeedScalarView("grad", "\t% 12.8f", basis->dim * basis->Q, basis->P, basis->grad, stream)); 1034 } 1035 if (basis->div) { 1036 CeedCall(CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P, basis->div, stream)); 1037 } 1038 } 1039 return CEED_ERROR_SUCCESS; 1040 } 1041 1042 /** 1043 @brief Apply basis evaluation from nodes to quadrature points or vice versa 1044 1045 @param basis CeedBasis to evaluate 1046 @param num_elem The number of elements to apply the basis evaluation to; 1047 the backend will specify the ordering in 1048 CeedElemRestrictionCreateBlocked() 1049 @param t_mode \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature 1050 points, \ref CEED_TRANSPOSE to apply the transpose, mapping 1051 from quadrature points to nodes 1052 @param eval_mode \ref CEED_EVAL_NONE to use values directly, 1053 \ref CEED_EVAL_INTERP to use interpolated values, 1054 \ref CEED_EVAL_GRAD to use gradients, 1055 \ref CEED_EVAL_WEIGHT to use quadrature weights. 1056 @param[in] u Input CeedVector 1057 @param[out] v Output CeedVector 1058 1059 @return An error code: 0 - success, otherwise - failure 1060 1061 @ref User 1062 **/ 1063 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 1064 CeedSize u_length = 0, v_length; 1065 CeedInt dim, num_comp, num_nodes, num_qpts; 1066 CeedCall(CeedBasisGetDimension(basis, &dim)); 1067 CeedCall(CeedBasisGetNumComponents(basis, &num_comp)); 1068 CeedCall(CeedBasisGetNumNodes(basis, &num_nodes)); 1069 CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 1070 CeedCall(CeedVectorGetLength(v, &v_length)); 1071 if (u) { 1072 CeedCall(CeedVectorGetLength(u, &u_length)); 1073 } 1074 1075 if (!basis->Apply) { 1076 // LCOV_EXCL_START 1077 return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisApply"); 1078 // LCOV_EXCL_STOP 1079 } 1080 1081 // Check compatibility of topological and geometrical dimensions 1082 if ((t_mode == CEED_TRANSPOSE && (v_length % num_nodes != 0 || u_length % num_qpts != 0)) || 1083 (t_mode == CEED_NOTRANSPOSE && (u_length % num_nodes != 0 || v_length % num_qpts != 0))) { 1084 // LCOV_EXCL_START 1085 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Length of input/output vectors incompatible with basis dimensions"); 1086 // LCOV_EXCL_STOP 1087 } 1088 1089 // Check vector lengths to prevent out of bounds issues 1090 bool bad_dims = false; 1091 switch (eval_mode) { 1092 case CEED_EVAL_NONE: 1093 case CEED_EVAL_INTERP: 1094 bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) || 1095 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes))); 1096 break; 1097 case CEED_EVAL_GRAD: 1098 bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts * dim || v_length < num_elem * num_comp * num_nodes)) || 1099 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp * dim || u_length < num_elem * num_comp * num_nodes))); 1100 break; 1101 case CEED_EVAL_WEIGHT: 1102 bad_dims = v_length < num_elem * num_qpts; 1103 break; 1104 // LCOV_EXCL_START 1105 case CEED_EVAL_DIV: 1106 bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) || 1107 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes))); 1108 break; 1109 case CEED_EVAL_CURL: 1110 bad_dims = ((t_mode == CEED_TRANSPOSE && (u_length < num_elem * num_comp * num_qpts || v_length < num_elem * num_comp * num_nodes)) || 1111 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem * num_qpts * num_comp || u_length < num_elem * num_comp * num_nodes))); 1112 break; 1113 // LCOV_EXCL_STOP 1114 } 1115 if (bad_dims) { 1116 // LCOV_EXCL_START 1117 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, "Input/output vectors too short for basis and evaluation mode"); 1118 // LCOV_EXCL_STOP 1119 } 1120 1121 CeedCall(basis->Apply(basis, num_elem, t_mode, eval_mode, u, v)); 1122 return CEED_ERROR_SUCCESS; 1123 } 1124 1125 /** 1126 @brief Get Ceed associated with a CeedBasis 1127 1128 @param basis CeedBasis 1129 @param[out] ceed Variable to store Ceed 1130 1131 @return An error code: 0 - success, otherwise - failure 1132 1133 @ref Advanced 1134 **/ 1135 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) { 1136 *ceed = basis->ceed; 1137 return CEED_ERROR_SUCCESS; 1138 } 1139 1140 /** 1141 @brief Get dimension for given CeedBasis 1142 1143 @param basis CeedBasis 1144 @param[out] dim Variable to store dimension of basis 1145 1146 @return An error code: 0 - success, otherwise - failure 1147 1148 @ref Advanced 1149 **/ 1150 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) { 1151 *dim = basis->dim; 1152 return CEED_ERROR_SUCCESS; 1153 } 1154 1155 /** 1156 @brief Get topology for given CeedBasis 1157 1158 @param basis CeedBasis 1159 @param[out] topo Variable to store topology of basis 1160 1161 @return An error code: 0 - success, otherwise - failure 1162 1163 @ref Advanced 1164 **/ 1165 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) { 1166 *topo = basis->topo; 1167 return CEED_ERROR_SUCCESS; 1168 } 1169 1170 /** 1171 @brief Get number of Q-vector components for given CeedBasis 1172 1173 @param basis CeedBasis 1174 @param[out] Q_comp Variable to store number of Q-vector components of basis 1175 1176 @return An error code: 0 - success, otherwise - failure 1177 1178 @ref Advanced 1179 **/ 1180 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) { 1181 *Q_comp = basis->Q_comp; 1182 return CEED_ERROR_SUCCESS; 1183 } 1184 1185 /** 1186 @brief Get number of components for given CeedBasis 1187 1188 @param basis CeedBasis 1189 @param[out] num_comp Variable to store number of components of basis 1190 1191 @return An error code: 0 - success, otherwise - failure 1192 1193 @ref Advanced 1194 **/ 1195 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) { 1196 *num_comp = basis->num_comp; 1197 return CEED_ERROR_SUCCESS; 1198 } 1199 1200 /** 1201 @brief Get total number of nodes (in dim dimensions) of a CeedBasis 1202 1203 @param basis CeedBasis 1204 @param[out] P Variable to store number of nodes 1205 1206 @return An error code: 0 - success, otherwise - failure 1207 1208 @ref Utility 1209 **/ 1210 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) { 1211 *P = basis->P; 1212 return CEED_ERROR_SUCCESS; 1213 } 1214 1215 /** 1216 @brief Get total number of nodes (in 1 dimension) of a CeedBasis 1217 1218 @param basis CeedBasis 1219 @param[out] P_1d Variable to store number of nodes 1220 1221 @return An error code: 0 - success, otherwise - failure 1222 1223 @ref Advanced 1224 **/ 1225 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) { 1226 if (!basis->tensor_basis) { 1227 // LCOV_EXCL_START 1228 return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply P_1d for non-tensor basis"); 1229 // LCOV_EXCL_STOP 1230 } 1231 1232 *P_1d = basis->P_1d; 1233 return CEED_ERROR_SUCCESS; 1234 } 1235 1236 /** 1237 @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis 1238 1239 @param basis CeedBasis 1240 @param[out] Q Variable to store number of quadrature points 1241 1242 @return An error code: 0 - success, otherwise - failure 1243 1244 @ref Utility 1245 **/ 1246 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) { 1247 *Q = basis->Q; 1248 return CEED_ERROR_SUCCESS; 1249 } 1250 1251 /** 1252 @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis 1253 1254 @param basis CeedBasis 1255 @param[out] Q_1d Variable to store number of quadrature points 1256 1257 @return An error code: 0 - success, otherwise - failure 1258 1259 @ref Advanced 1260 **/ 1261 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) { 1262 if (!basis->tensor_basis) { 1263 // LCOV_EXCL_START 1264 return CeedError(basis->ceed, CEED_ERROR_MINOR, "Cannot supply Q_1d for non-tensor basis"); 1265 // LCOV_EXCL_STOP 1266 } 1267 1268 *Q_1d = basis->Q_1d; 1269 return CEED_ERROR_SUCCESS; 1270 } 1271 1272 /** 1273 @brief Get reference coordinates of quadrature points (in dim dimensions) 1274 of a CeedBasis 1275 1276 @param basis CeedBasis 1277 @param[out] q_ref Variable to store reference coordinates of quadrature points 1278 1279 @return An error code: 0 - success, otherwise - failure 1280 1281 @ref Advanced 1282 **/ 1283 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) { 1284 *q_ref = basis->q_ref_1d; 1285 return CEED_ERROR_SUCCESS; 1286 } 1287 1288 /** 1289 @brief Get quadrature weights of quadrature points (in dim dimensions) 1290 of a CeedBasis 1291 1292 @param basis CeedBasis 1293 @param[out] q_weight Variable to store quadrature weights 1294 1295 @return An error code: 0 - success, otherwise - failure 1296 1297 @ref Advanced 1298 **/ 1299 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) { 1300 *q_weight = basis->q_weight_1d; 1301 return CEED_ERROR_SUCCESS; 1302 } 1303 1304 /** 1305 @brief Get interpolation matrix of a CeedBasis 1306 1307 @param basis CeedBasis 1308 @param[out] interp Variable to store interpolation matrix 1309 1310 @return An error code: 0 - success, otherwise - failure 1311 1312 @ref Advanced 1313 **/ 1314 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) { 1315 if (!basis->interp && basis->tensor_basis) { 1316 // Allocate 1317 CeedCall(CeedMalloc(basis->Q * basis->P, &basis->interp)); 1318 1319 // Initialize 1320 for (CeedInt i = 0; i < basis->Q * basis->P; i++) basis->interp[i] = 1.0; 1321 1322 // Calculate 1323 for (CeedInt d = 0; d < basis->dim; d++) { 1324 for (CeedInt qpt = 0; qpt < basis->Q; qpt++) { 1325 for (CeedInt node = 0; node < basis->P; node++) { 1326 CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d; 1327 CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d; 1328 basis->interp[qpt * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p]; 1329 } 1330 } 1331 } 1332 } 1333 *interp = basis->interp; 1334 return CEED_ERROR_SUCCESS; 1335 } 1336 1337 /** 1338 @brief Get 1D interpolation matrix of a tensor product CeedBasis 1339 1340 @param basis CeedBasis 1341 @param[out] interp_1d Variable to store interpolation matrix 1342 1343 @return An error code: 0 - success, otherwise - failure 1344 1345 @ref Backend 1346 **/ 1347 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) { 1348 if (!basis->tensor_basis) { 1349 // LCOV_EXCL_START 1350 return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis."); 1351 // LCOV_EXCL_STOP 1352 } 1353 1354 *interp_1d = basis->interp_1d; 1355 return CEED_ERROR_SUCCESS; 1356 } 1357 1358 /** 1359 @brief Get gradient matrix of a CeedBasis 1360 1361 @param basis CeedBasis 1362 @param[out] grad Variable to store gradient matrix 1363 1364 @return An error code: 0 - success, otherwise - failure 1365 1366 @ref Advanced 1367 **/ 1368 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) { 1369 if (!basis->grad && basis->tensor_basis) { 1370 // Allocate 1371 CeedCall(CeedMalloc(basis->dim * basis->Q * basis->P, &basis->grad)); 1372 1373 // Initialize 1374 for (CeedInt i = 0; i < basis->dim * basis->Q * basis->P; i++) basis->grad[i] = 1.0; 1375 1376 // Calculate 1377 for (CeedInt d = 0; d < basis->dim; d++) { 1378 for (CeedInt i = 0; i < basis->dim; i++) { 1379 for (CeedInt qpt = 0; qpt < basis->Q; qpt++) { 1380 for (CeedInt node = 0; node < basis->P; node++) { 1381 CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d; 1382 CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d; 1383 if (i == d) basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->grad_1d[q * basis->P_1d + p]; 1384 else basis->grad[(i * basis->Q + qpt) * (basis->P) + node] *= basis->interp_1d[q * basis->P_1d + p]; 1385 } 1386 } 1387 } 1388 } 1389 } 1390 *grad = basis->grad; 1391 return CEED_ERROR_SUCCESS; 1392 } 1393 1394 /** 1395 @brief Get 1D gradient matrix of a tensor product CeedBasis 1396 1397 @param basis CeedBasis 1398 @param[out] grad_1d Variable to store gradient matrix 1399 1400 @return An error code: 0 - success, otherwise - failure 1401 1402 @ref Advanced 1403 **/ 1404 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) { 1405 if (!basis->tensor_basis) { 1406 // LCOV_EXCL_START 1407 return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis is not a tensor product basis."); 1408 // LCOV_EXCL_STOP 1409 } 1410 1411 *grad_1d = basis->grad_1d; 1412 return CEED_ERROR_SUCCESS; 1413 } 1414 1415 /** 1416 @brief Get divergence matrix of a CeedBasis 1417 1418 @param basis CeedBasis 1419 @param[out] div Variable to store divergence matrix 1420 1421 @return An error code: 0 - success, otherwise - failure 1422 1423 @ref Advanced 1424 **/ 1425 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) { 1426 if (!basis->div) { 1427 // LCOV_EXCL_START 1428 return CeedError(basis->ceed, CEED_ERROR_MINOR, "CeedBasis does not have divergence matrix."); 1429 // LCOV_EXCL_STOP 1430 } 1431 1432 *div = basis->div; 1433 return CEED_ERROR_SUCCESS; 1434 } 1435 1436 /** 1437 @brief Destroy a CeedBasis 1438 1439 @param basis CeedBasis to destroy 1440 1441 @return An error code: 0 - success, otherwise - failure 1442 1443 @ref User 1444 **/ 1445 int CeedBasisDestroy(CeedBasis *basis) { 1446 if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS; 1447 if ((*basis)->Destroy) CeedCall((*basis)->Destroy(*basis)); 1448 if ((*basis)->contract) CeedCall(CeedTensorContractDestroy(&(*basis)->contract)); 1449 CeedCall(CeedFree(&(*basis)->interp)); 1450 CeedCall(CeedFree(&(*basis)->interp_1d)); 1451 CeedCall(CeedFree(&(*basis)->grad)); 1452 CeedCall(CeedFree(&(*basis)->div)); 1453 CeedCall(CeedFree(&(*basis)->grad_1d)); 1454 CeedCall(CeedFree(&(*basis)->q_ref_1d)); 1455 CeedCall(CeedFree(&(*basis)->q_weight_1d)); 1456 CeedCall(CeedDestroy(&(*basis)->ceed)); 1457 CeedCall(CeedFree(basis)); 1458 return CEED_ERROR_SUCCESS; 1459 } 1460 1461 /** 1462 @brief Construct a Gauss-Legendre quadrature 1463 1464 @param Q Number of quadrature points (integrates polynomials of 1465 degree 2*Q-1 exactly) 1466 @param[out] q_ref_1d Array of length Q to hold the abscissa on [-1, 1] 1467 @param[out] q_weight_1d Array of length Q to hold the weights 1468 1469 @return An error code: 0 - success, otherwise - failure 1470 1471 @ref Utility 1472 **/ 1473 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) { 1474 // Allocate 1475 CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0 * atan(1.0); 1476 // Build q_ref_1d, q_weight_1d 1477 for (CeedInt i = 0; i <= Q / 2; i++) { 1478 // Guess 1479 xi = cos(PI * (CeedScalar)(2 * i + 1) / ((CeedScalar)(2 * Q))); 1480 // Pn(xi) 1481 P0 = 1.0; 1482 P1 = xi; 1483 P2 = 0.0; 1484 for (CeedInt j = 2; j <= Q; j++) { 1485 P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j)); 1486 P0 = P1; 1487 P1 = P2; 1488 } 1489 // First Newton Step 1490 dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0); 1491 xi = xi - P2 / dP2; 1492 // Newton to convergence 1493 for (CeedInt k = 0; k < 100 && fabs(P2) > 10 * CEED_EPSILON; k++) { 1494 P0 = 1.0; 1495 P1 = xi; 1496 for (CeedInt j = 2; j <= Q; j++) { 1497 P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j)); 1498 P0 = P1; 1499 P1 = P2; 1500 } 1501 dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0); 1502 xi = xi - P2 / dP2; 1503 } 1504 // Save xi, wi 1505 wi = 2.0 / ((1.0 - xi * xi) * dP2 * dP2); 1506 q_weight_1d[i] = wi; 1507 q_weight_1d[Q - 1 - i] = wi; 1508 q_ref_1d[i] = -xi; 1509 q_ref_1d[Q - 1 - i] = xi; 1510 } 1511 return CEED_ERROR_SUCCESS; 1512 } 1513 1514 /** 1515 @brief Construct a Gauss-Legendre-Lobatto quadrature 1516 1517 @param Q Number of quadrature points (integrates polynomials of 1518 degree 2*Q-3 exactly) 1519 @param[out] q_ref_1d Array of length Q to hold the abscissa on [-1, 1] 1520 @param[out] q_weight_1d Array of length Q to hold the weights 1521 1522 @return An error code: 0 - success, otherwise - failure 1523 1524 @ref Utility 1525 **/ 1526 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d) { 1527 // Allocate 1528 CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0 * atan(1.0); 1529 // Build q_ref_1d, q_weight_1d 1530 // Set endpoints 1531 if (Q < 2) { 1532 // LCOV_EXCL_START 1533 return CeedError(NULL, CEED_ERROR_DIMENSION, "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q); 1534 // LCOV_EXCL_STOP 1535 } 1536 wi = 2.0 / ((CeedScalar)(Q * (Q - 1))); 1537 if (q_weight_1d) { 1538 q_weight_1d[0] = wi; 1539 q_weight_1d[Q - 1] = wi; 1540 } 1541 q_ref_1d[0] = -1.0; 1542 q_ref_1d[Q - 1] = 1.0; 1543 // Interior 1544 for (CeedInt i = 1; i <= (Q - 1) / 2; i++) { 1545 // Guess 1546 xi = cos(PI * (CeedScalar)(i) / (CeedScalar)(Q - 1)); 1547 // Pn(xi) 1548 P0 = 1.0; 1549 P1 = xi; 1550 P2 = 0.0; 1551 for (CeedInt j = 2; j < Q; j++) { 1552 P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j)); 1553 P0 = P1; 1554 P1 = P2; 1555 } 1556 // First Newton step 1557 dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0); 1558 d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi); 1559 xi = xi - dP2 / d2P2; 1560 // Newton to convergence 1561 for (CeedInt k = 0; k < 100 && fabs(dP2) > 10 * CEED_EPSILON; k++) { 1562 P0 = 1.0; 1563 P1 = xi; 1564 for (CeedInt j = 2; j < Q; j++) { 1565 P2 = (((CeedScalar)(2 * j - 1)) * xi * P1 - ((CeedScalar)(j - 1)) * P0) / ((CeedScalar)(j)); 1566 P0 = P1; 1567 P1 = P2; 1568 } 1569 dP2 = (xi * P2 - P0) * (CeedScalar)Q / (xi * xi - 1.0); 1570 d2P2 = (2 * xi * dP2 - (CeedScalar)(Q * (Q - 1)) * P2) / (1.0 - xi * xi); 1571 xi = xi - dP2 / d2P2; 1572 } 1573 // Save xi, wi 1574 wi = 2.0 / (((CeedScalar)(Q * (Q - 1))) * P2 * P2); 1575 if (q_weight_1d) { 1576 q_weight_1d[i] = wi; 1577 q_weight_1d[Q - 1 - i] = wi; 1578 } 1579 q_ref_1d[i] = -xi; 1580 q_ref_1d[Q - 1 - i] = xi; 1581 } 1582 return CEED_ERROR_SUCCESS; 1583 } 1584 1585 /** 1586 @brief Return QR Factorization of a matrix 1587 1588 @param ceed A Ceed context for error handling 1589 @param[in,out] mat Row-major matrix to be factorized in place 1590 @param[in,out] tau Vector of length m of scaling factors 1591 @param m Number of rows 1592 @param n Number of columns 1593 1594 @return An error code: 0 - success, otherwise - failure 1595 1596 @ref Utility 1597 **/ 1598 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) { 1599 CeedScalar v[m]; 1600 1601 // Check matrix shape 1602 if (n > m) { 1603 // LCOV_EXCL_START 1604 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m"); 1605 // LCOV_EXCL_STOP 1606 } 1607 1608 for (CeedInt i = 0; i < n; i++) { 1609 if (i >= m - 1) { // last row of matrix, no reflection needed 1610 tau[i] = 0.; 1611 break; 1612 } 1613 // Calculate Householder vector, magnitude 1614 CeedScalar sigma = 0.0; 1615 v[i] = mat[i + n * i]; 1616 for (CeedInt j = i + 1; j < m; j++) { 1617 v[j] = mat[i + n * j]; 1618 sigma += v[j] * v[j]; 1619 } 1620 CeedScalar norm = sqrt(v[i] * v[i] + sigma); // norm of v[i:m] 1621 CeedScalar R_ii = -copysign(norm, v[i]); 1622 v[i] -= R_ii; 1623 // norm of v[i:m] after modification above and scaling below 1624 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1625 // tau = 2 / (norm*norm) 1626 tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma); 1627 for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i]; 1628 1629 // Apply Householder reflector to lower right panel 1630 CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1); 1631 // Save v 1632 mat[i + n * i] = R_ii; 1633 for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j]; 1634 } 1635 return CEED_ERROR_SUCCESS; 1636 } 1637 1638 /** 1639 @brief Return symmetric Schur decomposition of the symmetric matrix mat via 1640 symmetric QR factorization 1641 1642 @param ceed A Ceed context for error handling 1643 @param[in,out] mat Row-major matrix to be factorized in place 1644 @param[out] lambda Vector of length n of eigenvalues 1645 @param n Number of rows/columns 1646 1647 @return An error code: 0 - success, otherwise - failure 1648 1649 @ref Utility 1650 **/ 1651 CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) { 1652 // Check bounds for clang-tidy 1653 if (n < 2) { 1654 // LCOV_EXCL_START 1655 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars"); 1656 // LCOV_EXCL_STOP 1657 } 1658 1659 CeedScalar v[n - 1], tau[n - 1], mat_T[n * n]; 1660 1661 // Copy mat to mat_T and set mat to I 1662 memcpy(mat_T, mat, n * n * sizeof(mat[0])); 1663 for (CeedInt i = 0; i < n; i++) { 1664 for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0; 1665 } 1666 1667 // Reduce to tridiagonal 1668 for (CeedInt i = 0; i < n - 1; i++) { 1669 // Calculate Householder vector, magnitude 1670 CeedScalar sigma = 0.0; 1671 v[i] = mat_T[i + n * (i + 1)]; 1672 for (CeedInt j = i + 1; j < n - 1; j++) { 1673 v[j] = mat_T[i + n * (j + 1)]; 1674 sigma += v[j] * v[j]; 1675 } 1676 CeedScalar norm = sqrt(v[i] * v[i] + sigma); // norm of v[i:n-1] 1677 CeedScalar R_ii = -copysign(norm, v[i]); 1678 v[i] -= R_ii; 1679 // norm of v[i:m] after modification above and scaling below 1680 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1681 // tau = 2 / (norm*norm) 1682 tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma); 1683 for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i]; 1684 1685 // Update sub and super diagonal 1686 for (CeedInt j = i + 2; j < n; j++) { 1687 mat_T[i + n * j] = 0; 1688 mat_T[j + n * i] = 0; 1689 } 1690 // Apply symmetric Householder reflector to lower right panel 1691 CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1); 1692 CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n); 1693 1694 // Save v 1695 mat_T[i + n * (i + 1)] = R_ii; 1696 mat_T[(i + 1) + n * i] = R_ii; 1697 for (CeedInt j = i + 1; j < n - 1; j++) { 1698 mat_T[i + n * (j + 1)] = v[j]; 1699 } 1700 } 1701 // Backwards accumulation of Q 1702 for (CeedInt i = n - 2; i >= 0; i--) { 1703 if (tau[i] > 0.0) { 1704 v[i] = 1; 1705 for (CeedInt j = i + 1; j < n - 1; j++) { 1706 v[j] = mat_T[i + n * (j + 1)]; 1707 mat_T[i + n * (j + 1)] = 0; 1708 } 1709 CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1); 1710 } 1711 } 1712 1713 // Reduce sub and super diagonal 1714 CeedInt p = 0, q = 0, itr = 0, max_itr = n * n * n * n; 1715 CeedScalar tol = CEED_EPSILON; 1716 1717 while (itr < max_itr) { 1718 // Update p, q, size of reduced portions of diagonal 1719 p = 0; 1720 q = 0; 1721 for (CeedInt i = n - 2; i >= 0; i--) { 1722 if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1; 1723 else break; 1724 } 1725 for (CeedInt i = 0; i < n - q - 1; i++) { 1726 if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1; 1727 else break; 1728 } 1729 if (q == n - 1) break; // Finished reducing 1730 1731 // Reduce tridiagonal portion 1732 CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)]; 1733 CeedScalar d = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2; 1734 CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d)); 1735 CeedScalar x = mat_T[p + n * p] - mu; 1736 CeedScalar z = mat_T[p + n * (p + 1)]; 1737 for (CeedInt k = p; k < n - q - 1; k++) { 1738 // Compute Givens rotation 1739 CeedScalar c = 1, s = 0; 1740 if (fabs(z) > tol) { 1741 if (fabs(z) > fabs(x)) { 1742 CeedScalar tau = -x / z; 1743 s = 1 / sqrt(1 + tau * tau), c = s * tau; 1744 } else { 1745 CeedScalar tau = -z / x; 1746 c = 1 / sqrt(1 + tau * tau), s = c * tau; 1747 } 1748 } 1749 1750 // Apply Givens rotation to T 1751 CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); 1752 CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n); 1753 1754 // Apply Givens rotation to Q 1755 CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); 1756 1757 // Update x, z 1758 if (k < n - q - 2) { 1759 x = mat_T[k + n * (k + 1)]; 1760 z = mat_T[k + n * (k + 2)]; 1761 } 1762 } 1763 itr++; 1764 } 1765 1766 // Save eigenvalues 1767 for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i]; 1768 1769 // Check convergence 1770 if (itr == max_itr && q < n - 1) { 1771 // LCOV_EXCL_START 1772 return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge"); 1773 // LCOV_EXCL_STOP 1774 } 1775 return CEED_ERROR_SUCCESS; 1776 } 1777 CeedPragmaOptimizeOn 1778 1779 /** 1780 @brief Return Simultaneous Diagonalization of two matrices. This solves the 1781 generalized eigenvalue problem A x = lambda B x, where A and B 1782 are symmetric and B is positive definite. We generate the matrix X 1783 and vector Lambda such that X^T A X = Lambda and X^T B X = I. This 1784 is equivalent to the LAPACK routine 'sygv' with TYPE = 1. 1785 1786 @param ceed A Ceed context for error handling 1787 @param[in] mat_A Row-major matrix to be factorized with eigenvalues 1788 @param[in] mat_B Row-major matrix to be factorized to identity 1789 @param[out] mat_X Row-major orthogonal matrix 1790 @param[out] lambda Vector of length n of generalized eigenvalues 1791 @param n Number of rows/columns 1792 1793 @return An error code: 0 - success, otherwise - failure 1794 1795 @ref Utility 1796 **/ 1797 CeedPragmaOptimizeOff int 1798 CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) { 1799 CeedScalar *mat_C, *mat_G, *vec_D; 1800 CeedCall(CeedCalloc(n * n, &mat_C)); 1801 CeedCall(CeedCalloc(n * n, &mat_G)); 1802 CeedCall(CeedCalloc(n, &vec_D)); 1803 1804 // Compute B = G D G^T 1805 memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0])); 1806 CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n)); 1807 1808 // Sort eigenvalues 1809 for (CeedInt i = n - 1; i >= 0; i--) { 1810 for (CeedInt j = 0; j < i; j++) { 1811 if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) { 1812 CeedScalar temp; 1813 temp = vec_D[j]; 1814 vec_D[j] = vec_D[j + 1]; 1815 vec_D[j + 1] = temp; 1816 for (CeedInt k = 0; k < n; k++) { 1817 temp = mat_G[k * n + j]; 1818 mat_G[k * n + j] = mat_G[k * n + j + 1]; 1819 mat_G[k * n + j + 1] = temp; 1820 } 1821 } 1822 } 1823 } 1824 1825 // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T 1826 // = D^-1/2 G^T A G D^-1/2 1827 // -- D = D^-1/2 1828 for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]); 1829 // -- G = G D^-1/2 1830 // -- C = D^-1/2 G^T 1831 for (CeedInt i = 0; i < n; i++) { 1832 for (CeedInt j = 0; j < n; j++) { 1833 mat_G[i * n + j] *= vec_D[j]; 1834 mat_C[j * n + i] = mat_G[i * n + j]; 1835 } 1836 } 1837 // -- X = (D^-1/2 G^T) A 1838 CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n)); 1839 // -- C = (D^-1/2 G^T A) (G D^-1/2) 1840 CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n)); 1841 1842 // Compute Q^T C Q = lambda 1843 CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n)); 1844 1845 // Sort eigenvalues 1846 for (CeedInt i = n - 1; i >= 0; i--) { 1847 for (CeedInt j = 0; j < i; j++) { 1848 if (fabs(lambda[j]) > fabs(lambda[j + 1])) { 1849 CeedScalar temp; 1850 temp = lambda[j]; 1851 lambda[j] = lambda[j + 1]; 1852 lambda[j + 1] = temp; 1853 for (CeedInt k = 0; k < n; k++) { 1854 temp = mat_C[k * n + j]; 1855 mat_C[k * n + j] = mat_C[k * n + j + 1]; 1856 mat_C[k * n + j + 1] = temp; 1857 } 1858 } 1859 } 1860 } 1861 1862 // Set X = (G D^1/2)^-T Q 1863 // = G D^-1/2 Q 1864 CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n)); 1865 1866 // Cleanup 1867 CeedCall(CeedFree(&mat_C)); 1868 CeedCall(CeedFree(&mat_G)); 1869 CeedCall(CeedFree(&vec_D)); 1870 return CEED_ERROR_SUCCESS; 1871 } 1872 CeedPragmaOptimizeOn 1873 1874 /// @} 1875