1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed-impl.h> 11 #include <math.h> 12 #include <stdbool.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 /// @file 17 /// Implementation of CeedBasis interfaces 18 19 /// @cond DOXYGEN_SKIP 20 static struct CeedBasis_private ceed_basis_collocated; 21 /// @endcond 22 23 /// @addtogroup CeedBasisUser 24 /// @{ 25 26 /// Indicate that the quadrature points are collocated with the nodes 27 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated; 28 29 /// @} 30 31 /// ---------------------------------------------------------------------------- 32 /// CeedBasis Library Internal Functions 33 /// ---------------------------------------------------------------------------- 34 /// @addtogroup CeedBasisDeveloper 35 /// @{ 36 37 /** 38 @brief Compute Householder reflection 39 40 Computes A = (I - b v v^T) A 41 where A is an mxn matrix indexed as A[i*row + j*col] 42 43 @param[in,out] A Matrix to apply Householder reflection to, in place 44 @param v Householder vector 45 @param b Scaling factor 46 @param m Number of rows in A 47 @param n Number of columns in A 48 @param row Row stride 49 @param col Col stride 50 51 @return An error code: 0 - success, otherwise - failure 52 53 @ref Developer 54 **/ 55 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, 56 CeedScalar b, CeedInt m, CeedInt n, 57 CeedInt row, CeedInt col) { 58 for (CeedInt j=0; j<n; j++) { 59 CeedScalar w = A[0*row + j*col]; 60 for (CeedInt i=1; i<m; i++) 61 w += v[i] * A[i*row + j*col]; 62 A[0*row + j*col] -= b * w; 63 for (CeedInt i=1; i<m; i++) 64 A[i*row + j*col] -= b * w * v[i]; 65 } 66 return CEED_ERROR_SUCCESS; 67 } 68 69 /** 70 @brief Apply Householder Q matrix 71 72 Compute A = Q A where Q is mxm and A is mxn. 73 74 @param[in,out] A Matrix to apply Householder Q to, in place 75 @param Q Householder Q matrix 76 @param tau Householder scaling factors 77 @param t_mode Transpose mode for application 78 @param m Number of rows in A 79 @param n Number of columns in A 80 @param k Number of elementary reflectors in Q, k<m 81 @param row Row stride in A 82 @param col Col stride in A 83 84 @return An error code: 0 - success, otherwise - failure 85 86 @ref Developer 87 **/ 88 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, 89 const CeedScalar *tau, CeedTransposeMode t_mode, 90 CeedInt m, CeedInt n, CeedInt k, 91 CeedInt row, CeedInt col) { 92 int ierr; 93 CeedScalar *v; 94 ierr = CeedMalloc(m, &v); CeedChk(ierr); 95 for (CeedInt ii=0; ii<k; ii++) { 96 CeedInt i = t_mode == CEED_TRANSPOSE ? ii : k-1-ii; 97 for (CeedInt j=i+1; j<m; j++) 98 v[j] = Q[j*k+i]; 99 // Apply Householder reflector (I - tau v v^T) collo_grad_1d^T 100 ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col); 101 CeedChk(ierr); 102 } 103 ierr = CeedFree(&v); CeedChk(ierr); 104 return CEED_ERROR_SUCCESS; 105 } 106 107 /** 108 @brief Compute Givens rotation 109 110 Computes A = G A (or G^T A in transpose mode) 111 where A is an mxn matrix indexed as A[i*n + j*m] 112 113 @param[in,out] A Row major matrix to apply Givens rotation to, in place 114 @param c Cosine factor 115 @param s Sine factor 116 @param t_mode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, 117 which has the effect of rotating columns of A clockwise; 118 @ref CEED_TRANSPOSE for the opposite rotation 119 @param i First row/column to apply rotation 120 @param k Second row/column to apply rotation 121 @param m Number of rows in A 122 @param n Number of columns in A 123 124 @return An error code: 0 - success, otherwise - failure 125 126 @ref Developer 127 **/ 128 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, 129 CeedTransposeMode t_mode, CeedInt i, CeedInt k, 130 CeedInt m, CeedInt n) { 131 CeedInt stride_j = 1, stride_ik = m, num_its = n; 132 if (t_mode == CEED_NOTRANSPOSE) { 133 stride_j = n; stride_ik = 1; num_its = m; 134 } 135 136 // Apply rotation 137 for (CeedInt j=0; j<num_its; j++) { 138 CeedScalar tau1 = A[i*stride_ik+j*stride_j], tau2 = A[k*stride_ik+j*stride_j]; 139 A[i*stride_ik+j*stride_j] = c*tau1 - s*tau2; 140 A[k*stride_ik+j*stride_j] = s*tau1 + c*tau2; 141 } 142 return CEED_ERROR_SUCCESS; 143 } 144 145 /** 146 @brief View an array stored in a CeedBasis 147 148 @param[in] name Name of array 149 @param[in] fp_fmt Printing format 150 @param[in] m Number of rows in array 151 @param[in] n Number of columns in array 152 @param[in] a Array to be viewed 153 @param[in] stream Stream to view to, e.g., stdout 154 155 @return An error code: 0 - success, otherwise - failure 156 157 @ref Developer 158 **/ 159 static int CeedScalarView(const char *name, const char *fp_fmt, CeedInt m, 160 CeedInt n, const CeedScalar *a, FILE *stream) { 161 for (CeedInt i=0; i<m; i++) { 162 if (m > 1) 163 fprintf(stream, "%12s[%" CeedInt_FMT "]:", name, i); 164 else 165 fprintf(stream, "%12s:", name); 166 for (CeedInt j=0; j<n; j++) 167 fprintf(stream, fp_fmt, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0); 168 fputs("\n", stream); 169 } 170 return CEED_ERROR_SUCCESS; 171 } 172 173 /** 174 @brief Create the interpolation and gradient matrices for projection from 175 the nodes of `basis_from` to the nodes of `basis_to`. 176 The interpolation is given by `interp_project = interp_to^+ * interp_from`, 177 where the pesudoinverse `interp_to^+` is given by QR factorization. 178 The gradient is given by `grad_project = interp_to^+ * grad_from`. 179 Note: `basis_from` and `basis_to` must have compatible quadrature 180 spaces. 181 182 @param[in] basis_from CeedBasis to project from 183 @param[in] basis_to CeedBasis to project to 184 @param[out] interp_project Address of the variable where the newly created 185 interpolation matrix will be stored. 186 @param[out] grad_project Address of the variable where the newly created 187 gradient matrix will be stored. 188 189 @return An error code: 0 - success, otherwise - failure 190 191 @ref Developer 192 **/ 193 static int CeedBasisCreateProjectionMatrices(CeedBasis basis_from, 194 CeedBasis basis_to, CeedScalar **interp_project, CeedScalar **grad_project) { 195 int ierr; 196 Ceed ceed; 197 ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); 198 199 // Check for compatible quadrature spaces 200 CeedInt Q_to, Q_from; 201 ierr = CeedBasisGetNumQuadraturePoints(basis_to, &Q_to); CeedChk(ierr); 202 ierr = CeedBasisGetNumQuadraturePoints(basis_from, &Q_from); CeedChk(ierr); 203 if (Q_to != Q_from) 204 // LCOV_EXCL_START 205 return CeedError(ceed, CEED_ERROR_DIMENSION, 206 "Bases must have compatible quadrature spaces"); 207 // LCOV_EXCL_STOP 208 209 // Check for matching tensor or non-tensor 210 CeedInt P_to, P_from, Q = Q_to; 211 bool is_tensor_to, is_tensor_from; 212 ierr = CeedBasisIsTensor(basis_to, &is_tensor_to); CeedChk(ierr); 213 ierr = CeedBasisIsTensor(basis_from, &is_tensor_from); CeedChk(ierr); 214 if (is_tensor_to && is_tensor_from) { 215 ierr = CeedBasisGetNumNodes1D(basis_to, &P_to); CeedChk(ierr); 216 ierr = CeedBasisGetNumNodes1D(basis_from, &P_from); CeedChk(ierr); 217 ierr = CeedBasisGetNumQuadraturePoints1D(basis_from, &Q); CeedChk(ierr); 218 } else if (!is_tensor_to && !is_tensor_from) { 219 ierr = CeedBasisGetNumNodes(basis_to, &P_to); CeedChk(ierr); 220 ierr = CeedBasisGetNumNodes(basis_from, &P_from); CeedChk(ierr); 221 } else { 222 // LCOV_EXCL_START 223 return CeedError(ceed, CEED_ERROR_MINOR, 224 "Bases must both be tensor or non-tensor"); 225 // LCOV_EXCL_STOP 226 } 227 228 // Get source matrices 229 CeedInt dim; 230 CeedScalar *interp_to, *interp_from, *tau; 231 ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); 232 ierr = CeedMalloc(Q * P_from, &interp_from); CeedChk(ierr); 233 ierr = CeedMalloc(Q * P_to, &interp_to); CeedChk(ierr); 234 ierr = CeedCalloc(P_to * P_from, interp_project); CeedChk(ierr); 235 ierr = CeedCalloc(P_to * P_from * (is_tensor_to ? 1 : dim), grad_project); 236 CeedChk(ierr); 237 ierr = CeedMalloc(Q, &tau); CeedChk(ierr); 238 const CeedScalar *interp_to_source = NULL, *interp_from_source = NULL, 239 *grad_from_source; 240 if (is_tensor_to) { 241 ierr = CeedBasisGetInterp1D(basis_to, &interp_to_source); CeedChk(ierr); 242 ierr = CeedBasisGetInterp1D(basis_from, &interp_from_source); CeedChk(ierr); 243 ierr = CeedBasisGetGrad1D(basis_from, &grad_from_source); CeedChk(ierr); 244 } else { 245 ierr = CeedBasisGetInterp(basis_to, &interp_to_source); CeedChk(ierr); 246 ierr = CeedBasisGetInterp(basis_from, &interp_from_source); CeedChk(ierr); 247 ierr = CeedBasisGetGrad(basis_from, &grad_from_source); CeedChk(ierr); 248 } 249 250 // Build matrices 251 CeedInt num_matrices = 1 + (is_tensor_to ? 1 : dim); 252 CeedScalar *input_from[num_matrices], *output_project[num_matrices]; 253 input_from[0] = (CeedScalar *)interp_from_source; 254 output_project[0] = *interp_project; 255 for (CeedInt m = 1; m < num_matrices; m++) { 256 input_from[m] = (CeedScalar *)&grad_from_source[(m - 1) * Q * P_from]; 257 output_project[m] = &(*grad_project[(m - 1) * P_to * P_from]); 258 } 259 for (CeedInt m = 0; m < num_matrices; m++) { 260 // -- QR Factorization, interp_to = Q R 261 memcpy(interp_to, interp_to_source, Q * P_to * sizeof(interp_to_source[0])); 262 ierr = CeedQRFactorization(ceed, interp_to, tau, Q, P_to); CeedChk(ierr); 263 264 // -- Apply Qtranspose, interp_to = Qtranspose interp_from 265 memcpy(interp_from, input_from[m], Q * P_from * sizeof(input_from[m][0])); 266 ierr = CeedHouseholderApplyQ(interp_from, interp_to, tau, CEED_TRANSPOSE, 267 Q, P_from, P_to, P_from, 1); CeedChk(ierr); 268 269 // -- Apply Rinv, interp_project = Rinv interp_c 270 for (CeedInt j = 0; j < P_from; j++) { // Column j 271 output_project[m][j + P_from * (P_to - 1)] = interp_from[j + P_from * 272 (P_to - 1)] / interp_to[P_to * P_to - 1]; 273 for (CeedInt i = P_to - 2; i >= 0; i--) { // Row i 274 output_project[m][j + P_from * i] = interp_from[j + P_from * i]; 275 for (CeedInt k = i+1; k < P_to; k++) { 276 output_project[m][j + P_from * i] -= interp_to[k + P_to * i]* 277 output_project[m][j + P_from * k]; 278 } 279 output_project[m][j + P_from * i] /= interp_to[i + P_to * i]; 280 } 281 } 282 } 283 284 // Cleanup 285 ierr = CeedFree(&tau); CeedChk(ierr); 286 ierr = CeedFree(&interp_to); CeedChk(ierr); 287 ierr = CeedFree(&interp_from); CeedChk(ierr); 288 289 return CEED_ERROR_SUCCESS; 290 } 291 292 /// @} 293 294 /// ---------------------------------------------------------------------------- 295 /// Ceed Backend API 296 /// ---------------------------------------------------------------------------- 297 /// @addtogroup CeedBasisBackend 298 /// @{ 299 300 /** 301 @brief Return collocated grad matrix 302 303 @param basis CeedBasis 304 @param[out] collo_grad_1d Row-major (Q_1d * Q_1d) matrix expressing derivatives of 305 basis functions at quadrature points 306 307 @return An error code: 0 - success, otherwise - failure 308 309 @ref Backend 310 **/ 311 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collo_grad_1d) { 312 int i, j, k; 313 Ceed ceed; 314 CeedInt ierr, P_1d=(basis)->P_1d, Q_1d=(basis)->Q_1d; 315 CeedScalar *interp_1d, *grad_1d, *tau; 316 317 ierr = CeedMalloc(Q_1d*P_1d, &interp_1d); CeedChk(ierr); 318 ierr = CeedMalloc(Q_1d*P_1d, &grad_1d); CeedChk(ierr); 319 ierr = CeedMalloc(Q_1d, &tau); CeedChk(ierr); 320 memcpy(interp_1d, (basis)->interp_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]); 321 memcpy(grad_1d, (basis)->grad_1d, Q_1d*P_1d*sizeof(basis)->interp_1d[0]); 322 323 // QR Factorization, interp_1d = Q R 324 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 325 ierr = CeedQRFactorization(ceed, interp_1d, tau, Q_1d, P_1d); CeedChk(ierr); 326 // Note: This function is for backend use, so all errors are terminal 327 // and we do not need to clean up memory on failure. 328 329 // Apply Rinv, collo_grad_1d = grad_1d Rinv 330 for (i=0; i<Q_1d; i++) { // Row i 331 collo_grad_1d[Q_1d*i] = grad_1d[P_1d*i]/interp_1d[0]; 332 for (j=1; j<P_1d; j++) { // Column j 333 collo_grad_1d[j+Q_1d*i] = grad_1d[j+P_1d*i]; 334 for (k=0; k<j; k++) 335 collo_grad_1d[j+Q_1d*i] -= interp_1d[j+P_1d*k]*collo_grad_1d[k+Q_1d*i]; 336 collo_grad_1d[j+Q_1d*i] /= interp_1d[j+P_1d*j]; 337 } 338 for (j=P_1d; j<Q_1d; j++) 339 collo_grad_1d[j+Q_1d*i] = 0; 340 } 341 342 // Apply Qtranspose, collo_grad = collo_grad Q_transpose 343 ierr = CeedHouseholderApplyQ(collo_grad_1d, interp_1d, tau, CEED_NOTRANSPOSE, 344 Q_1d, Q_1d, P_1d, 1, Q_1d); CeedChk(ierr); 345 346 ierr = CeedFree(&interp_1d); CeedChk(ierr); 347 ierr = CeedFree(&grad_1d); CeedChk(ierr); 348 ierr = CeedFree(&tau); CeedChk(ierr); 349 return CEED_ERROR_SUCCESS; 350 } 351 352 /** 353 @brief Get tensor status for given CeedBasis 354 355 @param basis CeedBasis 356 @param[out] is_tensor Variable to store tensor status 357 358 @return An error code: 0 - success, otherwise - failure 359 360 @ref Backend 361 **/ 362 int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor) { 363 *is_tensor = basis->tensor_basis; 364 return CEED_ERROR_SUCCESS; 365 } 366 367 /** 368 @brief Get backend data of a CeedBasis 369 370 @param basis CeedBasis 371 @param[out] data Variable to store data 372 373 @return An error code: 0 - success, otherwise - failure 374 375 @ref Backend 376 **/ 377 int CeedBasisGetData(CeedBasis basis, void *data) { 378 *(void **)data = basis->data; 379 return CEED_ERROR_SUCCESS; 380 } 381 382 /** 383 @brief Set backend data of a CeedBasis 384 385 @param[out] basis CeedBasis 386 @param data Data to set 387 388 @return An error code: 0 - success, otherwise - failure 389 390 @ref Backend 391 **/ 392 int CeedBasisSetData(CeedBasis basis, void *data) { 393 basis->data = data; 394 return CEED_ERROR_SUCCESS; 395 } 396 397 /** 398 @brief Increment the reference counter for a CeedBasis 399 400 @param basis Basis to increment the reference counter 401 402 @return An error code: 0 - success, otherwise - failure 403 404 @ref Backend 405 **/ 406 int CeedBasisReference(CeedBasis basis) { 407 basis->ref_count++; 408 return CEED_ERROR_SUCCESS; 409 } 410 411 /** 412 @brief Estimate number of FLOPs required to apply CeedBasis in t_mode and eval_mode 413 414 @param basis Basis to estimate FLOPs for 415 @param t_mode Apply basis or transpose 416 @param eval_mode Basis evaluation mode 417 @param flops Address of variable to hold FLOPs estimate 418 419 @ref Backend 420 **/ 421 int CeedBasisGetFlopsEstimate(CeedBasis basis, CeedTransposeMode t_mode, 422 CeedEvalMode eval_mode, CeedSize *flops) { 423 int ierr; 424 bool is_tensor; 425 426 ierr = CeedBasisIsTensor(basis, &is_tensor); CeedChk(ierr); 427 if (is_tensor) { 428 CeedInt dim, num_comp, P_1d, Q_1d; 429 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 430 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 431 ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr); 432 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr); 433 if (t_mode == CEED_TRANSPOSE) { 434 P_1d = Q_1d; Q_1d = P_1d; 435 } 436 CeedInt tensor_flops = 0, pre = num_comp * CeedIntPow(P_1d, dim-1), post = 1; 437 for (CeedInt d = 0; d < dim; d++) { 438 tensor_flops += 2 * pre * P_1d * post * Q_1d; 439 pre /= P_1d; 440 post *= Q_1d; 441 } 442 switch (eval_mode) { 443 case CEED_EVAL_NONE: *flops = 0; break; 444 case CEED_EVAL_INTERP: *flops = tensor_flops; break; 445 case CEED_EVAL_GRAD: *flops = tensor_flops * 2; break; 446 case CEED_EVAL_DIV: 447 // LCOV_EXCL_START 448 return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, 449 "Tensor CEED_EVAL_DIV not supported"); break; 450 case CEED_EVAL_CURL: 451 return CeedError(basis->ceed, CEED_ERROR_INCOMPATIBLE, 452 "Tensor CEED_EVAL_CURL not supported"); break; 453 // LCOV_EXCL_STOP 454 case CEED_EVAL_WEIGHT: *flops = dim * CeedIntPow(Q_1d, dim); break; 455 } 456 } else { 457 CeedInt dim, num_comp, num_nodes, num_qpts, Q_comp; 458 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 459 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 460 ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr); 461 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 462 ierr = CeedBasisGetNumQuadratureComponents(basis, &Q_comp); CeedChk(ierr); 463 switch (eval_mode) { 464 case CEED_EVAL_NONE: *flops = 0; break; 465 case CEED_EVAL_INTERP: *flops = num_nodes * num_qpts * num_comp; break; 466 case CEED_EVAL_GRAD: *flops = num_nodes * num_qpts * num_comp * dim; break; 467 case CEED_EVAL_DIV: *flops = num_nodes * num_qpts; break; 468 case CEED_EVAL_CURL: *flops = num_nodes * num_qpts * dim; break; 469 case CEED_EVAL_WEIGHT: *flops = 0; break; 470 } 471 } 472 473 return CEED_ERROR_SUCCESS; 474 } 475 476 /** 477 @brief Get dimension for given CeedElemTopology 478 479 @param topo CeedElemTopology 480 @param[out] dim Variable to store dimension of topology 481 482 @return An error code: 0 - success, otherwise - failure 483 484 @ref Backend 485 **/ 486 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) { 487 *dim = (CeedInt) topo >> 16; 488 return CEED_ERROR_SUCCESS; 489 } 490 491 /** 492 @brief Get CeedTensorContract of a CeedBasis 493 494 @param basis CeedBasis 495 @param[out] contract Variable to store CeedTensorContract 496 497 @return An error code: 0 - success, otherwise - failure 498 499 @ref Backend 500 **/ 501 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) { 502 *contract = basis->contract; 503 return CEED_ERROR_SUCCESS; 504 } 505 506 /** 507 @brief Set CeedTensorContract of a CeedBasis 508 509 @param[out] basis CeedBasis 510 @param contract CeedTensorContract to set 511 512 @return An error code: 0 - success, otherwise - failure 513 514 @ref Backend 515 **/ 516 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract contract) { 517 int ierr; 518 basis->contract = contract; 519 ierr = CeedTensorContractReference(contract); CeedChk(ierr); 520 return CEED_ERROR_SUCCESS; 521 } 522 523 /** 524 @brief Return a reference implementation of matrix multiplication C = A B. 525 Note, this is a reference implementation for CPU CeedScalar pointers 526 that is not intended for high performance. 527 528 @param ceed A Ceed context for error handling 529 @param[in] mat_A Row-major matrix A 530 @param[in] mat_B Row-major matrix B 531 @param[out] mat_C Row-major output matrix C 532 @param m Number of rows of C 533 @param n Number of columns of C 534 @param kk Number of columns of A/rows of B 535 536 @return An error code: 0 - success, otherwise - failure 537 538 @ref Utility 539 **/ 540 int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, 541 const CeedScalar *mat_B, CeedScalar *mat_C, 542 CeedInt m, CeedInt n, CeedInt kk) { 543 for (CeedInt i=0; i<m; i++) 544 for (CeedInt j=0; j<n; j++) { 545 CeedScalar sum = 0; 546 for (CeedInt k=0; k<kk; k++) 547 sum += mat_A[k+i*kk]*mat_B[j+k*n]; 548 mat_C[j+i*n] = sum; 549 } 550 return CEED_ERROR_SUCCESS; 551 } 552 553 /// @} 554 555 /// ---------------------------------------------------------------------------- 556 /// CeedBasis Public API 557 /// ---------------------------------------------------------------------------- 558 /// @addtogroup CeedBasisUser 559 /// @{ 560 561 /** 562 @brief Create a tensor-product basis for H^1 discretizations 563 564 @param ceed A Ceed object where the CeedBasis will be created 565 @param dim Topological dimension 566 @param num_comp Number of field components (1 for scalar fields) 567 @param P_1d Number of nodes in one dimension 568 @param Q_1d Number of quadrature points in one dimension 569 @param interp_1d Row-major (Q_1d * P_1d) matrix expressing the values of nodal 570 basis functions at quadrature points 571 @param grad_1d Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal 572 basis functions at quadrature points 573 @param q_ref_1d Array of length Q_1d holding the locations of quadrature points 574 on the 1D reference element [-1, 1] 575 @param q_weight_1d Array of length Q_1d holding the quadrature weights on the 576 reference element 577 @param[out] basis Address of the variable where the newly created 578 CeedBasis will be stored. 579 580 @return An error code: 0 - success, otherwise - failure 581 582 @ref User 583 **/ 584 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, 585 CeedInt P_1d, CeedInt Q_1d, 586 const CeedScalar *interp_1d, 587 const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, 588 const CeedScalar *q_weight_1d, CeedBasis *basis) { 589 int ierr; 590 591 if (!ceed->BasisCreateTensorH1) { 592 Ceed delegate; 593 ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr); 594 595 if (!delegate) 596 // LCOV_EXCL_START 597 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 598 "Backend does not support BasisCreateTensorH1"); 599 // LCOV_EXCL_STOP 600 601 ierr = CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, 602 Q_1d, interp_1d, grad_1d, q_ref_1d, 603 q_weight_1d, basis); CeedChk(ierr); 604 return CEED_ERROR_SUCCESS; 605 } 606 607 if (dim < 1) 608 // LCOV_EXCL_START 609 return CeedError(ceed, CEED_ERROR_DIMENSION, 610 "Basis dimension must be a positive value"); 611 // LCOV_EXCL_STOP 612 613 if (num_comp < 1) 614 // LCOV_EXCL_START 615 return CeedError(ceed, CEED_ERROR_DIMENSION, 616 "Basis must have at least 1 component"); 617 // LCOV_EXCL_STOP 618 619 if (P_1d < 1) 620 // LCOV_EXCL_START 621 return CeedError(ceed, CEED_ERROR_DIMENSION, 622 "Basis must have at least 1 node"); 623 // LCOV_EXCL_STOP 624 625 if (Q_1d < 1) 626 // LCOV_EXCL_START 627 return CeedError(ceed, CEED_ERROR_DIMENSION, 628 "Basis must have at least 1 quadrature point"); 629 // LCOV_EXCL_STOP 630 631 CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE 632 : dim == 2 ? CEED_TOPOLOGY_QUAD 633 : CEED_TOPOLOGY_HEX; 634 635 ierr = CeedCalloc(1, basis); CeedChk(ierr); 636 (*basis)->ceed = ceed; 637 ierr = CeedReference(ceed); CeedChk(ierr); 638 (*basis)->ref_count = 1; 639 (*basis)->tensor_basis = 1; 640 (*basis)->dim = dim; 641 (*basis)->topo = topo; 642 (*basis)->num_comp = num_comp; 643 (*basis)->P_1d = P_1d; 644 (*basis)->Q_1d = Q_1d; 645 (*basis)->P = CeedIntPow(P_1d, dim); 646 (*basis)->Q = CeedIntPow(Q_1d, dim); 647 (*basis)->Q_comp = 1; 648 (*basis)->basis_space = 1; // 1 for H^1 space 649 ierr = CeedCalloc(Q_1d, &(*basis)->q_ref_1d); CeedChk(ierr); 650 ierr = CeedCalloc(Q_1d, &(*basis)->q_weight_1d); CeedChk(ierr); 651 if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d*sizeof(q_ref_1d[0])); 652 if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, 653 Q_1d*sizeof(q_weight_1d[0])); 654 ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->interp_1d); CeedChk(ierr); 655 ierr = CeedCalloc(Q_1d*P_1d, &(*basis)->grad_1d); CeedChk(ierr); 656 if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, 657 Q_1d*P_1d*sizeof(interp_1d[0])); 658 if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d*P_1d*sizeof(grad_1d[0])); 659 ierr = ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, 660 q_weight_1d, *basis); CeedChk(ierr); 661 return CEED_ERROR_SUCCESS; 662 } 663 664 /** 665 @brief Create a tensor-product Lagrange basis 666 667 @param ceed A Ceed object where the CeedBasis will be created 668 @param dim Topological dimension of element 669 @param num_comp Number of field components (1 for scalar fields) 670 @param P Number of Gauss-Lobatto nodes in one dimension. The 671 polynomial degree of the resulting Q_k element is k=P-1. 672 @param Q Number of quadrature points in one dimension. 673 @param quad_mode Distribution of the Q quadrature points (affects order of 674 accuracy for the quadrature) 675 @param[out] basis Address of the variable where the newly created 676 CeedBasis will be stored. 677 678 @return An error code: 0 - success, otherwise - failure 679 680 @ref User 681 **/ 682 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, 683 CeedInt P, CeedInt Q, CeedQuadMode quad_mode, 684 CeedBasis *basis) { 685 // Allocate 686 int ierr, ierr2, i, j, k; 687 CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, 688 *q_weight_1d; 689 690 if (dim < 1) 691 // LCOV_EXCL_START 692 return CeedError(ceed, CEED_ERROR_DIMENSION, 693 "Basis dimension must be a positive value"); 694 // LCOV_EXCL_STOP 695 696 if (num_comp < 1) 697 // LCOV_EXCL_START 698 return CeedError(ceed, CEED_ERROR_DIMENSION, 699 "Basis must have at least 1 component"); 700 // LCOV_EXCL_STOP 701 702 if (P < 1) 703 // LCOV_EXCL_START 704 return CeedError(ceed, CEED_ERROR_DIMENSION, 705 "Basis must have at least 1 node"); 706 // LCOV_EXCL_STOP 707 708 if (Q < 1) 709 // LCOV_EXCL_START 710 return CeedError(ceed, CEED_ERROR_DIMENSION, 711 "Basis must have at least 1 quadrature point"); 712 // LCOV_EXCL_STOP 713 714 // Get Nodes and Weights 715 ierr = CeedCalloc(P*Q, &interp_1d); CeedChk(ierr); 716 ierr = CeedCalloc(P*Q, &grad_1d); CeedChk(ierr); 717 ierr = CeedCalloc(P, &nodes); CeedChk(ierr); 718 ierr = CeedCalloc(Q, &q_ref_1d); CeedChk(ierr); 719 ierr = CeedCalloc(Q, &q_weight_1d); CeedChk(ierr); 720 ierr = CeedLobattoQuadrature(P, nodes, NULL); 721 if (ierr) { goto cleanup; } CeedChk(ierr); 722 switch (quad_mode) { 723 case CEED_GAUSS: 724 ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d); 725 break; 726 case CEED_GAUSS_LOBATTO: 727 ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d); 728 break; 729 } 730 if (ierr) { goto cleanup; } CeedChk(ierr); 731 732 // Build B, D matrix 733 // Fornberg, 1998 734 for (i = 0; i < Q; i++) { 735 c1 = 1.0; 736 c3 = nodes[0] - q_ref_1d[i]; 737 interp_1d[i*P+0] = 1.0; 738 for (j = 1; j < P; j++) { 739 c2 = 1.0; 740 c4 = c3; 741 c3 = nodes[j] - q_ref_1d[i]; 742 for (k = 0; k < j; k++) { 743 dx = nodes[j] - nodes[k]; 744 c2 *= dx; 745 if (k == j - 1) { 746 grad_1d[i*P + j] = c1*(interp_1d[i*P + k] - c4*grad_1d[i*P + k]) / c2; 747 interp_1d[i*P + j] = - c1*c4*interp_1d[i*P + k] / c2; 748 } 749 grad_1d[i*P + k] = (c3*grad_1d[i*P + k] - interp_1d[i*P + k]) / dx; 750 interp_1d[i*P + k] = c3*interp_1d[i*P + k] / dx; 751 } 752 c1 = c2; 753 } 754 } 755 // Pass to CeedBasisCreateTensorH1 756 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, 757 q_ref_1d, q_weight_1d, basis); CeedChk(ierr); 758 cleanup: 759 ierr2 = CeedFree(&interp_1d); CeedChk(ierr2); 760 ierr2 = CeedFree(&grad_1d); CeedChk(ierr2); 761 ierr2 = CeedFree(&nodes); CeedChk(ierr2); 762 ierr2 = CeedFree(&q_ref_1d); CeedChk(ierr2); 763 ierr2 = CeedFree(&q_weight_1d); CeedChk(ierr2); 764 CeedChk(ierr); 765 return CEED_ERROR_SUCCESS; 766 } 767 768 /** 769 @brief Create a non tensor-product basis for H^1 discretizations 770 771 @param ceed A Ceed object where the CeedBasis will be created 772 @param topo Topology of element, e.g. hypercube, simplex, ect 773 @param num_comp Number of field components (1 for scalar fields) 774 @param num_nodes Total number of nodes 775 @param num_qpts Total number of quadrature points 776 @param interp Row-major (num_qpts * num_nodes) matrix expressing the values of 777 nodal basis functions at quadrature points 778 @param grad Row-major (num_qpts * dim * num_nodes) matrix expressing 779 derivatives of nodal basis functions at quadrature points 780 @param q_ref Array of length num_qpts holding the locations of quadrature 781 points on the reference element 782 @param q_weight Array of length num_qpts holding the quadrature weights on the 783 reference element 784 @param[out] basis Address of the variable where the newly created 785 CeedBasis will be stored. 786 787 @return An error code: 0 - success, otherwise - failure 788 789 @ref User 790 **/ 791 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, 792 CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 793 const CeedScalar *grad, const CeedScalar *q_ref, 794 const CeedScalar *q_weight, CeedBasis *basis) { 795 int ierr; 796 CeedInt P = num_nodes, Q = num_qpts, dim = 0; 797 798 if (!ceed->BasisCreateH1) { 799 Ceed delegate; 800 ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr); 801 802 if (!delegate) 803 // LCOV_EXCL_START 804 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 805 "Backend does not support BasisCreateH1"); 806 // LCOV_EXCL_STOP 807 808 ierr = CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, 809 num_qpts, interp, grad, q_ref, 810 q_weight, basis); CeedChk(ierr); 811 return CEED_ERROR_SUCCESS; 812 } 813 814 if (num_comp < 1) 815 // LCOV_EXCL_START 816 return CeedError(ceed, CEED_ERROR_DIMENSION, 817 "Basis must have at least 1 component"); 818 // LCOV_EXCL_STOP 819 820 if (num_nodes < 1) 821 // LCOV_EXCL_START 822 return CeedError(ceed, CEED_ERROR_DIMENSION, 823 "Basis must have at least 1 node"); 824 // LCOV_EXCL_STOP 825 826 if (num_qpts < 1) 827 // LCOV_EXCL_START 828 return CeedError(ceed, CEED_ERROR_DIMENSION, 829 "Basis must have at least 1 quadrature point"); 830 // LCOV_EXCL_STOP 831 832 ierr = CeedCalloc(1, basis); CeedChk(ierr); 833 834 ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr); 835 836 (*basis)->ceed = ceed; 837 ierr = CeedReference(ceed); CeedChk(ierr); 838 (*basis)->ref_count = 1; 839 (*basis)->tensor_basis = 0; 840 (*basis)->dim = dim; 841 (*basis)->topo = topo; 842 (*basis)->num_comp = num_comp; 843 (*basis)->P = P; 844 (*basis)->Q = Q; 845 (*basis)->Q_comp = 1; 846 (*basis)->basis_space = 1; // 1 for H^1 space 847 ierr = CeedCalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr); 848 ierr = CeedCalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr); 849 if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0])); 850 if(q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0])); 851 ierr = CeedCalloc(Q*P, &(*basis)->interp); CeedChk(ierr); 852 ierr = CeedCalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr); 853 if(interp) memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0])); 854 if(grad) memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0])); 855 ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, 856 q_weight, *basis); CeedChk(ierr); 857 return CEED_ERROR_SUCCESS; 858 } 859 860 /** 861 @brief Create a non tensor-product basis for H(div) discretizations 862 863 @param ceed A Ceed object where the CeedBasis will be created 864 @param topo Topology of element (`CEED_TOPOLOGY_QUAD`, `CEED_TOPOLOGY_PRISM`, etc.), 865 dimension of which is used in some array sizes below 866 @param num_comp Number of components (usually 1 for vectors in H(div) bases) 867 @param num_nodes Total number of nodes (dofs per element) 868 @param num_qpts Total number of quadrature points 869 @param interp Row-major (dim*num_qpts * num_nodes) matrix expressing the values of 870 nodal basis functions at quadrature points 871 @param div Row-major (num_qpts * num_nodes) matrix expressing 872 divergence of nodal basis functions at quadrature points 873 @param q_ref Array of length num_qpts holding the locations of quadrature 874 points on the reference element 875 @param q_weight Array of length num_qpts holding the quadrature weights on the 876 reference element 877 @param[out] basis Address of the variable where the newly created 878 CeedBasis will be stored. 879 880 @return An error code: 0 - success, otherwise - failure 881 882 @ref User 883 **/ 884 int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, 885 CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 886 const CeedScalar *div, const CeedScalar *q_ref, 887 const CeedScalar *q_weight, CeedBasis *basis) { 888 int ierr; 889 CeedInt Q = num_qpts, P = num_nodes, dim = 0; 890 ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr); 891 if (!ceed->BasisCreateHdiv) { 892 Ceed delegate; 893 ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr); 894 895 if (!delegate) 896 // LCOV_EXCL_START 897 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 898 "Backend does not implement BasisCreateHdiv"); 899 // LCOV_EXCL_STOP 900 901 ierr = CeedBasisCreateHdiv(delegate, topo, num_comp, num_nodes, 902 num_qpts, interp, div, q_ref, 903 q_weight, basis); CeedChk(ierr); 904 return CEED_ERROR_SUCCESS; 905 } 906 907 if (num_comp < 1) 908 // LCOV_EXCL_START 909 return CeedError(ceed, CEED_ERROR_DIMENSION, 910 "Basis must have at least 1 component"); 911 // LCOV_EXCL_STOP 912 913 if (num_nodes < 1) 914 // LCOV_EXCL_START 915 return CeedError(ceed, CEED_ERROR_DIMENSION, 916 "Basis must have at least 1 node"); 917 // LCOV_EXCL_STOP 918 919 if (num_qpts < 1) 920 // LCOV_EXCL_START 921 return CeedError(ceed, CEED_ERROR_DIMENSION, 922 "Basis must have at least 1 quadrature point"); 923 // LCOV_EXCL_STOP 924 925 ierr = CeedCalloc(1, basis); CeedChk(ierr); 926 927 (*basis)->ceed = ceed; 928 ierr = CeedReference(ceed); CeedChk(ierr); 929 (*basis)->ref_count = 1; 930 (*basis)->tensor_basis = 0; 931 (*basis)->dim = dim; 932 (*basis)->topo = topo; 933 (*basis)->num_comp = num_comp; 934 (*basis)->P = P; 935 (*basis)->Q = Q; 936 (*basis)->Q_comp = dim; 937 (*basis)->basis_space = 2; // 2 for H(div) space 938 ierr = CeedMalloc(Q*dim, &(*basis)->q_ref_1d); CeedChk(ierr); 939 ierr = CeedMalloc(Q, &(*basis)->q_weight_1d); CeedChk(ierr); 940 if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q*dim*sizeof(q_ref[0])); 941 if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q*sizeof(q_weight[0])); 942 ierr = CeedMalloc(dim*Q*P, &(*basis)->interp); CeedChk(ierr); 943 ierr = CeedMalloc(Q*P, &(*basis)->div); CeedChk(ierr); 944 if (interp) memcpy((*basis)->interp, interp, dim*Q*P*sizeof(interp[0])); 945 if (div) memcpy((*basis)->div, div, Q*P*sizeof(div[0])); 946 ierr = ceed->BasisCreateHdiv(topo, dim, P, Q, interp, div, q_ref, 947 q_weight, *basis); CeedChk(ierr); 948 return CEED_ERROR_SUCCESS; 949 } 950 951 /** 952 @brief Create a CeedBasis for projection from the nodes of `basis_from` 953 to the nodes of `basis_to`. Only `CEED_EVAL_INTERP` and 954 `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`. 955 The interpolation is given by `interp_project = interp_to^+ * interp_from`, 956 where the pesudoinverse `interp_to^+` is given by QR factorization. 957 The gradient is given by `grad_project = interp_to^+ * grad_from`. 958 Note: `basis_from` and `basis_to` must have compatible quadrature 959 spaces. 960 Note: `basis_project` will have the same number of components as 961 `basis_from`, regardless of the number of components that 962 `basis_to` has. If `basis_from` has 3 components and `basis_to` 963 has 5 components, then `basis_project` will have 3 components. 964 965 @param[in] basis_from CeedBasis to prolong from 966 @param[in] basis_to CeedBasis to prolong to 967 @param[out] basis_project Address of the variable where the newly created 968 CeedBasis will be stored. 969 970 @return An error code: 0 - success, otherwise - failure 971 972 @ref User 973 **/ 974 int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, 975 CeedBasis *basis_project) { 976 int ierr; 977 Ceed ceed; 978 ierr = CeedBasisGetCeed(basis_to, &ceed); CeedChk(ierr); 979 980 // Create projectior matrix 981 CeedScalar *interp_project, *grad_project; 982 ierr = CeedBasisCreateProjectionMatrices(basis_from, basis_to, 983 &interp_project, &grad_project); 984 CeedChk(ierr); 985 986 // Build basis 987 bool is_tensor; 988 CeedInt dim, num_comp; 989 CeedScalar *q_ref, *q_weight; 990 ierr = CeedBasisIsTensor(basis_to, &is_tensor); CeedChk(ierr); 991 ierr = CeedBasisGetDimension(basis_to, &dim); CeedChk(ierr); 992 ierr = CeedBasisGetNumComponents(basis_from, &num_comp); CeedChk(ierr); 993 if (is_tensor) { 994 CeedInt P_1d_to, P_1d_from; 995 ierr = CeedBasisGetNumNodes1D(basis_from, &P_1d_from); CeedChk(ierr); 996 ierr = CeedBasisGetNumNodes1D(basis_to, &P_1d_to); CeedChk(ierr); 997 ierr = CeedCalloc(P_1d_to, &q_ref); CeedChk(ierr); 998 ierr = CeedCalloc(P_1d_to, &q_weight); CeedChk(ierr); 999 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_from, P_1d_to, 1000 interp_project, grad_project, q_ref, q_weight, basis_project); 1001 CeedChk(ierr); 1002 } else { 1003 CeedElemTopology topo; 1004 ierr = CeedBasisGetTopology(basis_to, &topo); CeedChk(ierr); 1005 CeedInt num_nodes_to, num_nodes_from; 1006 ierr = CeedBasisGetNumNodes(basis_from, &num_nodes_from); CeedChk(ierr); 1007 ierr = CeedBasisGetNumNodes(basis_to, &num_nodes_to); CeedChk(ierr); 1008 ierr = CeedCalloc(num_nodes_to * dim, &q_ref); CeedChk(ierr); 1009 ierr = CeedCalloc(num_nodes_to, &q_weight); CeedChk(ierr); 1010 ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_from, num_nodes_to, 1011 interp_project, grad_project, q_ref, q_weight, basis_project); 1012 CeedChk(ierr); 1013 } 1014 1015 // Cleanup 1016 ierr = CeedFree(&interp_project); CeedChk(ierr); 1017 ierr = CeedFree(&grad_project); CeedChk(ierr); 1018 ierr = CeedFree(&q_ref); CeedChk(ierr); 1019 ierr = CeedFree(&q_weight); CeedChk(ierr); 1020 1021 return CEED_ERROR_SUCCESS; 1022 } 1023 1024 /** 1025 @brief Copy the pointer to a CeedBasis. Both pointers should 1026 be destroyed with `CeedBasisDestroy()`; 1027 Note: If `*basis_copy` is non-NULL, then it is assumed that 1028 `*basis_copy` is a pointer to a CeedBasis. This CeedBasis 1029 will be destroyed if `*basis_copy` is the only 1030 reference to this CeedBasis. 1031 1032 @param basis CeedBasis to copy reference to 1033 @param[out] basis_copy Variable to store copied reference 1034 1035 @return An error code: 0 - success, otherwise - failure 1036 1037 @ref User 1038 **/ 1039 int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy) { 1040 int ierr; 1041 1042 ierr = CeedBasisReference(basis); CeedChk(ierr); 1043 ierr = CeedBasisDestroy(basis_copy); CeedChk(ierr); 1044 *basis_copy = basis; 1045 return CEED_ERROR_SUCCESS; 1046 } 1047 1048 /** 1049 @brief View a CeedBasis 1050 1051 @param basis CeedBasis to view 1052 @param stream Stream to view to, e.g., stdout 1053 1054 @return An error code: 0 - success, otherwise - failure 1055 1056 @ref User 1057 **/ 1058 int CeedBasisView(CeedBasis basis, FILE *stream) { 1059 int ierr; 1060 CeedFESpace FE_space = basis->basis_space; 1061 CeedElemTopology topo = basis->topo; 1062 // Print FE space and element topology of the basis 1063 if (basis->tensor_basis) { 1064 fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" 1065 CeedInt_FMT " Q=%" CeedInt_FMT "\n", 1066 CeedFESpaces[FE_space], CeedElemTopologies[topo], 1067 basis->dim, basis->P_1d, basis->Q_1d); 1068 } else { 1069 fprintf(stream, "CeedBasis (%s on a %s element): dim=%" CeedInt_FMT " P=%" 1070 CeedInt_FMT " Q=%" CeedInt_FMT "\n", 1071 CeedFESpaces[FE_space], CeedElemTopologies[topo], 1072 basis->dim, basis->P, basis->Q); 1073 } 1074 // Print quadrature data, interpolation/gradient/divergene/curl of the basis 1075 if (basis->tensor_basis) { // tensor basis 1076 ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q_1d, basis->q_ref_1d, 1077 stream); CeedChk(ierr); 1078 ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q_1d, 1079 basis->q_weight_1d, stream); CeedChk(ierr); 1080 ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, 1081 basis->interp_1d, stream); CeedChk(ierr); 1082 ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q_1d, basis->P_1d, 1083 basis->grad_1d, stream); CeedChk(ierr); 1084 } else { // non-tensor basis 1085 ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim, 1086 basis->q_ref_1d, 1087 stream); CeedChk(ierr); 1088 ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->q_weight_1d, 1089 stream); CeedChk(ierr); 1090 ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q_comp*basis->Q, basis->P, 1091 basis->interp, stream); CeedChk(ierr); 1092 if (basis->grad) { 1093 ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P, 1094 basis->grad, stream); CeedChk(ierr); 1095 } 1096 if (basis->div) { 1097 ierr = CeedScalarView("div", "\t% 12.8f", basis->Q, basis->P, 1098 basis->div, stream); CeedChk(ierr); 1099 } 1100 } 1101 return CEED_ERROR_SUCCESS; 1102 } 1103 1104 /** 1105 @brief Apply basis evaluation from nodes to quadrature points or vice versa 1106 1107 @param basis CeedBasis to evaluate 1108 @param num_elem The number of elements to apply the basis evaluation to; 1109 the backend will specify the ordering in 1110 CeedElemRestrictionCreateBlocked() 1111 @param t_mode \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature 1112 points, \ref CEED_TRANSPOSE to apply the transpose, mapping 1113 from quadrature points to nodes 1114 @param eval_mode \ref CEED_EVAL_NONE to use values directly, 1115 \ref CEED_EVAL_INTERP to use interpolated values, 1116 \ref CEED_EVAL_GRAD to use gradients, 1117 \ref CEED_EVAL_WEIGHT to use quadrature weights. 1118 @param[in] u Input CeedVector 1119 @param[out] v Output CeedVector 1120 1121 @return An error code: 0 - success, otherwise - failure 1122 1123 @ref User 1124 **/ 1125 int CeedBasisApply(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, 1126 CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 1127 int ierr; 1128 CeedSize u_length = 0, v_length; 1129 CeedInt dim, num_comp, num_nodes, num_qpts; 1130 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 1131 ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr); 1132 ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChk(ierr); 1133 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr); 1134 ierr = CeedVectorGetLength(v, &v_length); CeedChk(ierr); 1135 if (u) { 1136 ierr = CeedVectorGetLength(u, &u_length); CeedChk(ierr); 1137 } 1138 1139 if (!basis->Apply) 1140 // LCOV_EXCL_START 1141 return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, 1142 "Backend does not support BasisApply"); 1143 // LCOV_EXCL_STOP 1144 1145 // Check compatibility of topological and geometrical dimensions 1146 if ((t_mode == CEED_TRANSPOSE && (v_length%num_nodes != 0 || 1147 u_length%num_qpts != 0)) || 1148 (t_mode == CEED_NOTRANSPOSE && (u_length%num_nodes != 0 || 1149 v_length%num_qpts != 0))) 1150 // LCOV_EXCL_START 1151 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, 1152 "Length of input/output vectors " 1153 "incompatible with basis dimensions"); 1154 // LCOV_EXCL_STOP 1155 1156 // Check vector lengths to prevent out of bounds issues 1157 bool bad_dims = false; 1158 switch (eval_mode) { 1159 case CEED_EVAL_NONE: 1160 case CEED_EVAL_INTERP: bad_dims = 1161 ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts || 1162 v_length < num_elem*num_comp*num_nodes)) || 1163 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp || 1164 u_length < num_elem*num_comp*num_nodes))); 1165 break; 1166 case CEED_EVAL_GRAD: bad_dims = 1167 ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts*dim || 1168 v_length < num_elem*num_comp*num_nodes)) || 1169 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp*dim || 1170 u_length < num_elem*num_comp*num_nodes))); 1171 break; 1172 case CEED_EVAL_WEIGHT: 1173 bad_dims = v_length < num_elem*num_qpts; 1174 break; 1175 // LCOV_EXCL_START 1176 case CEED_EVAL_DIV: bad_dims = 1177 ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts || 1178 v_length < num_elem*num_comp*num_nodes)) || 1179 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp || 1180 u_length < num_elem*num_comp*num_nodes))); 1181 break; 1182 case CEED_EVAL_CURL: bad_dims = 1183 ((t_mode == CEED_TRANSPOSE && (u_length < num_elem*num_comp*num_qpts || 1184 v_length < num_elem*num_comp*num_nodes)) || 1185 (t_mode == CEED_NOTRANSPOSE && (v_length < num_elem*num_qpts*num_comp || 1186 u_length < num_elem*num_comp*num_nodes))); 1187 break; 1188 // LCOV_EXCL_STOP 1189 } 1190 if (bad_dims) 1191 // LCOV_EXCL_START 1192 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, 1193 "Input/output vectors too short for basis and evaluation mode"); 1194 // LCOV_EXCL_STOP 1195 1196 ierr = basis->Apply(basis, num_elem, t_mode, eval_mode, u, v); CeedChk(ierr); 1197 return CEED_ERROR_SUCCESS; 1198 } 1199 1200 /** 1201 @brief Get Ceed associated with a CeedBasis 1202 1203 @param basis CeedBasis 1204 @param[out] ceed Variable to store Ceed 1205 1206 @return An error code: 0 - success, otherwise - failure 1207 1208 @ref Advanced 1209 **/ 1210 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) { 1211 *ceed = basis->ceed; 1212 return CEED_ERROR_SUCCESS; 1213 } 1214 1215 /** 1216 @brief Get dimension for given CeedBasis 1217 1218 @param basis CeedBasis 1219 @param[out] dim Variable to store dimension of basis 1220 1221 @return An error code: 0 - success, otherwise - failure 1222 1223 @ref Advanced 1224 **/ 1225 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) { 1226 *dim = basis->dim; 1227 return CEED_ERROR_SUCCESS; 1228 } 1229 1230 /** 1231 @brief Get topology for given CeedBasis 1232 1233 @param basis CeedBasis 1234 @param[out] topo Variable to store topology of basis 1235 1236 @return An error code: 0 - success, otherwise - failure 1237 1238 @ref Advanced 1239 **/ 1240 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) { 1241 *topo = basis->topo; 1242 return CEED_ERROR_SUCCESS; 1243 } 1244 1245 /** 1246 @brief Get number of Q-vector components for given CeedBasis 1247 1248 @param basis CeedBasis 1249 @param[out] Q_comp Variable to store number of Q-vector components of basis 1250 1251 @return An error code: 0 - success, otherwise - failure 1252 1253 @ref Advanced 1254 **/ 1255 int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp) { 1256 *Q_comp = basis->Q_comp; 1257 return CEED_ERROR_SUCCESS; 1258 } 1259 1260 /** 1261 @brief Get number of components for given CeedBasis 1262 1263 @param basis CeedBasis 1264 @param[out] num_comp Variable to store number of components of basis 1265 1266 @return An error code: 0 - success, otherwise - failure 1267 1268 @ref Advanced 1269 **/ 1270 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp) { 1271 *num_comp = basis->num_comp; 1272 return CEED_ERROR_SUCCESS; 1273 } 1274 1275 /** 1276 @brief Get total number of nodes (in dim dimensions) of a CeedBasis 1277 1278 @param basis CeedBasis 1279 @param[out] P Variable to store number of nodes 1280 1281 @return An error code: 0 - success, otherwise - failure 1282 1283 @ref Utility 1284 **/ 1285 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) { 1286 *P = basis->P; 1287 return CEED_ERROR_SUCCESS; 1288 } 1289 1290 /** 1291 @brief Get total number of nodes (in 1 dimension) of a CeedBasis 1292 1293 @param basis CeedBasis 1294 @param[out] P_1d Variable to store number of nodes 1295 1296 @return An error code: 0 - success, otherwise - failure 1297 1298 @ref Advanced 1299 **/ 1300 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d) { 1301 if (!basis->tensor_basis) 1302 // LCOV_EXCL_START 1303 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1304 "Cannot supply P_1d for non-tensor basis"); 1305 // LCOV_EXCL_STOP 1306 1307 *P_1d = basis->P_1d; 1308 return CEED_ERROR_SUCCESS; 1309 } 1310 1311 /** 1312 @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis 1313 1314 @param basis CeedBasis 1315 @param[out] Q Variable to store number of quadrature points 1316 1317 @return An error code: 0 - success, otherwise - failure 1318 1319 @ref Utility 1320 **/ 1321 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) { 1322 *Q = basis->Q; 1323 return CEED_ERROR_SUCCESS; 1324 } 1325 1326 /** 1327 @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis 1328 1329 @param basis CeedBasis 1330 @param[out] Q_1d Variable to store number of quadrature points 1331 1332 @return An error code: 0 - success, otherwise - failure 1333 1334 @ref Advanced 1335 **/ 1336 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q_1d) { 1337 if (!basis->tensor_basis) 1338 // LCOV_EXCL_START 1339 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1340 "Cannot supply Q_1d for non-tensor basis"); 1341 // LCOV_EXCL_STOP 1342 1343 *Q_1d = basis->Q_1d; 1344 return CEED_ERROR_SUCCESS; 1345 } 1346 1347 /** 1348 @brief Get reference coordinates of quadrature points (in dim dimensions) 1349 of a CeedBasis 1350 1351 @param basis CeedBasis 1352 @param[out] q_ref Variable to store reference coordinates of quadrature points 1353 1354 @return An error code: 0 - success, otherwise - failure 1355 1356 @ref Advanced 1357 **/ 1358 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref) { 1359 *q_ref = basis->q_ref_1d; 1360 return CEED_ERROR_SUCCESS; 1361 } 1362 1363 /** 1364 @brief Get quadrature weights of quadrature points (in dim dimensions) 1365 of a CeedBasis 1366 1367 @param basis CeedBasis 1368 @param[out] q_weight Variable to store quadrature weights 1369 1370 @return An error code: 0 - success, otherwise - failure 1371 1372 @ref Advanced 1373 **/ 1374 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **q_weight) { 1375 *q_weight = basis->q_weight_1d; 1376 return CEED_ERROR_SUCCESS; 1377 } 1378 1379 /** 1380 @brief Get interpolation matrix of a CeedBasis 1381 1382 @param basis CeedBasis 1383 @param[out] interp Variable to store interpolation matrix 1384 1385 @return An error code: 0 - success, otherwise - failure 1386 1387 @ref Advanced 1388 **/ 1389 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) { 1390 if (!basis->interp && basis->tensor_basis) { 1391 // Allocate 1392 int ierr; 1393 ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr); 1394 1395 // Initialize 1396 for (CeedInt i=0; i<basis->Q*basis->P; i++) 1397 basis->interp[i] = 1.0; 1398 1399 // Calculate 1400 for (CeedInt d=0; d<basis->dim; d++) 1401 for (CeedInt qpt=0; qpt<basis->Q; qpt++) 1402 for (CeedInt node=0; node<basis->P; node++) { 1403 CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d; 1404 CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d; 1405 basis->interp[qpt*(basis->P)+node] *= basis->interp_1d[q*basis->P_1d+p]; 1406 } 1407 } 1408 *interp = basis->interp; 1409 return CEED_ERROR_SUCCESS; 1410 } 1411 1412 /** 1413 @brief Get 1D interpolation matrix of a tensor product CeedBasis 1414 1415 @param basis CeedBasis 1416 @param[out] interp_1d Variable to store interpolation matrix 1417 1418 @return An error code: 0 - success, otherwise - failure 1419 1420 @ref Backend 1421 **/ 1422 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp_1d) { 1423 if (!basis->tensor_basis) 1424 // LCOV_EXCL_START 1425 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1426 "CeedBasis is not a tensor product basis."); 1427 // LCOV_EXCL_STOP 1428 1429 *interp_1d = basis->interp_1d; 1430 return CEED_ERROR_SUCCESS; 1431 } 1432 1433 /** 1434 @brief Get gradient matrix of a CeedBasis 1435 1436 @param basis CeedBasis 1437 @param[out] grad Variable to store gradient matrix 1438 1439 @return An error code: 0 - success, otherwise - failure 1440 1441 @ref Advanced 1442 **/ 1443 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) { 1444 if (!basis->grad && basis->tensor_basis) { 1445 // Allocate 1446 int ierr; 1447 ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad); 1448 CeedChk(ierr); 1449 1450 // Initialize 1451 for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++) 1452 basis->grad[i] = 1.0; 1453 1454 // Calculate 1455 for (CeedInt d=0; d<basis->dim; d++) 1456 for (CeedInt i=0; i<basis->dim; i++) 1457 for (CeedInt qpt=0; qpt<basis->Q; qpt++) 1458 for (CeedInt node=0; node<basis->P; node++) { 1459 CeedInt p = (node / CeedIntPow(basis->P_1d, d)) % basis->P_1d; 1460 CeedInt q = (qpt / CeedIntPow(basis->Q_1d, d)) % basis->Q_1d; 1461 if (i == d) 1462 basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *= 1463 basis->grad_1d[q*basis->P_1d+p]; 1464 else 1465 basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *= 1466 basis->interp_1d[q*basis->P_1d+p]; 1467 } 1468 } 1469 *grad = basis->grad; 1470 return CEED_ERROR_SUCCESS; 1471 } 1472 1473 /** 1474 @brief Get 1D gradient matrix of a tensor product CeedBasis 1475 1476 @param basis CeedBasis 1477 @param[out] grad_1d Variable to store gradient matrix 1478 1479 @return An error code: 0 - success, otherwise - failure 1480 1481 @ref Advanced 1482 **/ 1483 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d) { 1484 if (!basis->tensor_basis) 1485 // LCOV_EXCL_START 1486 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1487 "CeedBasis is not a tensor product basis."); 1488 // LCOV_EXCL_STOP 1489 1490 *grad_1d = basis->grad_1d; 1491 return CEED_ERROR_SUCCESS; 1492 } 1493 1494 /** 1495 @brief Get divergence matrix of a CeedBasis 1496 1497 @param basis CeedBasis 1498 @param[out] div Variable to store divergence matrix 1499 1500 @return An error code: 0 - success, otherwise - failure 1501 1502 @ref Advanced 1503 **/ 1504 int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div) { 1505 if (!basis->div) 1506 // LCOV_EXCL_START 1507 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1508 "CeedBasis does not have divergence matrix."); 1509 // LCOV_EXCL_STOP 1510 1511 *div = basis->div; 1512 return CEED_ERROR_SUCCESS; 1513 } 1514 1515 /** 1516 @brief Destroy a CeedBasis 1517 1518 @param basis CeedBasis to destroy 1519 1520 @return An error code: 0 - success, otherwise - failure 1521 1522 @ref User 1523 **/ 1524 int CeedBasisDestroy(CeedBasis *basis) { 1525 int ierr; 1526 1527 if (!*basis || --(*basis)->ref_count > 0) return CEED_ERROR_SUCCESS; 1528 if ((*basis)->Destroy) { 1529 ierr = (*basis)->Destroy(*basis); CeedChk(ierr); 1530 } 1531 if ((*basis)->contract) { 1532 ierr = CeedTensorContractDestroy(&(*basis)->contract); CeedChk(ierr); 1533 } 1534 ierr = CeedFree(&(*basis)->interp); CeedChk(ierr); 1535 ierr = CeedFree(&(*basis)->interp_1d); CeedChk(ierr); 1536 ierr = CeedFree(&(*basis)->grad); CeedChk(ierr); 1537 ierr = CeedFree(&(*basis)->div); CeedChk(ierr); 1538 ierr = CeedFree(&(*basis)->grad_1d); CeedChk(ierr); 1539 ierr = CeedFree(&(*basis)->q_ref_1d); CeedChk(ierr); 1540 ierr = CeedFree(&(*basis)->q_weight_1d); CeedChk(ierr); 1541 ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr); 1542 ierr = CeedFree(basis); CeedChk(ierr); 1543 return CEED_ERROR_SUCCESS; 1544 } 1545 1546 /** 1547 @brief Construct a Gauss-Legendre quadrature 1548 1549 @param Q Number of quadrature points (integrates polynomials of 1550 degree 2*Q-1 exactly) 1551 @param[out] q_ref_1d Array of length Q to hold the abscissa on [-1, 1] 1552 @param[out] q_weight_1d Array of length Q to hold the weights 1553 1554 @return An error code: 0 - success, otherwise - failure 1555 1556 @ref Utility 1557 **/ 1558 int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, 1559 CeedScalar *q_weight_1d) { 1560 // Allocate 1561 CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0); 1562 // Build q_ref_1d, q_weight_1d 1563 for (CeedInt i = 0; i <= Q/2; i++) { 1564 // Guess 1565 xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q))); 1566 // Pn(xi) 1567 P0 = 1.0; 1568 P1 = xi; 1569 P2 = 0.0; 1570 for (CeedInt j = 2; j <= Q; j++) { 1571 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1572 P0 = P1; 1573 P1 = P2; 1574 } 1575 // First Newton Step 1576 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1577 xi = xi-P2/dP2; 1578 // Newton to convergence 1579 for (CeedInt k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) { 1580 P0 = 1.0; 1581 P1 = xi; 1582 for (CeedInt j = 2; j <= Q; j++) { 1583 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1584 P0 = P1; 1585 P1 = P2; 1586 } 1587 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1588 xi = xi-P2/dP2; 1589 } 1590 // Save xi, wi 1591 wi = 2.0/((1.0-xi*xi)*dP2*dP2); 1592 q_weight_1d[i] = wi; 1593 q_weight_1d[Q-1-i] = wi; 1594 q_ref_1d[i] = -xi; 1595 q_ref_1d[Q-1-i]= xi; 1596 } 1597 return CEED_ERROR_SUCCESS; 1598 } 1599 1600 /** 1601 @brief Construct a Gauss-Legendre-Lobatto quadrature 1602 1603 @param Q Number of quadrature points (integrates polynomials of 1604 degree 2*Q-3 exactly) 1605 @param[out] q_ref_1d Array of length Q to hold the abscissa on [-1, 1] 1606 @param[out] q_weight_1d Array of length Q to hold the weights 1607 1608 @return An error code: 0 - success, otherwise - failure 1609 1610 @ref Utility 1611 **/ 1612 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, 1613 CeedScalar *q_weight_1d) { 1614 // Allocate 1615 CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0); 1616 // Build q_ref_1d, q_weight_1d 1617 // Set endpoints 1618 if (Q < 2) 1619 // LCOV_EXCL_START 1620 return CeedError(NULL, CEED_ERROR_DIMENSION, 1621 "Cannot create Lobatto quadrature with Q=%" CeedInt_FMT " < 2 points", Q); 1622 // LCOV_EXCL_STOP 1623 wi = 2.0/((CeedScalar)(Q*(Q-1))); 1624 if (q_weight_1d) { 1625 q_weight_1d[0] = wi; 1626 q_weight_1d[Q-1] = wi; 1627 } 1628 q_ref_1d[0] = -1.0; 1629 q_ref_1d[Q-1] = 1.0; 1630 // Interior 1631 for (CeedInt i = 1; i <= (Q-1)/2; i++) { 1632 // Guess 1633 xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1)); 1634 // Pn(xi) 1635 P0 = 1.0; 1636 P1 = xi; 1637 P2 = 0.0; 1638 for (CeedInt j = 2; j < Q; j++) { 1639 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1640 P0 = P1; 1641 P1 = P2; 1642 } 1643 // First Newton step 1644 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1645 d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi); 1646 xi = xi-dP2/d2P2; 1647 // Newton to convergence 1648 for (CeedInt k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) { 1649 P0 = 1.0; 1650 P1 = xi; 1651 for (CeedInt j = 2; j < Q; j++) { 1652 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1653 P0 = P1; 1654 P1 = P2; 1655 } 1656 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1657 d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi); 1658 xi = xi-dP2/d2P2; 1659 } 1660 // Save xi, wi 1661 wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2); 1662 if (q_weight_1d) { 1663 q_weight_1d[i] = wi; 1664 q_weight_1d[Q-1-i] = wi; 1665 } 1666 q_ref_1d[i] = -xi; 1667 q_ref_1d[Q-1-i]= xi; 1668 } 1669 return CEED_ERROR_SUCCESS; 1670 } 1671 1672 /** 1673 @brief Return QR Factorization of a matrix 1674 1675 @param ceed A Ceed context for error handling 1676 @param[in,out] mat Row-major matrix to be factorized in place 1677 @param[in,out] tau Vector of length m of scaling factors 1678 @param m Number of rows 1679 @param n Number of columns 1680 1681 @return An error code: 0 - success, otherwise - failure 1682 1683 @ref Utility 1684 **/ 1685 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, 1686 CeedInt m, CeedInt n) { 1687 CeedScalar v[m]; 1688 1689 // Check m >= n 1690 if (n > m) 1691 // LCOV_EXCL_START 1692 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 1693 "Cannot compute QR factorization with n > m"); 1694 // LCOV_EXCL_STOP 1695 1696 for (CeedInt i=0; i<n; i++) { 1697 if (i >= m-1) { // last row of matrix, no reflection needed 1698 tau[i] = 0.; 1699 break; 1700 } 1701 // Calculate Householder vector, magnitude 1702 CeedScalar sigma = 0.0; 1703 v[i] = mat[i+n*i]; 1704 for (CeedInt j=i+1; j<m; j++) { 1705 v[j] = mat[i+n*j]; 1706 sigma += v[j] * v[j]; 1707 } 1708 CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m] 1709 CeedScalar R_ii = -copysign(norm, v[i]); 1710 v[i] -= R_ii; 1711 // norm of v[i:m] after modification above and scaling below 1712 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1713 // tau = 2 / (norm*norm) 1714 tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma); 1715 for (CeedInt j=i+1; j<m; j++) 1716 v[j] /= v[i]; 1717 1718 // Apply Householder reflector to lower right panel 1719 CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1); 1720 // Save v 1721 mat[i+n*i] = R_ii; 1722 for (CeedInt j=i+1; j<m; j++) 1723 mat[i+n*j] = v[j]; 1724 } 1725 return CEED_ERROR_SUCCESS; 1726 } 1727 1728 /** 1729 @brief Return symmetric Schur decomposition of the symmetric matrix mat via 1730 symmetric QR factorization 1731 1732 @param ceed A Ceed context for error handling 1733 @param[in,out] mat Row-major matrix to be factorized in place 1734 @param[out] lambda Vector of length n of eigenvalues 1735 @param n Number of rows/columns 1736 1737 @return An error code: 0 - success, otherwise - failure 1738 1739 @ref Utility 1740 **/ 1741 CeedPragmaOptimizeOff 1742 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, 1743 CeedScalar *lambda, CeedInt n) { 1744 // Check bounds for clang-tidy 1745 if (n<2) 1746 // LCOV_EXCL_START 1747 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 1748 "Cannot compute symmetric Schur decomposition of scalars"); 1749 // LCOV_EXCL_STOP 1750 1751 CeedScalar v[n-1], tau[n-1], mat_T[n*n]; 1752 1753 // Copy mat to mat_T and set mat to I 1754 memcpy(mat_T, mat, n*n*sizeof(mat[0])); 1755 for (CeedInt i=0; i<n; i++) 1756 for (CeedInt j=0; j<n; j++) 1757 mat[j+n*i] = (i==j) ? 1 : 0; 1758 1759 // Reduce to tridiagonal 1760 for (CeedInt i=0; i<n-1; i++) { 1761 // Calculate Householder vector, magnitude 1762 CeedScalar sigma = 0.0; 1763 v[i] = mat_T[i+n*(i+1)]; 1764 for (CeedInt j=i+1; j<n-1; j++) { 1765 v[j] = mat_T[i+n*(j+1)]; 1766 sigma += v[j] * v[j]; 1767 } 1768 CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1] 1769 CeedScalar R_ii = -copysign(norm, v[i]); 1770 v[i] -= R_ii; 1771 // norm of v[i:m] after modification above and scaling below 1772 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1773 // tau = 2 / (norm*norm) 1774 tau[i] = i == n - 2 ? 2 : 2 * v[i]*v[i] / (v[i]*v[i] + sigma); 1775 for (CeedInt j=i+1; j<n-1; j++) 1776 v[j] /= v[i]; 1777 1778 // Update sub and super diagonal 1779 for (CeedInt j=i+2; j<n; j++) { 1780 mat_T[i+n*j] = 0; mat_T[j+n*i] = 0; 1781 } 1782 // Apply symmetric Householder reflector to lower right panel 1783 CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i], 1784 n-(i+1), n-(i+1), n, 1); 1785 CeedHouseholderReflect(&mat_T[(i+1)+n*(i+1)], &v[i], tau[i], 1786 n-(i+1), n-(i+1), 1, n); 1787 1788 // Save v 1789 mat_T[i+n*(i+1)] = R_ii; 1790 mat_T[(i+1)+n*i] = R_ii; 1791 for (CeedInt j=i+1; j<n-1; j++) { 1792 mat_T[i+n*(j+1)] = v[j]; 1793 } 1794 } 1795 // Backwards accumulation of Q 1796 for (CeedInt i=n-2; i>=0; i--) { 1797 if (tau[i] > 0.0) { 1798 v[i] = 1; 1799 for (CeedInt j=i+1; j<n-1; j++) { 1800 v[j] = mat_T[i+n*(j+1)]; 1801 mat_T[i+n*(j+1)] = 0; 1802 } 1803 CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i], 1804 n-(i+1), n-(i+1), n, 1); 1805 } 1806 } 1807 1808 // Reduce sub and super diagonal 1809 CeedInt p = 0, q = 0, itr = 0, max_itr = n*n*n*n; 1810 CeedScalar tol = CEED_EPSILON; 1811 1812 while (itr < max_itr) { 1813 // Update p, q, size of reduced portions of diagonal 1814 p = 0; q = 0; 1815 for (CeedInt i=n-2; i>=0; i--) { 1816 if (fabs(mat_T[i+n*(i+1)]) < tol) 1817 q += 1; 1818 else 1819 break; 1820 } 1821 for (CeedInt i=0; i<n-q-1; i++) { 1822 if (fabs(mat_T[i+n*(i+1)]) < tol) 1823 p += 1; 1824 else 1825 break; 1826 } 1827 if (q == n-1) break; // Finished reducing 1828 1829 // Reduce tridiagonal portion 1830 CeedScalar t_nn = mat_T[(n-1-q)+n*(n-1-q)], 1831 t_nnm1 = mat_T[(n-2-q)+n*(n-1-q)]; 1832 CeedScalar d = (mat_T[(n-2-q)+n*(n-2-q)] - t_nn)/2; 1833 CeedScalar mu = t_nn - t_nnm1*t_nnm1 / 1834 (d + copysign(sqrt(d*d + t_nnm1*t_nnm1), d)); 1835 CeedScalar x = mat_T[p+n*p] - mu; 1836 CeedScalar z = mat_T[p+n*(p+1)]; 1837 for (CeedInt k=p; k<n-q-1; k++) { 1838 // Compute Givens rotation 1839 CeedScalar c = 1, s = 0; 1840 if (fabs(z) > tol) { 1841 if (fabs(z) > fabs(x)) { 1842 CeedScalar tau = -x/z; 1843 s = 1/sqrt(1+tau*tau), c = s*tau; 1844 } else { 1845 CeedScalar tau = -z/x; 1846 c = 1/sqrt(1+tau*tau), s = c*tau; 1847 } 1848 } 1849 1850 // Apply Givens rotation to T 1851 CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k+1, n, n); 1852 CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k+1, n, n); 1853 1854 // Apply Givens rotation to Q 1855 CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n); 1856 1857 // Update x, z 1858 if (k < n-q-2) { 1859 x = mat_T[k+n*(k+1)]; 1860 z = mat_T[k+n*(k+2)]; 1861 } 1862 } 1863 itr++; 1864 } 1865 1866 // Save eigenvalues 1867 for (CeedInt i=0; i<n; i++) 1868 lambda[i] = mat_T[i+n*i]; 1869 1870 // Check convergence 1871 if (itr == max_itr && q < n-1) 1872 // LCOV_EXCL_START 1873 return CeedError(ceed, CEED_ERROR_MINOR, 1874 "Symmetric QR failed to converge"); 1875 // LCOV_EXCL_STOP 1876 return CEED_ERROR_SUCCESS; 1877 } 1878 CeedPragmaOptimizeOn 1879 1880 /** 1881 @brief Return Simultaneous Diagonalization of two matrices. This solves the 1882 generalized eigenvalue problem A x = lambda B x, where A and B 1883 are symmetric and B is positive definite. We generate the matrix X 1884 and vector Lambda such that X^T A X = Lambda and X^T B X = I. This 1885 is equivalent to the LAPACK routine 'sygv' with TYPE = 1. 1886 1887 @param ceed A Ceed context for error handling 1888 @param[in] mat_A Row-major matrix to be factorized with eigenvalues 1889 @param[in] mat_B Row-major matrix to be factorized to identity 1890 @param[out] mat_X Row-major orthogonal matrix 1891 @param[out] lambda Vector of length n of generalized eigenvalues 1892 @param n Number of rows/columns 1893 1894 @return An error code: 0 - success, otherwise - failure 1895 1896 @ref Utility 1897 **/ 1898 CeedPragmaOptimizeOff 1899 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, 1900 CeedScalar *mat_B, CeedScalar *mat_X, 1901 CeedScalar *lambda, CeedInt n) { 1902 int ierr; 1903 CeedScalar *mat_C, *mat_G, *vec_D; 1904 ierr = CeedCalloc(n*n, &mat_C); CeedChk(ierr); 1905 ierr = CeedCalloc(n*n, &mat_G); CeedChk(ierr); 1906 ierr = CeedCalloc(n, &vec_D); CeedChk(ierr); 1907 1908 // Compute B = G D G^T 1909 memcpy(mat_G, mat_B, n*n*sizeof(mat_B[0])); 1910 ierr = CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n); CeedChk(ierr); 1911 1912 // Sort eigenvalues 1913 for (CeedInt i=n-1; i>=0; i--) 1914 for (CeedInt j=0; j<i; j++) { 1915 if (fabs(vec_D[j]) > fabs(vec_D[j+1])) { 1916 CeedScalar temp; 1917 temp = vec_D[j]; vec_D[j] = vec_D[j+1]; vec_D[j+1] = temp; 1918 for (CeedInt k=0; k<n; k++) { 1919 temp = mat_G[k*n+j]; mat_G[k*n+j] = mat_G[k*n+j+1]; mat_G[k*n+j+1] = temp; 1920 } 1921 } 1922 } 1923 1924 // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T 1925 // = D^-1/2 G^T A G D^-1/2 1926 // -- D = D^-1/2 1927 for (CeedInt i=0; i<n; i++) 1928 vec_D[i] = 1./sqrt(vec_D[i]); 1929 // -- G = G D^-1/2 1930 // -- C = D^-1/2 G^T 1931 for (CeedInt i=0; i<n; i++) 1932 for (CeedInt j=0; j<n; j++) { 1933 mat_G[i*n+j] *= vec_D[j]; 1934 mat_C[j*n+i] = mat_G[i*n+j]; 1935 } 1936 // -- X = (D^-1/2 G^T) A 1937 ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, 1938 (const CeedScalar *)mat_A, mat_X, n, n, n); 1939 CeedChk(ierr); 1940 // -- C = (D^-1/2 G^T A) (G D^-1/2) 1941 ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, 1942 (const CeedScalar *)mat_G, mat_C, n, n, n); 1943 CeedChk(ierr); 1944 1945 // Compute Q^T C Q = lambda 1946 ierr = CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n); CeedChk(ierr); 1947 1948 // Sort eigenvalues 1949 for (CeedInt i=n-1; i>=0; i--) 1950 for (CeedInt j=0; j<i; j++) { 1951 if (fabs(lambda[j]) > fabs(lambda[j+1])) { 1952 CeedScalar temp; 1953 temp = lambda[j]; lambda[j] = lambda[j+1]; lambda[j+1] = temp; 1954 for (CeedInt k=0; k<n; k++) { 1955 temp = mat_C[k*n+j]; mat_C[k*n+j] = mat_C[k*n+j+1]; mat_C[k*n+j+1] = temp; 1956 } 1957 } 1958 } 1959 1960 // Set X = (G D^1/2)^-T Q 1961 // = G D^-1/2 Q 1962 ierr = CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, 1963 (const CeedScalar *)mat_C, mat_X, n, n, n); 1964 CeedChk(ierr); 1965 1966 // Cleanup 1967 ierr = CeedFree(&mat_C); CeedChk(ierr); 1968 ierr = CeedFree(&mat_G); CeedChk(ierr); 1969 ierr = CeedFree(&vec_D); CeedChk(ierr); 1970 return CEED_ERROR_SUCCESS; 1971 } 1972 CeedPragmaOptimizeOn 1973 1974 /// @} 1975