1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <ceed.h> 18 #include <ceed-backend.h> 19 #include <ceed-impl.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 /// @file 26 /// Implementation of CeedBasis interfaces 27 28 /// @cond DOXYGEN_SKIP 29 static struct CeedBasis_private ceed_basis_collocated; 30 /// @endcond 31 32 /// @addtogroup CeedBasisUser 33 /// @{ 34 35 /// Indicate that the quadrature points are collocated with the nodes 36 const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated; 37 38 /// @} 39 40 /// ---------------------------------------------------------------------------- 41 /// CeedBasis Library Internal Functions 42 /// ---------------------------------------------------------------------------- 43 /// @addtogroup CeedBasisDeveloper 44 /// @{ 45 46 /** 47 @brief Compute Householder reflection 48 49 Computes A = (I - b v v^T) A 50 where A is an mxn matrix indexed as A[i*row + j*col] 51 52 @param[in,out] A Matrix to apply Householder reflection to, in place 53 @param v Householder vector 54 @param b Scaling factor 55 @param m Number of rows in A 56 @param n Number of columns in A 57 @param row Row stride 58 @param col Col stride 59 60 @return An error code: 0 - success, otherwise - failure 61 62 @ref Developer 63 **/ 64 static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, 65 CeedScalar b, CeedInt m, CeedInt n, 66 CeedInt row, CeedInt col) { 67 for (CeedInt j=0; j<n; j++) { 68 CeedScalar w = A[0*row + j*col]; 69 for (CeedInt i=1; i<m; i++) 70 w += v[i] * A[i*row + j*col]; 71 A[0*row + j*col] -= b * w; 72 for (CeedInt i=1; i<m; i++) 73 A[i*row + j*col] -= b * w * v[i]; 74 } 75 return CEED_ERROR_SUCCESS; 76 } 77 78 /** 79 @brief Apply Householder Q matrix 80 81 Compute A = Q A where Q is mxm and A is mxn. 82 83 @param[in,out] A Matrix to apply Householder Q to, in place 84 @param Q Householder Q matrix 85 @param tau Householder scaling factors 86 @param tmode Transpose mode for application 87 @param m Number of rows in A 88 @param n Number of columns in A 89 @param k Number of elementary reflectors in Q, k<m 90 @param row Row stride in A 91 @param col Col stride in A 92 93 @return An error code: 0 - success, otherwise - failure 94 95 @ref Developer 96 **/ 97 int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, 98 const CeedScalar *tau, CeedTransposeMode tmode, 99 CeedInt m, CeedInt n, CeedInt k, 100 CeedInt row, CeedInt col) { 101 int ierr; 102 CeedScalar v[m]; 103 for (CeedInt ii=0; ii<k; ii++) { 104 CeedInt i = tmode == CEED_TRANSPOSE ? ii : k-1-ii; 105 for (CeedInt j=i+1; j<m; j++) 106 v[j] = Q[j*k+i]; 107 // Apply Householder reflector (I - tau v v^T) collograd1d^T 108 ierr = CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col); 109 CeedChk(ierr); 110 } 111 return CEED_ERROR_SUCCESS; 112 } 113 114 /** 115 @brief Compute Givens rotation 116 117 Computes A = G A (or G^T A in transpose mode) 118 where A is an mxn matrix indexed as A[i*n + j*m] 119 120 @param[in,out] A Row major matrix to apply Givens rotation to, in place 121 @param c Cosine factor 122 @param s Sine factor 123 @param tmode @ref CEED_NOTRANSPOSE to rotate the basis counter-clockwise, 124 which has the effect of rotating columns of A clockwise; 125 @ref CEED_TRANSPOSE for the opposite rotation 126 @param i First row/column to apply rotation 127 @param k Second row/column to apply rotation 128 @param m Number of rows in A 129 @param n Number of columns in A 130 131 @return An error code: 0 - success, otherwise - failure 132 133 @ref Developer 134 **/ 135 static int CeedGivensRotation(CeedScalar *A, CeedScalar c, CeedScalar s, 136 CeedTransposeMode tmode, CeedInt i, CeedInt k, 137 CeedInt m, CeedInt n) { 138 CeedInt stridej = 1, strideik = m, numits = n; 139 if (tmode == CEED_NOTRANSPOSE) { 140 stridej = n; strideik = 1; numits = m; 141 } 142 143 // Apply rotation 144 for (CeedInt j=0; j<numits; j++) { 145 CeedScalar tau1 = A[i*strideik+j*stridej], tau2 = A[k*strideik+j*stridej]; 146 A[i*strideik+j*stridej] = c*tau1 - s*tau2; 147 A[k*strideik+j*stridej] = s*tau1 + c*tau2; 148 } 149 return CEED_ERROR_SUCCESS; 150 } 151 152 /** 153 @brief View an array stored in a CeedBasis 154 155 @param[in] name Name of array 156 @param[in] fpformat Printing format 157 @param[in] m Number of rows in array 158 @param[in] n Number of columns in array 159 @param[in] a Array to be viewed 160 @param[in] stream Stream to view to, e.g., stdout 161 162 @return An error code: 0 - success, otherwise - failure 163 164 @ref Developer 165 **/ 166 static int CeedScalarView(const char *name, const char *fpformat, CeedInt m, 167 CeedInt n, const CeedScalar *a, FILE *stream) { 168 for (int i=0; i<m; i++) { 169 if (m > 1) 170 fprintf(stream, "%12s[%d]:", name, i); 171 else 172 fprintf(stream, "%12s:", name); 173 for (int j=0; j<n; j++) 174 fprintf(stream, fpformat, fabs(a[i*n+j]) > 1E-14 ? a[i*n+j] : 0); 175 fputs("\n", stream); 176 } 177 return CEED_ERROR_SUCCESS; 178 } 179 180 /// @} 181 182 /// ---------------------------------------------------------------------------- 183 /// Ceed Backend API 184 /// ---------------------------------------------------------------------------- 185 /// @addtogroup CeedBasisBackend 186 /// @{ 187 188 /** 189 @brief Return collocated grad matrix 190 191 @param basis CeedBasis 192 @param[out] collograd1d Row-major (Q1d * Q1d) matrix expressing derivatives of 193 basis functions at quadrature points 194 195 @return An error code: 0 - success, otherwise - failure 196 197 @ref Backend 198 **/ 199 int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collograd1d) { 200 int i, j, k; 201 Ceed ceed; 202 CeedInt ierr, P1d=(basis)->P1d, Q1d=(basis)->Q1d; 203 CeedScalar *interp1d, *grad1d, tau[Q1d]; 204 205 ierr = CeedMalloc(Q1d*P1d, &interp1d); CeedChk(ierr); 206 ierr = CeedMalloc(Q1d*P1d, &grad1d); CeedChk(ierr); 207 memcpy(interp1d, (basis)->interp1d, Q1d*P1d*sizeof(basis)->interp1d[0]); 208 memcpy(grad1d, (basis)->grad1d, Q1d*P1d*sizeof(basis)->interp1d[0]); 209 210 // QR Factorization, interp1d = Q R 211 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 212 ierr = CeedQRFactorization(ceed, interp1d, tau, Q1d, P1d); CeedChk(ierr); 213 // Note: This function is for backend use, so all errors are terminal 214 // and we do not need to clean up memory on failure. 215 216 // Apply Rinv, collograd1d = grad1d Rinv 217 for (i=0; i<Q1d; i++) { // Row i 218 collograd1d[Q1d*i] = grad1d[P1d*i]/interp1d[0]; 219 for (j=1; j<P1d; j++) { // Column j 220 collograd1d[j+Q1d*i] = grad1d[j+P1d*i]; 221 for (k=0; k<j; k++) 222 collograd1d[j+Q1d*i] -= interp1d[j+P1d*k]*collograd1d[k+Q1d*i]; 223 collograd1d[j+Q1d*i] /= interp1d[j+P1d*j]; 224 } 225 for (j=P1d; j<Q1d; j++) 226 collograd1d[j+Q1d*i] = 0; 227 } 228 229 // Apply Qtranspose, collograd = collograd Qtranspose 230 ierr = CeedHouseholderApplyQ(collograd1d, interp1d, tau, CEED_NOTRANSPOSE, 231 Q1d, Q1d, P1d, 1, Q1d); CeedChk(ierr); 232 233 ierr = CeedFree(&interp1d); CeedChk(ierr); 234 ierr = CeedFree(&grad1d); CeedChk(ierr); 235 return CEED_ERROR_SUCCESS; 236 } 237 238 /** 239 @brief Get Ceed associated with a CeedBasis 240 241 @param basis CeedBasis 242 @param[out] ceed Variable to store Ceed 243 244 @return An error code: 0 - success, otherwise - failure 245 246 @ref Backend 247 **/ 248 int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed) { 249 *ceed = basis->ceed; 250 return CEED_ERROR_SUCCESS; 251 } 252 253 /** 254 @brief Get tensor status for given CeedBasis 255 256 @param basis CeedBasis 257 @param[out] istensor Variable to store tensor status 258 259 @return An error code: 0 - success, otherwise - failure 260 261 @ref Backend 262 **/ 263 int CeedBasisIsTensor(CeedBasis basis, bool *istensor) { 264 *istensor = basis->tensorbasis; 265 return CEED_ERROR_SUCCESS; 266 } 267 268 /** 269 @brief Get backend data of a CeedBasis 270 271 @param basis CeedBasis 272 @param[out] data Variable to store data 273 274 @return An error code: 0 - success, otherwise - failure 275 276 @ref Backend 277 **/ 278 int CeedBasisGetData(CeedBasis basis, void *data) { 279 *(void **)data = basis->data; 280 return CEED_ERROR_SUCCESS; 281 } 282 283 /** 284 @brief Set backend data of a CeedBasis 285 286 @param[out] basis CeedBasis 287 @param data Data to set 288 289 @return An error code: 0 - success, otherwise - failure 290 291 @ref Backend 292 **/ 293 int CeedBasisSetData(CeedBasis basis, void *data) { 294 basis->data = data; 295 return CEED_ERROR_SUCCESS; 296 } 297 298 /** 299 @brief Get dimension for given CeedElemTopology 300 301 @param topo CeedElemTopology 302 @param[out] dim Variable to store dimension of topology 303 304 @return An error code: 0 - success, otherwise - failure 305 306 @ref Backend 307 **/ 308 int CeedBasisGetTopologyDimension(CeedElemTopology topo, CeedInt *dim) { 309 *dim = (CeedInt) topo >> 16; 310 return CEED_ERROR_SUCCESS; 311 } 312 313 /** 314 @brief Get CeedTensorContract of a CeedBasis 315 316 @param basis CeedBasis 317 @param[out] contract Variable to store CeedTensorContract 318 319 @return An error code: 0 - success, otherwise - failure 320 321 @ref Backend 322 **/ 323 int CeedBasisGetTensorContract(CeedBasis basis, CeedTensorContract *contract) { 324 *contract = basis->contract; 325 return CEED_ERROR_SUCCESS; 326 } 327 328 /** 329 @brief Set CeedTensorContract of a CeedBasis 330 331 @param[out] basis CeedBasis 332 @param contract CeedTensorContract to set 333 334 @return An error code: 0 - success, otherwise - failure 335 336 @ref Backend 337 **/ 338 int CeedBasisSetTensorContract(CeedBasis basis, CeedTensorContract *contract) { 339 basis->contract = *contract; 340 return CEED_ERROR_SUCCESS; 341 } 342 343 /** 344 @brief Return a reference implementation of matrix multiplication C = A B. 345 Note, this is a reference implementation for CPU CeedScalar pointers 346 that is not intended for high performance. 347 348 @param ceed A Ceed context for error handling 349 @param[in] matA Row-major matrix A 350 @param[in] matB Row-major matrix B 351 @param[out] matC Row-major output matrix C 352 @param m Number of rows of C 353 @param n Number of columns of C 354 @param kk Number of columns of A/rows of B 355 356 @return An error code: 0 - success, otherwise - failure 357 358 @ref Utility 359 **/ 360 int CeedMatrixMultiply(Ceed ceed, const CeedScalar *matA, 361 const CeedScalar *matB, CeedScalar *matC, CeedInt m, 362 CeedInt n, CeedInt kk) { 363 for (CeedInt i=0; i<m; i++) 364 for (CeedInt j=0; j<n; j++) { 365 CeedScalar sum = 0; 366 for (CeedInt k=0; k<kk; k++) 367 sum += matA[k+i*kk]*matB[j+k*n]; 368 matC[j+i*n] = sum; 369 } 370 return CEED_ERROR_SUCCESS; 371 } 372 373 /// @} 374 375 /// ---------------------------------------------------------------------------- 376 /// CeedBasis Public API 377 /// ---------------------------------------------------------------------------- 378 /// @addtogroup CeedBasisUser 379 /// @{ 380 381 /** 382 @brief Create a tensor-product basis for H^1 discretizations 383 384 @param ceed A Ceed object where the CeedBasis will be created 385 @param dim Topological dimension 386 @param ncomp Number of field components (1 for scalar fields) 387 @param P1d Number of nodes in one dimension 388 @param Q1d Number of quadrature points in one dimension 389 @param interp1d Row-major (Q1d * P1d) matrix expressing the values of nodal 390 basis functions at quadrature points 391 @param grad1d Row-major (Q1d * P1d) matrix expressing derivatives of nodal 392 basis functions at quadrature points 393 @param qref1d Array of length Q1d holding the locations of quadrature points 394 on the 1D reference element [-1, 1] 395 @param qweight1d Array of length Q1d holding the quadrature weights on the 396 reference element 397 @param[out] basis Address of the variable where the newly created 398 CeedBasis will be stored. 399 400 @return An error code: 0 - success, otherwise - failure 401 402 @ref User 403 **/ 404 int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt ncomp, CeedInt P1d, 405 CeedInt Q1d, const CeedScalar *interp1d, 406 const CeedScalar *grad1d, const CeedScalar *qref1d, 407 const CeedScalar *qweight1d, CeedBasis *basis) { 408 int ierr; 409 410 if (!ceed->BasisCreateTensorH1) { 411 Ceed delegate; 412 ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr); 413 414 if (!delegate) 415 // LCOV_EXCL_START 416 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 417 "Backend does not support BasisCreateTensorH1"); 418 // LCOV_EXCL_STOP 419 420 ierr = CeedBasisCreateTensorH1(delegate, dim, ncomp, P1d, 421 Q1d, interp1d, grad1d, qref1d, 422 qweight1d, basis); CeedChk(ierr); 423 return CEED_ERROR_SUCCESS; 424 } 425 426 if (dim<1) 427 // LCOV_EXCL_START 428 return CeedError(ceed, CEED_ERROR_DIMENSION, 429 "Basis dimension must be a positive value"); 430 // LCOV_EXCL_STOP 431 CeedElemTopology topo = dim == 1 ? CEED_LINE : 432 dim == 2 ? CEED_QUAD : 433 CEED_HEX; 434 435 ierr = CeedCalloc(1, basis); CeedChk(ierr); 436 (*basis)->ceed = ceed; 437 ceed->refcount++; 438 (*basis)->refcount = 1; 439 (*basis)->tensorbasis = 1; 440 (*basis)->dim = dim; 441 (*basis)->topo = topo; 442 (*basis)->ncomp = ncomp; 443 (*basis)->P1d = P1d; 444 (*basis)->Q1d = Q1d; 445 (*basis)->P = CeedIntPow(P1d, dim); 446 (*basis)->Q = CeedIntPow(Q1d, dim); 447 ierr = CeedMalloc(Q1d,&(*basis)->qref1d); CeedChk(ierr); 448 ierr = CeedMalloc(Q1d,&(*basis)->qweight1d); CeedChk(ierr); 449 memcpy((*basis)->qref1d, qref1d, Q1d*sizeof(qref1d[0])); 450 memcpy((*basis)->qweight1d, qweight1d, Q1d*sizeof(qweight1d[0])); 451 ierr = CeedMalloc(Q1d*P1d,&(*basis)->interp1d); CeedChk(ierr); 452 ierr = CeedMalloc(Q1d*P1d,&(*basis)->grad1d); CeedChk(ierr); 453 memcpy((*basis)->interp1d, interp1d, Q1d*P1d*sizeof(interp1d[0])); 454 memcpy((*basis)->grad1d, grad1d, Q1d*P1d*sizeof(grad1d[0])); 455 ierr = ceed->BasisCreateTensorH1(dim, P1d, Q1d, interp1d, grad1d, qref1d, 456 qweight1d, *basis); CeedChk(ierr); 457 return CEED_ERROR_SUCCESS; 458 } 459 460 /** 461 @brief Create a tensor-product Lagrange basis 462 463 @param ceed A Ceed object where the CeedBasis will be created 464 @param dim Topological dimension of element 465 @param ncomp Number of field components (1 for scalar fields) 466 @param P Number of Gauss-Lobatto nodes in one dimension. The 467 polynomial degree of the resulting Q_k element is k=P-1. 468 @param Q Number of quadrature points in one dimension. 469 @param qmode Distribution of the Q quadrature points (affects order of 470 accuracy for the quadrature) 471 @param[out] basis Address of the variable where the newly created 472 CeedBasis will be stored. 473 474 @return An error code: 0 - success, otherwise - failure 475 476 @ref User 477 **/ 478 int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt ncomp, 479 CeedInt P, CeedInt Q, CeedQuadMode qmode, 480 CeedBasis *basis) { 481 // Allocate 482 int ierr, ierr2, i, j, k; 483 CeedScalar c1, c2, c3, c4, dx, *nodes, *interp1d, *grad1d, *qref1d, *qweight1d; 484 485 if (dim<1) 486 // LCOV_EXCL_START 487 return CeedError(ceed, CEED_ERROR_DIMENSION, 488 "Basis dimension must be a positive value"); 489 // LCOV_EXCL_STOP 490 491 // Get Nodes and Weights 492 ierr = CeedCalloc(P*Q, &interp1d); CeedChk(ierr); 493 ierr = CeedCalloc(P*Q, &grad1d); CeedChk(ierr); 494 ierr = CeedCalloc(P, &nodes); CeedChk(ierr); 495 ierr = CeedCalloc(Q, &qref1d); CeedChk(ierr); 496 ierr = CeedCalloc(Q, &qweight1d); CeedChk(ierr); 497 ierr = CeedLobattoQuadrature(P, nodes, NULL); 498 if (ierr) { goto cleanup; } CeedChk(ierr); 499 switch (qmode) { 500 case CEED_GAUSS: 501 ierr = CeedGaussQuadrature(Q, qref1d, qweight1d); 502 break; 503 case CEED_GAUSS_LOBATTO: 504 ierr = CeedLobattoQuadrature(Q, qref1d, qweight1d); 505 break; 506 } 507 if (ierr) { goto cleanup; } CeedChk(ierr); 508 509 // Build B, D matrix 510 // Fornberg, 1998 511 for (i = 0; i < Q; i++) { 512 c1 = 1.0; 513 c3 = nodes[0] - qref1d[i]; 514 interp1d[i*P+0] = 1.0; 515 for (j = 1; j < P; j++) { 516 c2 = 1.0; 517 c4 = c3; 518 c3 = nodes[j] - qref1d[i]; 519 for (k = 0; k < j; k++) { 520 dx = nodes[j] - nodes[k]; 521 c2 *= dx; 522 if (k == j - 1) { 523 grad1d[i*P + j] = c1*(interp1d[i*P + k] - c4*grad1d[i*P + k]) / c2; 524 interp1d[i*P + j] = - c1*c4*interp1d[i*P + k] / c2; 525 } 526 grad1d[i*P + k] = (c3*grad1d[i*P + k] - interp1d[i*P + k]) / dx; 527 interp1d[i*P + k] = c3*interp1d[i*P + k] / dx; 528 } 529 c1 = c2; 530 } 531 } 532 // // Pass to CeedBasisCreateTensorH1 533 ierr = CeedBasisCreateTensorH1(ceed, dim, ncomp, P, Q, interp1d, grad1d, qref1d, 534 qweight1d, basis); CeedChk(ierr); 535 cleanup: 536 ierr2 = CeedFree(&interp1d); CeedChk(ierr2); 537 ierr2 = CeedFree(&grad1d); CeedChk(ierr2); 538 ierr2 = CeedFree(&nodes); CeedChk(ierr2); 539 ierr2 = CeedFree(&qref1d); CeedChk(ierr2); 540 ierr2 = CeedFree(&qweight1d); CeedChk(ierr2); 541 CeedChk(ierr); 542 return CEED_ERROR_SUCCESS; 543 } 544 545 /** 546 @brief Create a non tensor-product basis for H^1 discretizations 547 548 @param ceed A Ceed object where the CeedBasis will be created 549 @param topo Topology of element, e.g. hypercube, simplex, ect 550 @param ncomp Number of field components (1 for scalar fields) 551 @param nnodes Total number of nodes 552 @param nqpts Total number of quadrature points 553 @param interp Row-major (nqpts * nnodes) matrix expressing the values of 554 nodal basis functions at quadrature points 555 @param grad Row-major (nqpts * dim * nnodes) matrix expressing 556 derivatives of nodal basis functions at quadrature points 557 @param qref Array of length nqpts holding the locations of quadrature 558 points on the reference element [-1, 1] 559 @param qweight Array of length nqpts holding the quadrature weights on the 560 reference element 561 @param[out] basis Address of the variable where the newly created 562 CeedBasis will be stored. 563 564 @return An error code: 0 - success, otherwise - failure 565 566 @ref User 567 **/ 568 int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt ncomp, 569 CeedInt nnodes, CeedInt nqpts, const CeedScalar *interp, 570 const CeedScalar *grad, const CeedScalar *qref, 571 const CeedScalar *qweight, CeedBasis *basis) { 572 int ierr; 573 CeedInt P = nnodes, Q = nqpts, dim = 0; 574 575 if (!ceed->BasisCreateH1) { 576 Ceed delegate; 577 ierr = CeedGetObjectDelegate(ceed, &delegate, "Basis"); CeedChk(ierr); 578 579 if (!delegate) 580 // LCOV_EXCL_START 581 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 582 "Backend does not support BasisCreateH1"); 583 // LCOV_EXCL_STOP 584 585 ierr = CeedBasisCreateH1(delegate, topo, ncomp, nnodes, 586 nqpts, interp, grad, qref, 587 qweight, basis); CeedChk(ierr); 588 return CEED_ERROR_SUCCESS; 589 } 590 591 ierr = CeedCalloc(1,basis); CeedChk(ierr); 592 593 ierr = CeedBasisGetTopologyDimension(topo, &dim); CeedChk(ierr); 594 595 (*basis)->ceed = ceed; 596 ceed->refcount++; 597 (*basis)->refcount = 1; 598 (*basis)->tensorbasis = 0; 599 (*basis)->dim = dim; 600 (*basis)->topo = topo; 601 (*basis)->ncomp = ncomp; 602 (*basis)->P = P; 603 (*basis)->Q = Q; 604 ierr = CeedMalloc(Q*dim,&(*basis)->qref1d); CeedChk(ierr); 605 ierr = CeedMalloc(Q,&(*basis)->qweight1d); CeedChk(ierr); 606 memcpy((*basis)->qref1d, qref, Q*dim*sizeof(qref[0])); 607 memcpy((*basis)->qweight1d, qweight, Q*sizeof(qweight[0])); 608 ierr = CeedMalloc(Q*P, &(*basis)->interp); CeedChk(ierr); 609 ierr = CeedMalloc(dim*Q*P, &(*basis)->grad); CeedChk(ierr); 610 memcpy((*basis)->interp, interp, Q*P*sizeof(interp[0])); 611 memcpy((*basis)->grad, grad, dim*Q*P*sizeof(grad[0])); 612 ierr = ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, qref, 613 qweight, *basis); CeedChk(ierr); 614 return CEED_ERROR_SUCCESS; 615 } 616 617 /** 618 @brief View a CeedBasis 619 620 @param basis CeedBasis to view 621 @param stream Stream to view to, e.g., stdout 622 623 @return An error code: 0 - success, otherwise - failure 624 625 @ref User 626 **/ 627 int CeedBasisView(CeedBasis basis, FILE *stream) { 628 int ierr; 629 630 if (basis->tensorbasis) { 631 fprintf(stream, "CeedBasis: dim=%d P=%d Q=%d\n", basis->dim, basis->P1d, 632 basis->Q1d); 633 ierr = CeedScalarView("qref1d", "\t% 12.8f", 1, basis->Q1d, basis->qref1d, 634 stream); CeedChk(ierr); 635 ierr = CeedScalarView("qweight1d", "\t% 12.8f", 1, basis->Q1d, 636 basis->qweight1d, stream); CeedChk(ierr); 637 ierr = CeedScalarView("interp1d", "\t% 12.8f", basis->Q1d, basis->P1d, 638 basis->interp1d, stream); CeedChk(ierr); 639 ierr = CeedScalarView("grad1d", "\t% 12.8f", basis->Q1d, basis->P1d, 640 basis->grad1d, stream); CeedChk(ierr); 641 } else { 642 fprintf(stream, "CeedBasis: dim=%d P=%d Q=%d\n", basis->dim, basis->P, 643 basis->Q); 644 ierr = CeedScalarView("qref", "\t% 12.8f", 1, basis->Q*basis->dim, 645 basis->qref1d, 646 stream); CeedChk(ierr); 647 ierr = CeedScalarView("qweight", "\t% 12.8f", 1, basis->Q, basis->qweight1d, 648 stream); CeedChk(ierr); 649 ierr = CeedScalarView("interp", "\t% 12.8f", basis->Q, basis->P, 650 basis->interp, stream); CeedChk(ierr); 651 ierr = CeedScalarView("grad", "\t% 12.8f", basis->dim*basis->Q, basis->P, 652 basis->grad, stream); CeedChk(ierr); 653 } 654 return CEED_ERROR_SUCCESS; 655 } 656 657 /** 658 @brief Apply basis evaluation from nodes to quadrature points or vice versa 659 660 @param basis CeedBasis to evaluate 661 @param nelem The number of elements to apply the basis evaluation to; 662 the backend will specify the ordering in 663 CeedElemRestrictionCreateBlocked() 664 @param tmode \ref CEED_NOTRANSPOSE to evaluate from nodes to quadrature 665 points, \ref CEED_TRANSPOSE to apply the transpose, mapping 666 from quadrature points to nodes 667 @param emode \ref CEED_EVAL_NONE to use values directly, 668 \ref CEED_EVAL_INTERP to use interpolated values, 669 \ref CEED_EVAL_GRAD to use gradients, 670 \ref CEED_EVAL_WEIGHT to use quadrature weights. 671 @param[in] u Input CeedVector 672 @param[out] v Output CeedVector 673 674 @return An error code: 0 - success, otherwise - failure 675 676 @ref User 677 **/ 678 int CeedBasisApply(CeedBasis basis, CeedInt nelem, CeedTransposeMode tmode, 679 CeedEvalMode emode, CeedVector u, CeedVector v) { 680 int ierr; 681 CeedInt ulength = 0, vlength, dim, ncomp, nnodes, nqpts; 682 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 683 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 684 ierr = CeedBasisGetNumNodes(basis, &nnodes); CeedChk(ierr); 685 ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpts); CeedChk(ierr); 686 ierr = CeedVectorGetLength(v, &vlength); CeedChk(ierr); 687 if (u) { 688 ierr = CeedVectorGetLength(u, &ulength); CeedChk(ierr); 689 } 690 691 if (!basis->Apply) 692 // LCOV_EXCL_START 693 return CeedError(basis->ceed, CEED_ERROR_UNSUPPORTED, 694 "Backend does not support BasisApply"); 695 // LCOV_EXCL_STOP 696 697 // Check compatibility of topological and geometrical dimensions 698 if ((tmode == CEED_TRANSPOSE && (vlength%nnodes != 0 || ulength%nqpts != 0)) || 699 (tmode == CEED_NOTRANSPOSE && (ulength%nnodes != 0 || vlength%nqpts != 0))) 700 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, 701 "Length of input/output vectors " 702 "incompatible with basis dimensions"); 703 704 // Check vector lengths to prevent out of bounds issues 705 bool baddims = false; 706 switch (emode) { 707 case CEED_EVAL_NONE: 708 case CEED_EVAL_INTERP: baddims = 709 ((tmode == CEED_TRANSPOSE && (ulength < nelem*ncomp*nqpts 710 || vlength < nelem*ncomp*nnodes)) || 711 (tmode == CEED_NOTRANSPOSE && (vlength < nelem*nqpts*ncomp 712 || ulength < nelem*ncomp*nnodes))); 713 break; 714 case CEED_EVAL_GRAD: baddims = 715 ((tmode == CEED_TRANSPOSE && (ulength < nelem*ncomp*nqpts*dim 716 || vlength < nelem*ncomp*nnodes)) || 717 (tmode == CEED_NOTRANSPOSE && (vlength < nelem*nqpts*ncomp*dim 718 || ulength < nelem*ncomp*nnodes))); 719 break; 720 case CEED_EVAL_WEIGHT: 721 baddims = vlength < nelem*nqpts; 722 break; 723 // LCOV_EXCL_START 724 case CEED_EVAL_DIV: baddims = 725 ((tmode == CEED_TRANSPOSE && (ulength < nelem*ncomp*nqpts 726 || vlength < nelem*ncomp*nnodes)) || 727 (tmode == CEED_NOTRANSPOSE && (vlength < nelem*nqpts*ncomp 728 || ulength < nelem*ncomp*nnodes))); 729 break; 730 case CEED_EVAL_CURL: baddims = 731 ((tmode == CEED_TRANSPOSE && (ulength < nelem*ncomp*nqpts 732 || vlength < nelem*ncomp*nnodes)) || 733 (tmode == CEED_NOTRANSPOSE && (vlength < nelem*nqpts*ncomp 734 || ulength < nelem*ncomp*nnodes))); 735 break; 736 // LCOV_EXCL_STOP 737 } 738 if (baddims) 739 // LCOV_EXCL_START 740 return CeedError(basis->ceed, CEED_ERROR_DIMENSION, 741 "Input/output vectors too short for basis and evalualtion mode"); 742 // LCOV_EXCL_STOP 743 744 ierr = basis->Apply(basis, nelem, tmode, emode, u, v); CeedChk(ierr); 745 return CEED_ERROR_SUCCESS; 746 } 747 748 /** 749 @brief Get dimension for given CeedBasis 750 751 @param basis CeedBasis 752 @param[out] dim Variable to store dimension of basis 753 754 @return An error code: 0 - success, otherwise - failure 755 756 @ref Backend 757 **/ 758 int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim) { 759 *dim = basis->dim; 760 return CEED_ERROR_SUCCESS; 761 } 762 763 /** 764 @brief Get topology for given CeedBasis 765 766 @param basis CeedBasis 767 @param[out] topo Variable to store topology of basis 768 769 @return An error code: 0 - success, otherwise - failure 770 771 @ref Backend 772 **/ 773 int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo) { 774 *topo = basis->topo; 775 return CEED_ERROR_SUCCESS; 776 } 777 778 /** 779 @brief Get number of components for given CeedBasis 780 781 @param basis CeedBasis 782 @param[out] numcomp Variable to store number of components of basis 783 784 @return An error code: 0 - success, otherwise - failure 785 786 @ref Backend 787 **/ 788 int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *numcomp) { 789 *numcomp = basis->ncomp; 790 return CEED_ERROR_SUCCESS; 791 } 792 793 /** 794 @brief Get total number of nodes (in dim dimensions) of a CeedBasis 795 796 @param basis CeedBasis 797 @param[out] P Variable to store number of nodes 798 799 @return An error code: 0 - success, otherwise - failure 800 801 @ref Utility 802 **/ 803 int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P) { 804 *P = basis->P; 805 return CEED_ERROR_SUCCESS; 806 } 807 808 /** 809 @brief Get total number of nodes (in 1 dimension) of a CeedBasis 810 811 @param basis CeedBasis 812 @param[out] P1d Variable to store number of nodes 813 814 @return An error code: 0 - success, otherwise - failure 815 816 @ref Backend 817 **/ 818 int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P1d) { 819 if (!basis->tensorbasis) 820 // LCOV_EXCL_START 821 return CeedError(basis->ceed, CEED_ERROR_MINOR, 822 "Cannot supply P1d for non-tensor basis"); 823 // LCOV_EXCL_STOP 824 825 *P1d = basis->P1d; 826 return CEED_ERROR_SUCCESS; 827 } 828 829 /** 830 @brief Get total number of quadrature points (in dim dimensions) of a CeedBasis 831 832 @param basis CeedBasis 833 @param[out] Q Variable to store number of quadrature points 834 835 @return An error code: 0 - success, otherwise - failure 836 837 @ref Utility 838 **/ 839 int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q) { 840 *Q = basis->Q; 841 return CEED_ERROR_SUCCESS; 842 } 843 844 /** 845 @brief Get total number of quadrature points (in 1 dimension) of a CeedBasis 846 847 @param basis CeedBasis 848 @param[out] Q1d Variable to store number of quadrature points 849 850 @return An error code: 0 - success, otherwise - failure 851 852 @ref Backend 853 **/ 854 int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, CeedInt *Q1d) { 855 if (!basis->tensorbasis) 856 // LCOV_EXCL_START 857 return CeedError(basis->ceed, CEED_ERROR_MINOR, 858 "Cannot supply Q1d for non-tensor basis"); 859 // LCOV_EXCL_STOP 860 861 *Q1d = basis->Q1d; 862 return CEED_ERROR_SUCCESS; 863 } 864 865 /** 866 @brief Get reference coordinates of quadrature points (in dim dimensions) 867 of a CeedBasis 868 869 @param basis CeedBasis 870 @param[out] qref Variable to store reference coordinates of quadrature points 871 872 @return An error code: 0 - success, otherwise - failure 873 874 @ref Backend 875 **/ 876 int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **qref) { 877 *qref = basis->qref1d; 878 return CEED_ERROR_SUCCESS; 879 } 880 881 /** 882 @brief Get quadrature weights of quadrature points (in dim dimensions) 883 of a CeedBasis 884 885 @param basis CeedBasis 886 @param[out] qweight Variable to store quadrature weights 887 888 @return An error code: 0 - success, otherwise - failure 889 890 @ref Backend 891 **/ 892 int CeedBasisGetQWeights(CeedBasis basis, const CeedScalar **qweight) { 893 *qweight = basis->qweight1d; 894 return CEED_ERROR_SUCCESS; 895 } 896 897 /** 898 @brief Get interpolation matrix of a CeedBasis 899 900 @param basis CeedBasis 901 @param[out] interp Variable to store interpolation matrix 902 903 @return An error code: 0 - success, otherwise - failure 904 905 @ref Backend 906 **/ 907 int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp) { 908 if (!basis->interp && basis->tensorbasis) { 909 // Allocate 910 int ierr; 911 ierr = CeedMalloc(basis->Q*basis->P, &basis->interp); CeedChk(ierr); 912 913 // Initialize 914 for (CeedInt i=0; i<basis->Q*basis->P; i++) 915 basis->interp[i] = 1.0; 916 917 // Calculate 918 for (CeedInt d=0; d<basis->dim; d++) 919 for (CeedInt qpt=0; qpt<basis->Q; qpt++) 920 for (CeedInt node=0; node<basis->P; node++) { 921 CeedInt p = (node / CeedIntPow(basis->P1d, d)) % basis->P1d; 922 CeedInt q = (qpt / CeedIntPow(basis->Q1d, d)) % basis->Q1d; 923 basis->interp[qpt*(basis->P)+node] *= basis->interp1d[q*basis->P1d+p]; 924 } 925 } 926 *interp = basis->interp; 927 return CEED_ERROR_SUCCESS; 928 } 929 930 /** 931 @brief Get 1D interpolation matrix of a tensor product CeedBasis 932 933 @param basis CeedBasis 934 @param[out] interp1d Variable to store interpolation matrix 935 936 @return An error code: 0 - success, otherwise - failure 937 938 @ref Backend 939 **/ 940 int CeedBasisGetInterp1D(CeedBasis basis, const CeedScalar **interp1d) { 941 if (!basis->tensorbasis) 942 // LCOV_EXCL_START 943 return CeedError(basis->ceed, CEED_ERROR_MINOR, 944 "CeedBasis is not a tensor product basis."); 945 // LCOV_EXCL_STOP 946 947 *interp1d = basis->interp1d; 948 return CEED_ERROR_SUCCESS; 949 } 950 951 /** 952 @brief Get gradient matrix of a CeedBasis 953 954 @param basis CeedBasis 955 @param[out] grad Variable to store gradient matrix 956 957 @return An error code: 0 - success, otherwise - failure 958 959 @ref Backend 960 **/ 961 int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad) { 962 if (!basis->grad && basis->tensorbasis) { 963 // Allocate 964 int ierr; 965 ierr = CeedMalloc(basis->dim*basis->Q*basis->P, &basis->grad); 966 CeedChk(ierr); 967 968 // Initialize 969 for (CeedInt i=0; i<basis->dim*basis->Q*basis->P; i++) 970 basis->grad[i] = 1.0; 971 972 // Calculate 973 for (CeedInt d=0; d<basis->dim; d++) 974 for (CeedInt i=0; i<basis->dim; i++) 975 for (CeedInt qpt=0; qpt<basis->Q; qpt++) 976 for (CeedInt node=0; node<basis->P; node++) { 977 CeedInt p = (node / CeedIntPow(basis->P1d, d)) % basis->P1d; 978 CeedInt q = (qpt / CeedIntPow(basis->Q1d, d)) % basis->Q1d; 979 if (i == d) 980 basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *= 981 basis->grad1d[q*basis->P1d+p]; 982 else 983 basis->grad[(i*basis->Q+qpt)*(basis->P)+node] *= 984 basis->interp1d[q*basis->P1d+p]; 985 } 986 } 987 *grad = basis->grad; 988 return CEED_ERROR_SUCCESS; 989 } 990 991 /** 992 @brief Get 1D gradient matrix of a tensor product CeedBasis 993 994 @param basis CeedBasis 995 @param[out] grad1d Variable to store gradient matrix 996 997 @return An error code: 0 - success, otherwise - failure 998 999 @ref Backend 1000 **/ 1001 int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad1d) { 1002 if (!basis->tensorbasis) 1003 // LCOV_EXCL_START 1004 return CeedError(basis->ceed, CEED_ERROR_MINOR, 1005 "CeedBasis is not a tensor product basis."); 1006 // LCOV_EXCL_STOP 1007 1008 *grad1d = basis->grad1d; 1009 return CEED_ERROR_SUCCESS; 1010 } 1011 1012 /** 1013 @brief Destroy a CeedBasis 1014 1015 @param basis CeedBasis to destroy 1016 1017 @return An error code: 0 - success, otherwise - failure 1018 1019 @ref User 1020 **/ 1021 int CeedBasisDestroy(CeedBasis *basis) { 1022 int ierr; 1023 1024 if (!*basis || --(*basis)->refcount > 0) return CEED_ERROR_SUCCESS; 1025 if ((*basis)->Destroy) { 1026 ierr = (*basis)->Destroy(*basis); CeedChk(ierr); 1027 } 1028 ierr = CeedFree(&(*basis)->interp); CeedChk(ierr); 1029 ierr = CeedFree(&(*basis)->interp1d); CeedChk(ierr); 1030 ierr = CeedFree(&(*basis)->grad); CeedChk(ierr); 1031 ierr = CeedFree(&(*basis)->grad1d); CeedChk(ierr); 1032 ierr = CeedFree(&(*basis)->qref1d); CeedChk(ierr); 1033 ierr = CeedFree(&(*basis)->qweight1d); CeedChk(ierr); 1034 ierr = CeedDestroy(&(*basis)->ceed); CeedChk(ierr); 1035 ierr = CeedFree(basis); CeedChk(ierr); 1036 return CEED_ERROR_SUCCESS; 1037 } 1038 1039 /** 1040 @brief Construct a Gauss-Legendre quadrature 1041 1042 @param Q Number of quadrature points (integrates polynomials of 1043 degree 2*Q-1 exactly) 1044 @param[out] qref1d Array of length Q to hold the abscissa on [-1, 1] 1045 @param[out] qweight1d Array of length Q to hold the weights 1046 1047 @return An error code: 0 - success, otherwise - failure 1048 1049 @ref Utility 1050 **/ 1051 int CeedGaussQuadrature(CeedInt Q, CeedScalar *qref1d, CeedScalar *qweight1d) { 1052 // Allocate 1053 CeedScalar P0, P1, P2, dP2, xi, wi, PI = 4.0*atan(1.0); 1054 // Build qref1d, qweight1d 1055 for (int i = 0; i <= Q/2; i++) { 1056 // Guess 1057 xi = cos(PI*(CeedScalar)(2*i+1)/((CeedScalar)(2*Q))); 1058 // Pn(xi) 1059 P0 = 1.0; 1060 P1 = xi; 1061 P2 = 0.0; 1062 for (int j = 2; j <= Q; j++) { 1063 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1064 P0 = P1; 1065 P1 = P2; 1066 } 1067 // First Newton Step 1068 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1069 xi = xi-P2/dP2; 1070 // Newton to convergence 1071 for (int k=0; k<100 && fabs(P2)>10*CEED_EPSILON; k++) { 1072 P0 = 1.0; 1073 P1 = xi; 1074 for (int j = 2; j <= Q; j++) { 1075 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1076 P0 = P1; 1077 P1 = P2; 1078 } 1079 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1080 xi = xi-P2/dP2; 1081 } 1082 // Save xi, wi 1083 wi = 2.0/((1.0-xi*xi)*dP2*dP2); 1084 qweight1d[i] = wi; 1085 qweight1d[Q-1-i] = wi; 1086 qref1d[i] = -xi; 1087 qref1d[Q-1-i]= xi; 1088 } 1089 return CEED_ERROR_SUCCESS; 1090 } 1091 1092 /** 1093 @brief Construct a Gauss-Legendre-Lobatto quadrature 1094 1095 @param Q Number of quadrature points (integrates polynomials of 1096 degree 2*Q-3 exactly) 1097 @param[out] qref1d Array of length Q to hold the abscissa on [-1, 1] 1098 @param[out] qweight1d Array of length Q to hold the weights 1099 1100 @return An error code: 0 - success, otherwise - failure 1101 1102 @ref Utility 1103 **/ 1104 int CeedLobattoQuadrature(CeedInt Q, CeedScalar *qref1d, 1105 CeedScalar *qweight1d) { 1106 // Allocate 1107 CeedScalar P0, P1, P2, dP2, d2P2, xi, wi, PI = 4.0*atan(1.0); 1108 // Build qref1d, qweight1d 1109 // Set endpoints 1110 if (Q < 2) 1111 // LCOV_EXCL_START 1112 return CeedError(NULL, CEED_ERROR_DIMENSION, 1113 "Cannot create Lobatto quadrature with Q=%d < 2 points", Q); 1114 // LCOV_EXCL_STOP 1115 wi = 2.0/((CeedScalar)(Q*(Q-1))); 1116 if (qweight1d) { 1117 qweight1d[0] = wi; 1118 qweight1d[Q-1] = wi; 1119 } 1120 qref1d[0] = -1.0; 1121 qref1d[Q-1] = 1.0; 1122 // Interior 1123 for (int i = 1; i <= (Q-1)/2; i++) { 1124 // Guess 1125 xi = cos(PI*(CeedScalar)(i)/(CeedScalar)(Q-1)); 1126 // Pn(xi) 1127 P0 = 1.0; 1128 P1 = xi; 1129 P2 = 0.0; 1130 for (int j = 2; j < Q; j++) { 1131 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1132 P0 = P1; 1133 P1 = P2; 1134 } 1135 // First Newton step 1136 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1137 d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi); 1138 xi = xi-dP2/d2P2; 1139 // Newton to convergence 1140 for (int k=0; k<100 && fabs(dP2)>10*CEED_EPSILON; k++) { 1141 P0 = 1.0; 1142 P1 = xi; 1143 for (int j = 2; j < Q; j++) { 1144 P2 = (((CeedScalar)(2*j-1))*xi*P1-((CeedScalar)(j-1))*P0)/((CeedScalar)(j)); 1145 P0 = P1; 1146 P1 = P2; 1147 } 1148 dP2 = (xi*P2 - P0)*(CeedScalar)Q/(xi*xi-1.0); 1149 d2P2 = (2*xi*dP2 - (CeedScalar)(Q*(Q-1))*P2)/(1.0-xi*xi); 1150 xi = xi-dP2/d2P2; 1151 } 1152 // Save xi, wi 1153 wi = 2.0/(((CeedScalar)(Q*(Q-1)))*P2*P2); 1154 if (qweight1d) { 1155 qweight1d[i] = wi; 1156 qweight1d[Q-1-i] = wi; 1157 } 1158 qref1d[i] = -xi; 1159 qref1d[Q-1-i]= xi; 1160 } 1161 return CEED_ERROR_SUCCESS; 1162 } 1163 1164 /** 1165 @brief Return QR Factorization of a matrix 1166 1167 @param ceed A Ceed context for error handling 1168 @param[in,out] mat Row-major matrix to be factorized in place 1169 @param[in,out] tau Vector of length m of scaling factors 1170 @param m Number of rows 1171 @param n Number of columns 1172 1173 @return An error code: 0 - success, otherwise - failure 1174 1175 @ref Utility 1176 **/ 1177 int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, 1178 CeedInt m, CeedInt n) { 1179 CeedScalar v[m]; 1180 1181 // Check m >= n 1182 if (n > m) 1183 // LCOV_EXCL_START 1184 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 1185 "Cannot compute QR factorization with n > m"); 1186 // LCOV_EXCL_STOP 1187 1188 for (CeedInt i=0; i<n; i++) { 1189 // Calculate Householder vector, magnitude 1190 CeedScalar sigma = 0.0; 1191 v[i] = mat[i+n*i]; 1192 for (CeedInt j=i+1; j<m; j++) { 1193 v[j] = mat[i+n*j]; 1194 sigma += v[j] * v[j]; 1195 } 1196 CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:m] 1197 CeedScalar Rii = -copysign(norm, v[i]); 1198 v[i] -= Rii; 1199 // norm of v[i:m] after modification above and scaling below 1200 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1201 // tau = 2 / (norm*norm) 1202 tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma); 1203 1204 for (CeedInt j=i+1; j<m; j++) 1205 v[j] /= v[i]; 1206 1207 // Apply Householder reflector to lower right panel 1208 CeedHouseholderReflect(&mat[i*n+i+1], &v[i], tau[i], m-i, n-i-1, n, 1); 1209 // Save v 1210 mat[i+n*i] = Rii; 1211 for (CeedInt j=i+1; j<m; j++) 1212 mat[i+n*j] = v[j]; 1213 } 1214 return CEED_ERROR_SUCCESS; 1215 } 1216 1217 /** 1218 @brief Return symmetric Schur decomposition of the symmetric matrix mat via 1219 symmetric QR factorization 1220 1221 @param ceed A Ceed context for error handling 1222 @param[in,out] mat Row-major matrix to be factorized in place 1223 @param[out] lambda Vector of length n of eigenvalues 1224 @param n Number of rows/columns 1225 1226 @return An error code: 0 - success, otherwise - failure 1227 1228 @ref Utility 1229 **/ 1230 int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, 1231 CeedScalar *lambda, CeedInt n) { 1232 // Check bounds for clang-tidy 1233 if (n<2) 1234 // LCOV_EXCL_START 1235 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, 1236 "Cannot compute symmetric Schur decomposition of scalars"); 1237 // LCOV_EXCL_STOP 1238 1239 CeedScalar v[n-1], tau[n-1], matT[n*n]; 1240 1241 // Copy mat to matT and set mat to I 1242 memcpy(matT, mat, n*n*sizeof(mat[0])); 1243 for (CeedInt i=0; i<n; i++) 1244 for (CeedInt j=0; j<n; j++) 1245 mat[j+n*i] = (i==j) ? 1 : 0; 1246 1247 // Reduce to tridiagonal 1248 for (CeedInt i=0; i<n-1; i++) { 1249 // Calculate Householder vector, magnitude 1250 CeedScalar sigma = 0.0; 1251 v[i] = matT[i+n*(i+1)]; 1252 for (CeedInt j=i+1; j<n-1; j++) { 1253 v[j] = matT[i+n*(j+1)]; 1254 sigma += v[j] * v[j]; 1255 } 1256 CeedScalar norm = sqrt(v[i]*v[i] + sigma); // norm of v[i:n-1] 1257 CeedScalar Rii = -copysign(norm, v[i]); 1258 v[i] -= Rii; 1259 // norm of v[i:m] after modification above and scaling below 1260 // norm = sqrt(v[i]*v[i] + sigma) / v[i]; 1261 // tau = 2 / (norm*norm) 1262 if (sigma > 10*CEED_EPSILON) 1263 tau[i] = 2 * v[i]*v[i] / (v[i]*v[i] + sigma); 1264 else 1265 tau[i] = 0; 1266 1267 for (CeedInt j=i+1; j<n-1; j++) 1268 v[j] /= v[i]; 1269 1270 // Update sub and super diagonal 1271 matT[i+n*(i+1)] = Rii; 1272 matT[(i+1)+n*i] = Rii; 1273 for (CeedInt j=i+2; j<n; j++) { 1274 matT[i+n*j] = 0; matT[j+n*i] = 0; 1275 } 1276 // Apply symmetric Householder reflector to lower right panel 1277 CeedHouseholderReflect(&matT[(i+1)+n*(i+1)], &v[i], tau[i], 1278 n-(i+1), n-(i+1), n, 1); 1279 CeedHouseholderReflect(&matT[(i+1)+n*(i+1)], &v[i], tau[i], 1280 n-(i+1), n-(i+1), 1, n); 1281 // Save v 1282 for (CeedInt j=i+1; j<n-1; j++) { 1283 matT[i+n*(j+1)] = v[j]; 1284 } 1285 } 1286 // Backwards accumulation of Q 1287 for (CeedInt i=n-2; i>=0; i--) { 1288 v[i] = 1; 1289 for (CeedInt j=i+1; j<n-1; j++) { 1290 v[j] = matT[i+n*(j+1)]; 1291 matT[i+n*(j+1)] = 0; 1292 } 1293 CeedHouseholderReflect(&mat[(i+1)+n*(i+1)], &v[i], tau[i], 1294 n-(i+1), n-(i+1), n, 1); 1295 } 1296 1297 // Reduce sub and super diagonal 1298 CeedInt p = 0, q = 0, itr = 0, maxitr = n*n*n; 1299 CeedScalar tol = 10*CEED_EPSILON; 1300 1301 while (q < n && itr < maxitr) { 1302 // Update p, q, size of reduced portions of diagonal 1303 p = 0; q = 0; 1304 for (CeedInt i=n-2; i>=0; i--) { 1305 if (fabs(matT[i+n*(i+1)]) < tol) 1306 q += 1; 1307 else 1308 break; 1309 } 1310 for (CeedInt i=0; i<n-1-q; i++) { 1311 if (fabs(matT[i+n*(i+1)]) < tol) 1312 p += 1; 1313 else 1314 break; 1315 } 1316 if (q == n-1) break; // Finished reducing 1317 1318 // Reduce tridiagonal portion 1319 CeedScalar tnn = matT[(n-1-q)+n*(n-1-q)], 1320 tnnm1 = matT[(n-2-q)+n*(n-1-q)]; 1321 CeedScalar d = (matT[(n-2-q)+n*(n-2-q)] - tnn)/2; 1322 CeedScalar mu = tnn - tnnm1*tnnm1 / 1323 (d + copysign(sqrt(d*d + tnnm1*tnnm1), d)); 1324 CeedScalar x = matT[p+n*p] - mu; 1325 CeedScalar z = matT[p+n*(p+1)]; 1326 for (CeedInt k=p; k<n-1-q; k++) { 1327 // Compute Givens rotation 1328 CeedScalar c = 1, s = 0; 1329 if (fabs(z) > tol) { 1330 if (fabs(z) > fabs(x)) { 1331 CeedScalar tau = -x/z; 1332 s = 1/sqrt(1+tau*tau), c = s*tau; 1333 } else { 1334 CeedScalar tau = -z/x; 1335 c = 1/sqrt(1+tau*tau), s = c*tau; 1336 } 1337 } 1338 1339 // Apply Givens rotation to T 1340 CeedGivensRotation(matT, c, s, CEED_NOTRANSPOSE, k, k+1, n, n); 1341 CeedGivensRotation(matT, c, s, CEED_TRANSPOSE, k, k+1, n, n); 1342 1343 // Apply Givens rotation to Q 1344 CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k+1, n, n); 1345 1346 // Update x, z 1347 if (k < n-q-2) { 1348 x = matT[k+n*(k+1)]; 1349 z = matT[k+n*(k+2)]; 1350 } 1351 } 1352 itr++; 1353 } 1354 // Save eigenvalues 1355 for (CeedInt i=0; i<n; i++) 1356 lambda[i] = matT[i+n*i]; 1357 1358 // Check convergence 1359 if (itr == maxitr && q < n-1) 1360 // LCOV_EXCL_START 1361 return CeedError(ceed, CEED_ERROR_MINOR, 1362 "Symmetric QR failed to converge"); 1363 // LCOV_EXCL_STOP 1364 return CEED_ERROR_SUCCESS; 1365 } 1366 1367 /** 1368 @brief Return Simultaneous Diagonalization of two matrices. This solves the 1369 generalized eigenvalue problem A x = lambda B x, where A and B 1370 are symmetric and B is positive definite. We generate the matrix X 1371 and vector Lambda such that X^T A X = Lambda and X^T B X = I. This 1372 is equivalent to the LAPACK routine 'sygv' with TYPE = 1. 1373 1374 @param ceed A Ceed context for error handling 1375 @param[in] matA Row-major matrix to be factorized with eigenvalues 1376 @param[in] matB Row-major matrix to be factorized to identity 1377 @param[out] x Row-major orthogonal matrix 1378 @param[out] lambda Vector of length n of generalized eigenvalues 1379 @param n Number of rows/columns 1380 1381 @return An error code: 0 - success, otherwise - failure 1382 1383 @ref Utility 1384 **/ 1385 int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *matA, 1386 CeedScalar *matB, CeedScalar *x, 1387 CeedScalar *lambda, CeedInt n) { 1388 int ierr; 1389 CeedScalar matC[n*n], matG[n*n], vecD[n]; 1390 1391 // Compute B = G D G^T 1392 memcpy(matG, matB, n*n*sizeof(matB[0])); 1393 ierr = CeedSymmetricSchurDecomposition(ceed, matG, vecD, n); CeedChk(ierr); 1394 for (CeedInt i=0; i<n; i++) 1395 vecD[i] = sqrt(vecD[i]); 1396 1397 // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T 1398 // = D^-1/2 G^T A G D^-1/2 1399 for (CeedInt i=0; i<n; i++) 1400 for (CeedInt j=0; j<n; j++) 1401 matC[j+i*n] = matG[i+j*n] / vecD[i]; 1402 ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)matC, 1403 (const CeedScalar *)matA, x, n, n, n); 1404 CeedChk(ierr); 1405 for (CeedInt i=0; i<n; i++) 1406 for (CeedInt j=0; j<n; j++) 1407 matG[j+i*n] = matG[j+i*n] / vecD[j]; 1408 ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)x, 1409 (const CeedScalar *)matG, matC, n, n, n); 1410 CeedChk(ierr); 1411 1412 // Compute Q^T C Q = lambda 1413 ierr = CeedSymmetricSchurDecomposition(ceed, matC, lambda, n); CeedChk(ierr); 1414 1415 // Set x = (G D^1/2)^-T Q 1416 // = G D^-1/2 Q 1417 ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)matG, 1418 (const CeedScalar *)matC, x, n, n, n); 1419 CeedChk(ierr); 1420 return CEED_ERROR_SUCCESS; 1421 } 1422 1423 /// @} 1424