ceed-basis.c (3aa039c16e73fd1bee14152fdafb4ed119c9c595) ceed-basis.c (dc7d240c1c3ecbc40e893ea38e37c8e19d48593c)
1// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3// reserved. See files LICENSE and NOTICE for details.
4//
5// This file is part of CEED, a collection of benchmarks, miniapps, software
6// libraries and APIs for efficient high-order finite element and spectral
7// element discretizations for exascale applications. For more information and
8// source code availability see http://github.com/ceed.

--- 486 unchanged lines hidden (view full) ---

495 const CeedScalar *tau, CeedTransposeMode tmode,
496 CeedInt m, CeedInt n, CeedInt k,
497 CeedInt row, CeedInt col) {
498 CeedScalar v[m];
499 for (CeedInt ii=0; ii<k; ii++) {
500 CeedInt i = tmode == CEED_TRANSPOSE ? ii : k-1-ii;
501 for (CeedInt j=i+1; j<m; j++)
502 v[j] = Q[j*k+i];
1// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3// reserved. See files LICENSE and NOTICE for details.
4//
5// This file is part of CEED, a collection of benchmarks, miniapps, software
6// libraries and APIs for efficient high-order finite element and spectral
7// element discretizations for exascale applications. For more information and
8// source code availability see http://github.com/ceed.

--- 486 unchanged lines hidden (view full) ---

495 const CeedScalar *tau, CeedTransposeMode tmode,
496 CeedInt m, CeedInt n, CeedInt k,
497 CeedInt row, CeedInt col) {
498 CeedScalar v[m];
499 for (CeedInt ii=0; ii<k; ii++) {
500 CeedInt i = tmode == CEED_TRANSPOSE ? ii : k-1-ii;
501 for (CeedInt j=i+1; j<m; j++)
502 v[j] = Q[j*k+i];
503 // Apply Householder reflector (I - tau v v^T) colograd1d^T
503 // Apply Householder reflector (I - tau v v^T) collograd1d^T
504 CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
505 }
506 return 0;
507}
508
509/**
510 @brief Compute Givens rotation
511

--- 303 unchanged lines hidden (view full) ---

815
816 return 0;
817}
818
819/**
820 @brief Return collocated grad matrix
821
822 @param basis CeedBasis
504 CeedHouseholderReflect(&A[i*row], &v[i], tau[i], m-i, n, row, col);
505 }
506 return 0;
507}
508
509/**
510 @brief Compute Givens rotation
511

--- 303 unchanged lines hidden (view full) ---

815
816 return 0;
817}
818
819/**
820 @brief Return collocated grad matrix
821
822 @param basis CeedBasis
823 @param[out] colograd1d Row-major Q1d × Q1d matrix expressing derivatives of
823 @param[out] collograd1d Row-major Q1d × Q1d matrix expressing derivatives of
824 basis functions at quadrature points
825
826 @return An error code: 0 - success, otherwise - failure
827
828 @ref Advanced
829**/
824 basis functions at quadrature points
825
826 @return An error code: 0 - success, otherwise - failure
827
828 @ref Advanced
829**/
830int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *colograd1d) {
830int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *collograd1d) {
831 int i, j, k;
832 Ceed ceed;
833 CeedInt ierr, P1d=(basis)->P1d, Q1d=(basis)->Q1d;
834 CeedScalar *interp1d, *grad1d, tau[Q1d];
835
836 ierr = CeedMalloc(Q1d*P1d, &interp1d); CeedChk(ierr);
837 ierr = CeedMalloc(Q1d*P1d, &grad1d); CeedChk(ierr);
838 memcpy(interp1d, (basis)->interp1d, Q1d*P1d*sizeof(basis)->interp1d[0]);
839 memcpy(grad1d, (basis)->grad1d, Q1d*P1d*sizeof(basis)->interp1d[0]);
840
841 // QR Factorization, interp1d = Q R
842 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
843 ierr = CeedQRFactorization(ceed, interp1d, tau, Q1d, P1d); CeedChk(ierr);
844
831 int i, j, k;
832 Ceed ceed;
833 CeedInt ierr, P1d=(basis)->P1d, Q1d=(basis)->Q1d;
834 CeedScalar *interp1d, *grad1d, tau[Q1d];
835
836 ierr = CeedMalloc(Q1d*P1d, &interp1d); CeedChk(ierr);
837 ierr = CeedMalloc(Q1d*P1d, &grad1d); CeedChk(ierr);
838 memcpy(interp1d, (basis)->interp1d, Q1d*P1d*sizeof(basis)->interp1d[0]);
839 memcpy(grad1d, (basis)->grad1d, Q1d*P1d*sizeof(basis)->interp1d[0]);
840
841 // QR Factorization, interp1d = Q R
842 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
843 ierr = CeedQRFactorization(ceed, interp1d, tau, Q1d, P1d); CeedChk(ierr);
844
845 // Apply Rinv, colograd1d = grad1d Rinv
845 // Apply Rinv, collograd1d = grad1d Rinv
846 for (i=0; i<Q1d; i++) { // Row i
846 for (i=0; i<Q1d; i++) { // Row i
847 colograd1d[Q1d*i] = grad1d[P1d*i]/interp1d[0];
847 collograd1d[Q1d*i] = grad1d[P1d*i]/interp1d[0];
848 for (j=1; j<P1d; j++) { // Column j
848 for (j=1; j<P1d; j++) { // Column j
849 colograd1d[j+Q1d*i] = grad1d[j+P1d*i];
849 collograd1d[j+Q1d*i] = grad1d[j+P1d*i];
850 for (k=0; k<j; k++)
850 for (k=0; k<j; k++)
851 colograd1d[j+Q1d*i] -= interp1d[j+P1d*k]*colograd1d[k+Q1d*i];
852 colograd1d[j+Q1d*i] /= interp1d[j+P1d*j];
851 collograd1d[j+Q1d*i] -= interp1d[j+P1d*k]*collograd1d[k+Q1d*i];
852 collograd1d[j+Q1d*i] /= interp1d[j+P1d*j];
853 }
854 for (j=P1d; j<Q1d; j++)
853 }
854 for (j=P1d; j<Q1d; j++)
855 colograd1d[j+Q1d*i] = 0;
855 collograd1d[j+Q1d*i] = 0;
856 }
857
856 }
857
858 // Apply Qtranspose, colograd = colograd Qtranspose
859 CeedHouseholderApplyQ(colograd1d, interp1d, tau, CEED_NOTRANSPOSE,
858 // Apply Qtranspose, collograd = collograd Qtranspose
859 CeedHouseholderApplyQ(collograd1d, interp1d, tau, CEED_NOTRANSPOSE,
860 Q1d, Q1d, P1d, 1, Q1d);
861
862 ierr = CeedFree(&interp1d); CeedChk(ierr);
863 ierr = CeedFree(&grad1d); CeedChk(ierr);
864
865 return 0;
866}
867

--- 421 unchanged lines hidden ---
860 Q1d, Q1d, P1d, 1, Q1d);
861
862 ierr = CeedFree(&interp1d); CeedChk(ierr);
863 ierr = CeedFree(&grad1d); CeedChk(ierr);
864
865 return 0;
866}
867

--- 421 unchanged lines hidden ---