// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*cb32e2e7SValeria Barra 17*cb32e2e7SValeria Barra /// @file 18*cb32e2e7SValeria Barra /// libCEED QFunctions for diffusion operator example using PETSc 19*cb32e2e7SValeria Barra 20*cb32e2e7SValeria Barra // ----------------------------------------------------------------------------- 21*cb32e2e7SValeria Barra CEED_QFUNCTION(SetupDiffGeo)(void *ctx, CeedInt Q, 22*cb32e2e7SValeria Barra const CeedScalar *const *in, 23*cb32e2e7SValeria Barra CeedScalar *const *out) { 24*cb32e2e7SValeria Barra const CeedScalar *J = in[0], *w = in[1]; 25*cb32e2e7SValeria Barra CeedScalar *qd = out[0]; 26*cb32e2e7SValeria Barra 27*cb32e2e7SValeria Barra // Quadrature Point Loop 28*cb32e2e7SValeria Barra CeedPragmaSIMD 29*cb32e2e7SValeria Barra for (CeedInt i=0; i<Q; i++) { 30*cb32e2e7SValeria Barra const CeedScalar J11 = J[i+Q*0]; 31*cb32e2e7SValeria Barra const CeedScalar J21 = J[i+Q*1]; 32*cb32e2e7SValeria Barra const CeedScalar J31 = J[i+Q*2]; 33*cb32e2e7SValeria Barra const CeedScalar J12 = J[i+Q*3]; 34*cb32e2e7SValeria Barra const CeedScalar J22 = J[i+Q*4]; 35*cb32e2e7SValeria Barra const CeedScalar J32 = J[i+Q*5]; 36*cb32e2e7SValeria Barra const CeedScalar J13 = J[i+Q*6]; 37*cb32e2e7SValeria Barra const CeedScalar J23 = J[i+Q*7]; 38*cb32e2e7SValeria Barra const CeedScalar J33 = J[i+Q*8]; 39*cb32e2e7SValeria Barra const CeedScalar A11 = J22*J33 - J23*J32; 40*cb32e2e7SValeria Barra const CeedScalar A12 = J13*J32 - J12*J33; 41*cb32e2e7SValeria Barra const CeedScalar A13 = J12*J23 - J13*J22; 42*cb32e2e7SValeria Barra const CeedScalar A21 = J23*J31 - J21*J33; 43*cb32e2e7SValeria Barra const CeedScalar A22 = J11*J33 - J13*J31; 44*cb32e2e7SValeria Barra const CeedScalar A23 = J13*J21 - J11*J23; 45*cb32e2e7SValeria Barra const CeedScalar A31 = J21*J32 - J22*J31; 46*cb32e2e7SValeria Barra const CeedScalar A32 = J12*J31 - J11*J32; 47*cb32e2e7SValeria Barra const CeedScalar A33 = J11*J22 - J12*J21; 48*cb32e2e7SValeria Barra const CeedScalar qw = w[i] / (J11*A11 + J21*A12 + J31*A13); 
49*cb32e2e7SValeria Barra qd[i+Q*0] = qw * (A11*A11 + A12*A12 + A13*A13); 50*cb32e2e7SValeria Barra qd[i+Q*1] = qw * (A11*A21 + A12*A22 + A13*A23); 51*cb32e2e7SValeria Barra qd[i+Q*2] = qw * (A11*A31 + A12*A32 + A13*A33); 52*cb32e2e7SValeria Barra qd[i+Q*3] = qw * (A21*A21 + A22*A22 + A23*A23); 53*cb32e2e7SValeria Barra qd[i+Q*4] = qw * (A21*A31 + A22*A32 + A23*A33); 54*cb32e2e7SValeria Barra qd[i+Q*5] = qw * (A31*A31 + A32*A32 + A33*A33); 55*cb32e2e7SValeria Barra } // End of Quadrature Point Loop 56*cb32e2e7SValeria Barra 57*cb32e2e7SValeria Barra return 0; 58*cb32e2e7SValeria Barra } 59*cb32e2e7SValeria Barra 60*cb32e2e7SValeria Barra // ----------------------------------------------------------------------------- 61*cb32e2e7SValeria Barra CEED_QFUNCTION(SetupDiffRhs)(void *ctx, CeedInt Q, 62*cb32e2e7SValeria Barra const CeedScalar *const *in, 63*cb32e2e7SValeria Barra CeedScalar *const *out) { 64*cb32e2e7SValeria Barra #ifndef M_PI 65*cb32e2e7SValeria Barra # define M_PI 3.14159265358979323846 66*cb32e2e7SValeria Barra #endif 67*cb32e2e7SValeria Barra const CeedScalar *x = in[0], *J = in[1], *w = in[2]; 68*cb32e2e7SValeria Barra CeedScalar *true_soln = out[0], *rhs = out[1]; 69*cb32e2e7SValeria Barra 70*cb32e2e7SValeria Barra // Quadrature Point Loop 71*cb32e2e7SValeria Barra CeedPragmaSIMD 72*cb32e2e7SValeria Barra for (CeedInt i=0; i<Q; i++) { 73*cb32e2e7SValeria Barra const CeedScalar J11 = J[i+Q*0]; 74*cb32e2e7SValeria Barra const CeedScalar J21 = J[i+Q*1]; 75*cb32e2e7SValeria Barra const CeedScalar J31 = J[i+Q*2]; 76*cb32e2e7SValeria Barra const CeedScalar J12 = J[i+Q*3]; 77*cb32e2e7SValeria Barra const CeedScalar J22 = J[i+Q*4]; 78*cb32e2e7SValeria Barra const CeedScalar J32 = J[i+Q*5]; 79*cb32e2e7SValeria Barra const CeedScalar J13 = J[i+Q*6]; 80*cb32e2e7SValeria Barra const CeedScalar J23 = J[i+Q*7]; 81*cb32e2e7SValeria Barra const CeedScalar J33 = J[i+Q*8]; 82*cb32e2e7SValeria Barra const CeedScalar A11 = J22*J33 - J23*J32; 83*cb32e2e7SValeria Barra 
const CeedScalar A12 = J13*J32 - J12*J33; 84*cb32e2e7SValeria Barra const CeedScalar A13 = J12*J23 - J13*J22; 85*cb32e2e7SValeria Barra 86*cb32e2e7SValeria Barra const CeedScalar c[3] = { 0, 1., 2. }; 87*cb32e2e7SValeria Barra const CeedScalar k[3] = { 1., 2., 3. }; 88*cb32e2e7SValeria Barra 89*cb32e2e7SValeria Barra true_soln[i] = sin(M_PI*(c[0] + k[0]*x[i+Q*0])) * 90*cb32e2e7SValeria Barra sin(M_PI*(c[1] + k[1]*x[i+Q*1])) * 91*cb32e2e7SValeria Barra sin(M_PI*(c[2] + k[2]*x[i+Q*2])); 92*cb32e2e7SValeria Barra 93*cb32e2e7SValeria Barra const CeedScalar rho = w[i] * (J11*A11 + J21*A12 + J31*A13); 94*cb32e2e7SValeria Barra rhs[i] = rho * M_PI*M_PI * (k[0]*k[0] + k[1]*k[1] + k[2]*k[2]) * 95*cb32e2e7SValeria Barra true_soln[i]; 96*cb32e2e7SValeria Barra } // End of Quadrature Point Loop 97*cb32e2e7SValeria Barra 98*cb32e2e7SValeria Barra return 0; 99*cb32e2e7SValeria Barra } 100*cb32e2e7SValeria Barra 101*cb32e2e7SValeria Barra // ----------------------------------------------------------------------------- 102*cb32e2e7SValeria Barra CEED_QFUNCTION(Diff)(void *ctx, CeedInt Q, 103*cb32e2e7SValeria Barra const CeedScalar *const *in, CeedScalar *const *out) { 104*cb32e2e7SValeria Barra const CeedScalar *ug = in[0], *qd = in[1]; 105*cb32e2e7SValeria Barra CeedScalar *vg = out[0]; 106*cb32e2e7SValeria Barra 107*cb32e2e7SValeria Barra // Quadrature Point Loop 108*cb32e2e7SValeria Barra CeedPragmaSIMD 109*cb32e2e7SValeria Barra for (CeedInt i=0; i<Q; i++) { 110*cb32e2e7SValeria Barra // Read spatial derivatives of u 111*cb32e2e7SValeria Barra const CeedScalar du[3] = {ug[i+Q*0], 112*cb32e2e7SValeria Barra ug[i+Q*1], 113*cb32e2e7SValeria Barra ug[i+Q*2] 114*cb32e2e7SValeria Barra }; 115*cb32e2e7SValeria Barra // Read qdata (dXdxdXdxT symmetric matrix) 116*cb32e2e7SValeria Barra const CeedScalar dXdxdXdxT[3][3] = {{qd[i+0*Q], 117*cb32e2e7SValeria Barra qd[i+1*Q], 118*cb32e2e7SValeria Barra qd[i+2*Q]}, 119*cb32e2e7SValeria Barra {qd[i+1*Q], 120*cb32e2e7SValeria Barra qd[i+3*Q], 
121*cb32e2e7SValeria Barra qd[i+4*Q]}, 122*cb32e2e7SValeria Barra {qd[i+2*Q], 123*cb32e2e7SValeria Barra qd[i+4*Q], 124*cb32e2e7SValeria Barra qd[i+5*Q]} 125*cb32e2e7SValeria Barra }; 126*cb32e2e7SValeria Barra 127*cb32e2e7SValeria Barra for (int j=0; j<3; j++) // j = direction of vg 128*cb32e2e7SValeria Barra vg[i+j*Q] = (du[0] * dXdxdXdxT[0][j] + 129*cb32e2e7SValeria Barra du[1] * dXdxdXdxT[1][j] + 130*cb32e2e7SValeria Barra du[2] * dXdxdXdxT[2][j]); 131*cb32e2e7SValeria Barra 132*cb32e2e7SValeria Barra } // End of Quadrature Point Loop 133*cb32e2e7SValeria Barra return 0; 134*cb32e2e7SValeria Barra } 135*cb32e2e7SValeria Barra // ----------------------------------------------------------------------------- 136