// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*66087c08SValeria Barra 17*66087c08SValeria Barra /// A structure used to pass additional data to f_build_diff 18*66087c08SValeria Barra struct BuildContext { CeedInt dim, space_dim; }; 19*66087c08SValeria Barra 20*66087c08SValeria Barra /// libCEED Q-function for building quadrature data for a diffusion operator 21*66087c08SValeria Barra CEED_QFUNCTION(f_build_diff)(void *ctx, const CeedInt Q, 22*66087c08SValeria Barra const CeedScalar *const *in, CeedScalar *const *out) { 23*66087c08SValeria Barra struct BuildContext *bc = (struct BuildContext *)ctx; 24*66087c08SValeria Barra // in[0] is Jacobians with shape [dim, nc=dim, Q] 25*66087c08SValeria Barra // in[1] is quadrature weights, size (Q) 26*66087c08SValeria Barra // 27*66087c08SValeria Barra // At every quadrature point, compute w/det(J).adj(J).adj(J)^T and store 28*66087c08SValeria Barra // the symmetric part of the result. 29*66087c08SValeria Barra const CeedScalar *J = in[0], *w = in[1]; 30*66087c08SValeria Barra CeedScalar *qdata = out[0]; 31*66087c08SValeria Barra 32*66087c08SValeria Barra switch (bc->dim + 10*bc->space_dim) { 33*66087c08SValeria Barra case 11: 34*66087c08SValeria Barra CeedPragmaSIMD 35*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 36*66087c08SValeria Barra qdata[i] = w[i] / J[i]; 37*66087c08SValeria Barra } // End of Quadrature Point Loop 38*66087c08SValeria Barra break; 39*66087c08SValeria Barra case 22: 40*66087c08SValeria Barra CeedPragmaSIMD 41*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 42*66087c08SValeria Barra // J: 0 2 qdata: 0 2 adj(J): J22 -J12 43*66087c08SValeria Barra // 1 3 2 1 -J21 J11 44*66087c08SValeria Barra const CeedScalar J11 = J[i+Q*0]; 45*66087c08SValeria Barra const CeedScalar J21 = J[i+Q*1]; 46*66087c08SValeria Barra const CeedScalar J12 = J[i+Q*2]; 47*66087c08SValeria Barra const CeedScalar J22 = J[i+Q*3]; 48*66087c08SValeria Barra const CeedScalar qw = w[i] / (J11*J22 - J21*J12); 49*66087c08SValeria Barra qdata[i+Q*0] = qw * (J12*J12 + 
J22*J22); 50*66087c08SValeria Barra qdata[i+Q*1] = qw * (J11*J11 + J21*J21); 51*66087c08SValeria Barra qdata[i+Q*2] = - qw * (J11*J12 + J21*J22); 52*66087c08SValeria Barra } // End of Quadrature Point Loop 53*66087c08SValeria Barra break; 54*66087c08SValeria Barra case 33: 55*66087c08SValeria Barra CeedPragmaSIMD 56*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 57*66087c08SValeria Barra // Compute the adjoint 58*66087c08SValeria Barra CeedScalar A[3][3]; 59*66087c08SValeria Barra for (CeedInt j=0; j<3; j++) 60*66087c08SValeria Barra for (CeedInt k=0; k<3; k++) 61*66087c08SValeria Barra // Equivalent code with J as a VLA and no mod operations: 62*66087c08SValeria Barra // A[k][j] = J[j+1][k+1]*J[j+2][k+2] - J[j+1][k+2]*J[j+2][k+1] 63*66087c08SValeria Barra A[k][j] = J[i+Q*((j+1)%3+3*((k+1)%3))]*J[i+Q*((j+2)%3+3*((k+2)%3))] - 64*66087c08SValeria Barra J[i+Q*((j+1)%3+3*((k+2)%3))]*J[i+Q*((j+2)%3+3*((k+1)%3))]; 65*66087c08SValeria Barra 66*66087c08SValeria Barra // Compute quadrature weight / det(J) 67*66087c08SValeria Barra const CeedScalar qw = w[i] / (J[i+Q*0]*A[0][0] + J[i+Q*1]*A[1][1] + 68*66087c08SValeria Barra J[i+Q*2]*A[2][2]); 69*66087c08SValeria Barra 70*66087c08SValeria Barra // Compute geometric factors 71*66087c08SValeria Barra // Stored in Voigt convention 72*66087c08SValeria Barra // 0 5 4 73*66087c08SValeria Barra // 5 1 3 74*66087c08SValeria Barra // 4 3 2 75*66087c08SValeria Barra qdata[i+Q*0] = qw * (A[0][0]*A[0][0] + A[0][1]*A[0][1] + A[0][2]*A[0][2]); 76*66087c08SValeria Barra qdata[i+Q*1] = qw * (A[1][0]*A[1][0] + A[1][1]*A[1][1] + A[1][2]*A[1][2]); 77*66087c08SValeria Barra qdata[i+Q*2] = qw * (A[2][0]*A[2][0] + A[2][1]*A[2][1] + A[2][2]*A[2][2]); 78*66087c08SValeria Barra qdata[i+Q*3] = qw * (A[1][0]*A[2][0] + A[1][1]*A[2][1] + A[1][2]*A[2][2]); 79*66087c08SValeria Barra qdata[i+Q*4] = qw * (A[0][0]*A[2][0] + A[0][1]*A[2][1] + A[0][2]*A[2][2]); 80*66087c08SValeria Barra qdata[i+Q*5] = qw * (A[0][0]*A[1][0] + A[0][1]*A[1][1] + 
A[0][2]*A[1][2]); 81*66087c08SValeria Barra } // End of Quadrature Point Loop 82*66087c08SValeria Barra break; 83*66087c08SValeria Barra } 84*66087c08SValeria Barra return 0; 85*66087c08SValeria Barra } 86*66087c08SValeria Barra 87*66087c08SValeria Barra /// libCEED Q-function for applying a diff operator 88*66087c08SValeria Barra CEED_QFUNCTION(f_apply_diff)(void *ctx, const CeedInt Q, 89*66087c08SValeria Barra const CeedScalar *const *in, CeedScalar *const *out) { 90*66087c08SValeria Barra struct BuildContext *bc = (struct BuildContext *)ctx; 91*66087c08SValeria Barra // in[0], out[0] have shape [dim, nc=1, Q] 92*66087c08SValeria Barra const CeedScalar *ug = in[0], *qdata = in[1]; 93*66087c08SValeria Barra CeedScalar *vg = out[0]; 94*66087c08SValeria Barra 95*66087c08SValeria Barra switch (bc->dim) { 96*66087c08SValeria Barra case 1: 97*66087c08SValeria Barra CeedPragmaSIMD 98*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 99*66087c08SValeria Barra vg[i] = ug[i] * qdata[i]; 100*66087c08SValeria Barra } // End of Quadrature Point Loop 101*66087c08SValeria Barra break; 102*66087c08SValeria Barra case 2: 103*66087c08SValeria Barra CeedPragmaSIMD 104*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 105*66087c08SValeria Barra // Read spatial derivatives of u 106*66087c08SValeria Barra const CeedScalar du[2] = {ug[i+Q*0], 107*66087c08SValeria Barra ug[i+Q*1] 108*66087c08SValeria Barra }; 109*66087c08SValeria Barra 110*66087c08SValeria Barra // Read qdata (dXdxdXdxT symmetric matrix) 111*66087c08SValeria Barra // Stored in Voigt convention 112*66087c08SValeria Barra // 0 2 113*66087c08SValeria Barra // 2 1 114*66087c08SValeria Barra // *INDENT-OFF* 115*66087c08SValeria Barra const CeedScalar dXdxdXdxT[2][2] = {{qdata[i+0*Q], 116*66087c08SValeria Barra qdata[i+2*Q]}, 117*66087c08SValeria Barra {qdata[i+2*Q], 118*66087c08SValeria Barra qdata[i+1*Q]}}; 119*66087c08SValeria Barra // *INDENT-ON* 120*66087c08SValeria Barra // j = direction of vg 121*66087c08SValeria 
Barra for (int j=0; j<2; j++) 122*66087c08SValeria Barra vg[i+j*Q] = (du[0] * dXdxdXdxT[0][j] + 123*66087c08SValeria Barra du[1] * dXdxdXdxT[1][j]); 124*66087c08SValeria Barra } // End of Quadrature Point Loop 125*66087c08SValeria Barra break; 126*66087c08SValeria Barra case 3: 127*66087c08SValeria Barra CeedPragmaSIMD 128*66087c08SValeria Barra for (CeedInt i=0; i<Q; i++) { 129*66087c08SValeria Barra // Read spatial derivatives of u 130*66087c08SValeria Barra const CeedScalar du[3] = {ug[i+Q*0], 131*66087c08SValeria Barra ug[i+Q*1], 132*66087c08SValeria Barra ug[i+Q*2] 133*66087c08SValeria Barra }; 134*66087c08SValeria Barra 135*66087c08SValeria Barra // Read qdata (dXdxdXdxT symmetric matrix) 136*66087c08SValeria Barra // Stored in Voigt convention 137*66087c08SValeria Barra // 0 5 4 138*66087c08SValeria Barra // 5 1 3 139*66087c08SValeria Barra // 4 3 2 140*66087c08SValeria Barra // *INDENT-OFF* 141*66087c08SValeria Barra const CeedScalar dXdxdXdxT[3][3] = {{qdata[i+0*Q], 142*66087c08SValeria Barra qdata[i+5*Q], 143*66087c08SValeria Barra qdata[i+4*Q]}, 144*66087c08SValeria Barra {qdata[i+5*Q], 145*66087c08SValeria Barra qdata[i+1*Q], 146*66087c08SValeria Barra qdata[i+3*Q]}, 147*66087c08SValeria Barra {qdata[i+4*Q], 148*66087c08SValeria Barra qdata[i+3*Q], 149*66087c08SValeria Barra qdata[i+2*Q]} 150*66087c08SValeria Barra }; 151*66087c08SValeria Barra // *INDENT-ON* 152*66087c08SValeria Barra // j = direction of vg 153*66087c08SValeria Barra for (int j=0; j<3; j++) 154*66087c08SValeria Barra vg[i+j*Q] = (du[0] * dXdxdXdxT[0][j] + 155*66087c08SValeria Barra du[1] * dXdxdXdxT[1][j] + 156*66087c08SValeria Barra du[2] * dXdxdXdxT[2][j]); 157*66087c08SValeria Barra } // End of Quadrature Point Loop 158*66087c08SValeria Barra break; 159*66087c08SValeria Barra } 160*66087c08SValeria Barra return 0; 161*66087c08SValeria Barra } 162