115ed3f7dSjeremylt /// @file 215ed3f7dSjeremylt /// Test creation and use of FDM element inverse 315ed3f7dSjeremylt /// \test Test creation and use of FDM element inverse 42b730f8bSJeremy L Thompson #include "t541-operator.h" 52b730f8bSJeremy L Thompson 615ed3f7dSjeremylt #include <ceed.h> 72b730f8bSJeremy L Thompson #include <math.h> 815ed3f7dSjeremylt #include <stdlib.h> 915ed3f7dSjeremylt #include <string.h> 1015ed3f7dSjeremylt 1115ed3f7dSjeremylt int main(int argc, char **argv) { 1215ed3f7dSjeremylt Ceed ceed; 13*4fee36f0SJeremy L Thompson CeedElemRestriction elem_restriction_x, elem_restriction_u, elem_restriction_q_data; 1415ed3f7dSjeremylt CeedBasis basis_x, basis_u; 1515ed3f7dSjeremylt CeedQFunction qf_setup_diff, qf_apply; 16*4fee36f0SJeremy L Thompson CeedOperator op_setup_diff, op_apply, op_inverse; 17*4fee36f0SJeremy L Thompson CeedVector q_data_diff, x, u, v, w; 18*4fee36f0SJeremy L Thompson CeedInt num_elem = 1, p = 4, q = 5, dim = 2; 19*4fee36f0SJeremy L Thompson CeedInt num_dofs = p * p, num_qpts = num_elem * q * q, q_data_size = dim * (dim + 1) / 2; 2015ed3f7dSjeremylt 2115ed3f7dSjeremylt CeedInit(argv[1], &ceed); 2215ed3f7dSjeremylt 2380a9ef05SNatalie Beams // Test skipped if using single precision 242b730f8bSJeremy L Thompson if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Test not implemented in single precision"); 2580a9ef05SNatalie Beams 26*4fee36f0SJeremy L Thompson // Vectors 27*4fee36f0SJeremy L Thompson CeedVectorCreate(ceed, dim * num_elem * (2 * 2), &x); 28*4fee36f0SJeremy L Thompson { 29*4fee36f0SJeremy L Thompson CeedScalar x_array[dim * num_elem * (2 * 2)]; 30*4fee36f0SJeremy L Thompson 312b730f8bSJeremy L Thompson for (CeedInt i = 0; i < 2; i++) { 3215ed3f7dSjeremylt for (CeedInt j = 0; j < 2; j++) { 33*4fee36f0SJeremy L Thompson x_array[i + j * 2 + 0 * 4] = i; 34*4fee36f0SJeremy L Thompson x_array[i + j * 2 + 1 * 4] = j; 3515ed3f7dSjeremylt } 362b730f8bSJeremy L Thompson } 37*4fee36f0SJeremy L Thompson CeedVectorSetArray(x, CEED_MEM_HOST, CEED_COPY_VALUES, x_array); 38*4fee36f0SJeremy L Thompson } 39*4fee36f0SJeremy L Thompson CeedVectorCreate(ceed, num_dofs, &u); 40*4fee36f0SJeremy L Thompson CeedVectorCreate(ceed, num_dofs, &v); 41*4fee36f0SJeremy L Thompson CeedVectorCreate(ceed, num_dofs, &w); 4215ed3f7dSjeremylt CeedVectorCreate(ceed, q_data_size * num_qpts, &q_data_diff); 4315ed3f7dSjeremylt 4415ed3f7dSjeremylt // Restrictions 4515ed3f7dSjeremylt CeedInt strides_x[3] = {1, 2 * 2, 2 * 2 * dim}; 46*4fee36f0SJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, 2 * 2, dim, dim * num_elem * 2 * 2, strides_x, &elem_restriction_x); 4715ed3f7dSjeremylt 48*4fee36f0SJeremy L Thompson CeedInt strides_u[3] = {1, p * p, p * p}; 49*4fee36f0SJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, p * p, 1, num_dofs, strides_u, &elem_restriction_u); 5015ed3f7dSjeremylt 51*4fee36f0SJeremy L Thompson CeedInt strides_q_data[3] = {1, q * q, q_data_size * q * q}; 52*4fee36f0SJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, q * q, q_data_size, num_qpts * q_data_size, strides_q_data, &elem_restriction_q_data); 5315ed3f7dSjeremylt 5415ed3f7dSjeremylt // Bases 55*4fee36f0SJeremy L Thompson CeedBasisCreateTensorH1Lagrange(ceed, dim, dim, 2, q, CEED_GAUSS, &basis_x); 56*4fee36f0SJeremy L Thompson CeedBasisCreateTensorH1Lagrange(ceed, dim, 1, p, q, CEED_GAUSS, &basis_u); 5715ed3f7dSjeremylt 5815ed3f7dSjeremylt // QFunction - setup diff 592b730f8bSJeremy L Thompson CeedQFunctionCreateInterior(ceed, 1, setup_diff, setup_diff_loc, &qf_setup_diff); 6015ed3f7dSjeremylt CeedQFunctionAddInput(qf_setup_diff, "dx", dim * dim, CEED_EVAL_GRAD); 6115ed3f7dSjeremylt CeedQFunctionAddInput(qf_setup_diff, "weight", 1, CEED_EVAL_WEIGHT); 6215ed3f7dSjeremylt CeedQFunctionAddOutput(qf_setup_diff, "q data", q_data_size, CEED_EVAL_NONE); 6315ed3f7dSjeremylt 6415ed3f7dSjeremylt // Operator - setup diff 652b730f8bSJeremy L Thompson CeedOperatorCreate(ceed, qf_setup_diff, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_diff); 66*4fee36f0SJeremy L Thompson CeedOperatorSetField(op_setup_diff, "dx", elem_restriction_x, basis_x, CEED_VECTOR_ACTIVE); 672b730f8bSJeremy L Thompson CeedOperatorSetField(op_setup_diff, "weight", CEED_ELEMRESTRICTION_NONE, basis_x, CEED_VECTOR_NONE); 68*4fee36f0SJeremy L Thompson CeedOperatorSetField(op_setup_diff, "q data", elem_restriction_q_data, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 6915ed3f7dSjeremylt 7015ed3f7dSjeremylt // Apply Setup Operator 71*4fee36f0SJeremy L Thompson CeedOperatorApply(op_setup_diff, x, q_data_diff, CEED_REQUEST_IMMEDIATE); 7215ed3f7dSjeremylt 7315ed3f7dSjeremylt // QFunction - apply 7415ed3f7dSjeremylt CeedQFunctionCreateInterior(ceed, 1, apply, apply_loc, &qf_apply); 7515ed3f7dSjeremylt CeedQFunctionAddInput(qf_apply, "u", dim, CEED_EVAL_GRAD); 76*4fee36f0SJeremy L Thompson CeedQFunctionAddInput(qf_apply, "q data diff", q_data_size, CEED_EVAL_NONE); 7715ed3f7dSjeremylt CeedQFunctionAddOutput(qf_apply, "v", dim, CEED_EVAL_GRAD); 7815ed3f7dSjeremylt 7915ed3f7dSjeremylt // Operator - apply 802b730f8bSJeremy L Thompson CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply); 81*4fee36f0SJeremy L Thompson CeedOperatorSetField(op_apply, "u", elem_restriction_u, basis_u, CEED_VECTOR_ACTIVE); 82*4fee36f0SJeremy L Thompson CeedOperatorSetField(op_apply, "q data diff", elem_restriction_q_data, CEED_BASIS_COLLOCATED, q_data_diff); 83*4fee36f0SJeremy L Thompson CeedOperatorSetField(op_apply, "v", elem_restriction_u, basis_u, CEED_VECTOR_ACTIVE); 8415ed3f7dSjeremylt 8515ed3f7dSjeremylt // Create FDM element inverse 86*4fee36f0SJeremy L Thompson CeedOperatorCreateFDMElementInverse(op_apply, &op_inverse, CEED_REQUEST_IMMEDIATE); 8715ed3f7dSjeremylt 8815ed3f7dSjeremylt // Create Schur complement for element corners 8915ed3f7dSjeremylt CeedScalar S[16]; 9015ed3f7dSjeremylt for (CeedInt i = 0; i < 4; i++) { 91*4fee36f0SJeremy L Thompson CeedScalar *u_array; 92*4fee36f0SJeremy L Thompson 93*4fee36f0SJeremy L Thompson CeedVectorSetValue(u, 0.0); 94*4fee36f0SJeremy L Thompson CeedVectorGetArray(u, CEED_MEM_HOST, &u_array); 9515ed3f7dSjeremylt switch (i) { 962b730f8bSJeremy L Thompson case 0: 97*4fee36f0SJeremy L Thompson u_array[0] = 1.0; 982b730f8bSJeremy L Thompson break; 992b730f8bSJeremy L Thompson case 1: 100*4fee36f0SJeremy L Thompson u_array[p - 1] = 1.0; 1012b730f8bSJeremy L Thompson break; 1022b730f8bSJeremy L Thompson case 2: 103*4fee36f0SJeremy L Thompson u_array[p * p - p] = 1.0; 1042b730f8bSJeremy L Thompson break; 1052b730f8bSJeremy L Thompson case 3: 106*4fee36f0SJeremy L Thompson u_array[p * p - 1] = 1.0; 1072b730f8bSJeremy L Thompson break; 10815ed3f7dSjeremylt } 109*4fee36f0SJeremy L Thompson CeedVectorRestoreArray(u, &u_array); 11015ed3f7dSjeremylt 111*4fee36f0SJeremy L Thompson CeedOperatorApply(op_inverse, u, v, CEED_REQUEST_IMMEDIATE); 11215ed3f7dSjeremylt 113*4fee36f0SJeremy L Thompson const CeedScalar *v_array; 114*4fee36f0SJeremy L Thompson 115*4fee36f0SJeremy L Thompson CeedVectorGetArrayRead(v, CEED_MEM_HOST, &v_array); 116*4fee36f0SJeremy L Thompson S[0 * 4 + i] = -v_array[0]; 117*4fee36f0SJeremy L Thompson S[1 * 4 + i] = -v_array[p - 1]; 118*4fee36f0SJeremy L Thompson S[2 * 4 + i] = -v_array[p * p - p]; 119*4fee36f0SJeremy L Thompson S[3 * 4 + i] = -v_array[p * p - 1]; 120*4fee36f0SJeremy L Thompson CeedVectorRestoreArrayRead(v, &v_array); 12115ed3f7dSjeremylt } 12215ed3f7dSjeremylt CeedScalar S_inv[16]; 12315ed3f7dSjeremylt { 12415ed3f7dSjeremylt CeedScalar det; 1252b730f8bSJeremy L Thompson S_inv[0] = S[5] * S[10] * S[15] - S[5] * S[11] * S[14] - S[9] * S[6] * S[15] + S[9] * S[7] * S[14] + S[13] * S[6] * S[11] - S[13] * S[7] * S[10]; 12615ed3f7dSjeremylt 1272b730f8bSJeremy L Thompson S_inv[4] = -S[4] * S[10] * S[15] + S[4] * S[11] * S[14] + S[8] * S[6] * S[15] - S[8] * S[7] * S[14] - S[12] * S[6] * S[11] + S[12] * S[7] * S[10]; 12815ed3f7dSjeremylt 1292b730f8bSJeremy L Thompson S_inv[8] = S[4] * S[9] * S[15] - S[4] * S[11] * S[13] - S[8] * S[5] * S[15] + S[8] * S[7] * S[13] + S[12] * S[5] * S[11] - S[12] * S[7] * S[9]; 13015ed3f7dSjeremylt 1312b730f8bSJeremy L Thompson S_inv[12] = -S[4] * S[9] * S[14] + S[4] * S[10] * S[13] + S[8] * S[5] * S[14] - S[8] * S[6] * S[13] - S[12] * S[5] * S[10] + S[12] * S[6] * S[9]; 13215ed3f7dSjeremylt 1332b730f8bSJeremy L Thompson S_inv[1] = -S[1] * S[10] * S[15] + S[1] * S[11] * S[14] + S[9] * S[2] * S[15] - S[9] * S[3] * S[14] - S[13] * S[2] * S[11] + S[13] * S[3] * S[10]; 13415ed3f7dSjeremylt 1352b730f8bSJeremy L Thompson S_inv[5] = S[0] * S[10] * S[15] - S[0] * S[11] * S[14] - S[8] * S[2] * S[15] + S[8] * S[3] * S[14] + S[12] * S[2] * S[11] - S[12] * S[3] * S[10]; 13615ed3f7dSjeremylt 1372b730f8bSJeremy L Thompson S_inv[9] = -S[0] * S[9] * S[15] + S[0] * S[11] * S[13] + S[8] * S[1] * S[15] - S[8] * S[3] * S[13] - S[12] * S[1] * S[11] + S[12] * S[3] * S[9]; 13815ed3f7dSjeremylt 1392b730f8bSJeremy L Thompson S_inv[13] = S[0] * S[9] * S[14] - S[0] * S[10] * S[13] - S[8] * S[1] * S[14] + S[8] * S[2] * S[13] + S[12] * S[1] * S[10] - S[12] * S[2] * S[9]; 14015ed3f7dSjeremylt 1412b730f8bSJeremy L Thompson S_inv[2] = S[1] * S[6] * S[15] - S[1] * S[7] * S[14] - S[5] * S[2] * S[15] + S[5] * S[3] * S[14] + S[13] * S[2] * S[7] - S[13] * S[3] * S[6]; 14215ed3f7dSjeremylt 1432b730f8bSJeremy L Thompson S_inv[6] = -S[0] * S[6] * S[15] + S[0] * S[7] * S[14] + S[4] * S[2] * S[15] - S[4] * S[3] * S[14] - S[12] * S[2] * S[7] + S[12] * S[3] * S[6]; 14415ed3f7dSjeremylt 1452b730f8bSJeremy L Thompson S_inv[10] = S[0] * S[5] * S[15] - S[0] * S[7] * S[13] - S[4] * S[1] * S[15] + S[4] * S[3] * S[13] + S[12] * S[1] * S[7] - S[12] * S[3] * S[5]; 14615ed3f7dSjeremylt 1472b730f8bSJeremy L Thompson S_inv[14] = -S[0] * S[5] * S[14] + S[0] * S[6] * S[13] + S[4] * S[1] * S[14] - S[4] * S[2] * S[13] - S[12] * S[1] * S[6] + S[12] * S[2] * S[5]; 14815ed3f7dSjeremylt 1492b730f8bSJeremy L Thompson S_inv[3] = -S[1] * S[6] * S[11] + S[1] * S[7] * S[10] + S[5] * S[2] * S[11] - S[5] * S[3] * S[10] - S[9] * S[2] * S[7] + S[9] * S[3] * S[6]; 15015ed3f7dSjeremylt 1512b730f8bSJeremy L Thompson S_inv[7] = S[0] * S[6] * S[11] - S[0] * S[7] * S[10] - S[4] * S[2] * S[11] + S[4] * S[3] * S[10] + S[8] * S[2] * S[7] - S[8] * S[3] * S[6]; 15215ed3f7dSjeremylt 1532b730f8bSJeremy L Thompson S_inv[11] = -S[0] * S[5] * S[11] + S[0] * S[7] * S[9] + S[4] * S[1] * S[11] - S[4] * S[3] * S[9] - S[8] * S[1] * S[7] + S[8] * S[3] * S[5]; 15415ed3f7dSjeremylt 1552b730f8bSJeremy L Thompson S_inv[15] = S[0] * S[5] * S[10] - S[0] * S[6] * S[9] - S[4] * S[1] * S[10] + S[4] * S[2] * S[9] + S[8] * S[1] * S[6] - S[8] * S[2] * S[5]; 15615ed3f7dSjeremylt 15715ed3f7dSjeremylt det = 1 / (S[0] * S_inv[0] + S[1] * S_inv[4] + S[2] * S_inv[8] + S[3] * S_inv[12]); 15815ed3f7dSjeremylt 1592b730f8bSJeremy L Thompson for (CeedInt i = 0; i < 16; i++) S_inv[i] *= det; 16015ed3f7dSjeremylt } 16115ed3f7dSjeremylt 16215ed3f7dSjeremylt // Set initial values 16315ed3f7dSjeremylt { 164*4fee36f0SJeremy L Thompson CeedScalar nodes[p]; 165*4fee36f0SJeremy L Thompson CeedScalar *u_array; 166*4fee36f0SJeremy L Thompson 167*4fee36f0SJeremy L Thompson CeedLobattoQuadrature(p, nodes, NULL); 168*4fee36f0SJeremy L Thompson CeedVectorGetArray(u, CEED_MEM_HOST, &u_array); 169*4fee36f0SJeremy L Thompson for (CeedInt i = 0; i < p; i++) { 170*4fee36f0SJeremy L Thompson for (CeedInt j = 0; j < p; j++) u_array[i * p + j] = -(nodes[i] - 1.0) * (nodes[i] + 1.0) - (nodes[j] - 1.0) * (nodes[j] + 1.0); 1712b730f8bSJeremy L Thompson } 172*4fee36f0SJeremy L Thompson CeedVectorRestoreArray(u, &u_array); 17315ed3f7dSjeremylt } 17415ed3f7dSjeremylt 17515ed3f7dSjeremylt // Apply original operator 176*4fee36f0SJeremy L Thompson CeedOperatorApply(op_apply, u, v, CEED_REQUEST_IMMEDIATE); 17715ed3f7dSjeremylt 17815ed3f7dSjeremylt // Apply FDM element inverse 17915ed3f7dSjeremylt { 18015ed3f7dSjeremylt // -- Zero corners 181*4fee36f0SJeremy L Thompson CeedScalar *v_array; 182*4fee36f0SJeremy L Thompson 183*4fee36f0SJeremy L Thompson CeedVectorGetArray(v, CEED_MEM_HOST, &v_array); 184*4fee36f0SJeremy L Thompson v_array[0] = 0.0; 185*4fee36f0SJeremy L Thompson v_array[p - 1] = 0.0; 186*4fee36f0SJeremy L Thompson v_array[p * p - p] = 0.0; 187*4fee36f0SJeremy L Thompson v_array[p * p - 1] = 0.0; 188*4fee36f0SJeremy L Thompson CeedVectorRestoreArray(v, &v_array); 18915ed3f7dSjeremylt 19015ed3f7dSjeremylt // -- Apply FDM inverse to interior 191*4fee36f0SJeremy L Thompson CeedOperatorApply(op_inverse, v, w, CEED_REQUEST_IMMEDIATE); 19215ed3f7dSjeremylt 19315ed3f7dSjeremylt // -- Pick off corners 194*4fee36f0SJeremy L Thompson const CeedScalar *w_array; 19515ed3f7dSjeremylt CeedScalar w_Pi[4]; 196*4fee36f0SJeremy L Thompson 197*4fee36f0SJeremy L Thompson CeedVectorGetArrayRead(w, CEED_MEM_HOST, &w_array); 198*4fee36f0SJeremy L Thompson w_Pi[0] = w_array[0]; 199*4fee36f0SJeremy L Thompson w_Pi[1] = w_array[p - 1]; 200*4fee36f0SJeremy L Thompson w_Pi[2] = w_array[p * p - p]; 201*4fee36f0SJeremy L Thompson w_Pi[3] = w_array[p * p - 1]; 202*4fee36f0SJeremy L Thompson CeedVectorRestoreArrayRead(w, &w_array); 20315ed3f7dSjeremylt 20415ed3f7dSjeremylt // -- Apply inverse of Schur complement 20515ed3f7dSjeremylt CeedScalar v_Pi[4]; 20615ed3f7dSjeremylt for (CeedInt i = 0; i < 4; i++) { 20715ed3f7dSjeremylt CeedScalar sum = 0.0; 20815ed3f7dSjeremylt for (CeedInt j = 0; j < 4; j++) { 20915ed3f7dSjeremylt sum += w_Pi[j] * S_inv[i * 4 + j]; 21015ed3f7dSjeremylt } 21185cf89eaSjeremylt v_Pi[i] = sum; 21215ed3f7dSjeremylt } 21315ed3f7dSjeremylt 21415ed3f7dSjeremylt // -- Set corners 215*4fee36f0SJeremy L Thompson CeedVectorGetArray(v, CEED_MEM_HOST, &v_array); 216*4fee36f0SJeremy L Thompson v_array[0] = v_Pi[0]; 217*4fee36f0SJeremy L Thompson v_array[p - 1] = v_Pi[1]; 218*4fee36f0SJeremy L Thompson v_array[p * p - p] = v_Pi[2]; 219*4fee36f0SJeremy L Thompson v_array[p * p - 1] = v_Pi[3]; 220*4fee36f0SJeremy L Thompson CeedVectorRestoreArray(v, &v_array); 22115ed3f7dSjeremylt 22215ed3f7dSjeremylt // -- Apply full FDM inverse again 223*4fee36f0SJeremy L Thompson CeedOperatorApply(op_inverse, v, w, CEED_REQUEST_IMMEDIATE); 22415ed3f7dSjeremylt } 22515ed3f7dSjeremylt 22615ed3f7dSjeremylt // Check output 22715ed3f7dSjeremylt { 228*4fee36f0SJeremy L Thompson const CeedScalar *u_array, *w_array; 229*4fee36f0SJeremy L Thompson CeedVectorGetArrayRead(u, CEED_MEM_HOST, &u_array); 230*4fee36f0SJeremy L Thompson CeedVectorGetArrayRead(w, CEED_MEM_HOST, &w_array); 231*4fee36f0SJeremy L Thompson for (CeedInt i = 0; i < p; i++) { 232*4fee36f0SJeremy L Thompson for (CeedInt j = 0; j < p; j++) { 233*4fee36f0SJeremy L Thompson if (fabs(u_array[i * p + j] - w_array[i * p + j]) > 2e-3) { 23415ed3f7dSjeremylt // LCOV_EXCL_START 235*4fee36f0SJeremy L Thompson printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in inverse: %e != %e\n", i, j, w_array[i * p + j], u_array[i * p + j]); 23615ed3f7dSjeremylt // LCOV_EXCL_STOP 2372b730f8bSJeremy L Thompson } 2382b730f8bSJeremy L Thompson } 2392b730f8bSJeremy L Thompson } 240*4fee36f0SJeremy L Thompson CeedVectorRestoreArrayRead(u, &u_array); 241*4fee36f0SJeremy L Thompson CeedVectorRestoreArrayRead(w, &w_array); 24215ed3f7dSjeremylt } 24315ed3f7dSjeremylt 24415ed3f7dSjeremylt // Cleanup 245*4fee36f0SJeremy L Thompson CeedVectorDestroy(&x); 246*4fee36f0SJeremy L Thompson CeedVectorDestroy(&q_data_diff); 247*4fee36f0SJeremy L Thompson CeedVectorDestroy(&u); 248*4fee36f0SJeremy L Thompson CeedVectorDestroy(&v); 249*4fee36f0SJeremy L Thompson CeedVectorDestroy(&w); 250*4fee36f0SJeremy L Thompson CeedElemRestrictionDestroy(&elem_restriction_u); 251*4fee36f0SJeremy L Thompson CeedElemRestrictionDestroy(&elem_restriction_x); 252*4fee36f0SJeremy L Thompson CeedElemRestrictionDestroy(&elem_restriction_q_data); 253*4fee36f0SJeremy L Thompson CeedBasisDestroy(&basis_x); 254*4fee36f0SJeremy L Thompson CeedBasisDestroy(&basis_u); 25515ed3f7dSjeremylt CeedQFunctionDestroy(&qf_setup_diff); 25615ed3f7dSjeremylt CeedQFunctionDestroy(&qf_apply); 25715ed3f7dSjeremylt CeedOperatorDestroy(&op_setup_diff); 25815ed3f7dSjeremylt CeedOperatorDestroy(&op_apply); 259*4fee36f0SJeremy L Thompson CeedOperatorDestroy(&op_inverse); 26015ed3f7dSjeremylt CeedDestroy(&ceed); 26115ed3f7dSjeremylt return 0; 26215ed3f7dSjeremylt } 263