1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <algorithm> 10 #include <array> 11 #include <chrono> 12 #include <iostream> 13 #include <random> 14 #include <vector> 15 16 // clang-format off 17 // Triplets of {P, Q, dim}. For now, includes standard H1 spaces on triangles and tetrahedra with p such that P <= 40, 18 // and quadrature rules of degree 2p and 2p - 2. These can be expanded to more element types and quadrature rules in the 19 // future. 20 constexpr static std::array<std::array<int, 3>, 22> PQ_VALUES = { 21 {{3, 1, 2}, {6, 3, 2}, {10, 6, 2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2}, 22 {3, 3, 2}, {6, 6, 2}, {10, 12, 2}, {15, 16, 2}, {21, 25, 2}, {28, 33, 2}, 23 {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {20, 14, 3}, {35, 24, 3}, 24 {4, 4, 3}, {10, 11, 3}, {10, 14, 3}, {20, 24, 3}} 25 }; 26 // clang-format on 27 28 constexpr static std::array<std::pair<int, int>, 7> N_VALUES = { 29 {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}} 30 }; 31 32 using Clock = std::chrono::steady_clock; 33 using Duration = std::chrono::duration<double>; 34 35 int main(int argc, char **argv) { 36 Ceed ceed; 37 38 std::random_device rand_device; 39 std::default_random_engine rand_engine(rand_device()); 40 std::uniform_real_distribution<> rand_dist(0.0, 1.0); 41 auto generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); }; 42 43 if (argc < 2) { 44 printf("Usage: ./tuning <CEED_RESOURCE>"); 45 return 1; 46 } 47 CeedInit(argv[1], &ceed); 48 CeedSetErrorHandler(ceed, CeedErrorStore); 49 50 for (const auto [P, Q, dim] : PQ_VALUES) { 51 CeedBasis basis; 52 CeedVector u, v; 53 54 std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim); 55 std::generate(interp.begin(), interp.end(), generate_random); 56 std::generate(grad.begin(), grad.end(), generate_random); 57 58 CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(), 59 q_weight.data(), &basis); 60 61 for (const auto [N, NUM_TRIALS] : N_VALUES) { 62 double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0; 63 int ierr; 64 65 // Interp 66 { 67 CeedVectorCreate(ceed, P * N, &u); 68 CeedVectorCreate(ceed, Q * N, &v); 69 70 // NoTranspose 71 CeedVectorSetValue(u, 1.0); 72 CeedVectorSetValue(v, 0.0); 73 ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v); 74 if (!ierr) { 75 const auto start = Clock::now(); 76 for (int trial = 0; trial < NUM_TRIALS; trial++) { 77 CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v); 78 } 79 data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 80 } 81 82 // Transpose 83 CeedVectorSetValue(u, 1.0); 84 CeedVectorSetValue(v, 0.0); 85 ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u); 86 if (!ierr) { 87 const auto start = Clock::now(); 88 for (int trial = 0; trial < NUM_TRIALS; trial++) { 89 CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u); 90 } 91 data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 92 } 93 94 CeedVectorDestroy(&u); 95 CeedVectorDestroy(&v); 96 } 97 98 // Grad 99 { 100 CeedVectorCreate(ceed, P * N, &u); 101 CeedVectorCreate(ceed, dim * Q * N, &v); 102 103 // NoTranspose 104 CeedVectorSetValue(u, 1.0); 105 CeedVectorSetValue(v, 0.0); 106 ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v); 107 if (!ierr) { 108 const auto start = Clock::now(); 109 for (int trial = 0; trial < NUM_TRIALS; trial++) { 110 CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v); 111 } 112 data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 113 } 114 115 // Transpose 116 CeedVectorSetValue(u, 1.0); 117 CeedVectorSetValue(v, 0.0); 118 ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u); 119 if (!ierr) { 120 const auto start = Clock::now(); 121 for (int trial = 0; trial < NUM_TRIALS; trial++) { 122 CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u); 123 } 124 data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 125 } 126 127 CeedVectorDestroy(&u); 128 CeedVectorDestroy(&v); 129 } 130 131 // Postprocess and log the data 132 const double interp_flops = P * Q * (double)N; 133 const double grad_flops = P * Q * dim * (double)N; 134 constexpr int width = 12, precision = 2; 135 // clang-format off 136 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 137 width, P, width, Q, width, N, width, 1, width, 0, width, precision, 138 (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0); 139 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 140 width, P, width, Q, width, N, width, 1, width, 1, width, precision, 141 (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0); 142 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 143 width, P, width, Q, width, N, width, dim, width, 0, width, precision, 144 (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0); 145 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 146 width, P, width, Q, width, N, width, dim, width, 1, width, precision, 147 (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0); 148 // clang-format on 149 } 150 151 CeedBasisDestroy(&basis); 152 } 153 154 CeedDestroy(&ceed); 155 return 0; 156 } 157