1ac8b7a1cSSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2ac8b7a1cSSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3ac8b7a1cSSebastian Grimberg // 4ac8b7a1cSSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5ac8b7a1cSSebastian Grimberg // 6ac8b7a1cSSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7ac8b7a1cSSebastian Grimberg 8ac8b7a1cSSebastian Grimberg #include <ceed.h> 9ac8b7a1cSSebastian Grimberg #include <algorithm> 10ac8b7a1cSSebastian Grimberg #include <array> 11ac8b7a1cSSebastian Grimberg #include <chrono> 12ac8b7a1cSSebastian Grimberg #include <iostream> 13ac8b7a1cSSebastian Grimberg #include <random> 14ac8b7a1cSSebastian Grimberg #include <vector> 15ac8b7a1cSSebastian Grimberg 16ac8b7a1cSSebastian Grimberg // clang-format off 17*47cb5789SSebastian Grimberg // Triplets of {P, Q, dim}. For now, includes standard H1 spaces on triangles and tetrahedra with p such that P <= 40, 18*47cb5789SSebastian Grimberg // and quadrature rules of degree 2p and 2p - 2. These can be expanded to more element types and quadrature rules in the 19*47cb5789SSebastian Grimberg // future. 20*47cb5789SSebastian Grimberg constexpr static std::array<std::array<int, 3>, 22> PQ_VALUES = { 21ac8b7a1cSSebastian Grimberg {{3, 1, 2}, {6, 3, 2}, {10, 6, 2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2}, 22*47cb5789SSebastian Grimberg {3, 3, 2}, {6, 6, 2}, {10, 12, 2}, {15, 16, 2}, {21, 25, 2}, {28, 33, 2}, 23*47cb5789SSebastian Grimberg {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {20, 14, 3}, {35, 24, 3}, 24*47cb5789SSebastian Grimberg {4, 4, 3}, {10, 11, 3}, {10, 14, 3}, {20, 24, 3}} 25ac8b7a1cSSebastian Grimberg }; 26ac8b7a1cSSebastian Grimberg // clang-format on 27ac8b7a1cSSebastian Grimberg 28acc0bb12SSebastian Grimberg constexpr static std::array<std::pair<int, int>, 7> N_VALUES = { 29acc0bb12SSebastian Grimberg {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}} 30acc0bb12SSebastian Grimberg }; 31ac8b7a1cSSebastian Grimberg 32ac8b7a1cSSebastian Grimberg using Clock = std::chrono::steady_clock; 33ac8b7a1cSSebastian Grimberg using Duration = std::chrono::duration<double>; 34ac8b7a1cSSebastian Grimberg 35ac8b7a1cSSebastian Grimberg int main(int argc, char **argv) { 36ac8b7a1cSSebastian Grimberg Ceed ceed; 37ac8b7a1cSSebastian Grimberg 38ac8b7a1cSSebastian Grimberg std::random_device rand_device; 39ac8b7a1cSSebastian Grimberg std::default_random_engine rand_engine(rand_device()); 40ac8b7a1cSSebastian Grimberg std::uniform_real_distribution<> rand_dist(0.0, 1.0); 41ac8b7a1cSSebastian Grimberg auto generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); }; 42ac8b7a1cSSebastian Grimberg 43acc0bb12SSebastian Grimberg if (argc < 2) { 44acc0bb12SSebastian Grimberg printf("Usage: ./tuning <CEED_RESOURCE>"); 45acc0bb12SSebastian Grimberg return 1; 46acc0bb12SSebastian Grimberg } 47acc0bb12SSebastian Grimberg CeedInit(argv[1], &ceed); 48ac8b7a1cSSebastian Grimberg CeedSetErrorHandler(ceed, CeedErrorStore); 49ac8b7a1cSSebastian Grimberg 50ac8b7a1cSSebastian Grimberg for (const auto [P, Q, dim] : PQ_VALUES) { 51ac8b7a1cSSebastian Grimberg CeedBasis basis; 52ac8b7a1cSSebastian Grimberg CeedVector u, v; 53ac8b7a1cSSebastian Grimberg 54ac8b7a1cSSebastian Grimberg std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim); 55ac8b7a1cSSebastian Grimberg std::generate(interp.begin(), interp.end(), generate_random); 56ac8b7a1cSSebastian Grimberg std::generate(grad.begin(), grad.end(), generate_random); 57ac8b7a1cSSebastian Grimberg 58ac8b7a1cSSebastian Grimberg CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(), 59ac8b7a1cSSebastian Grimberg q_weight.data(), &basis); 60ac8b7a1cSSebastian Grimberg 61acc0bb12SSebastian Grimberg for (const auto [N, NUM_TRIALS] : N_VALUES) { 62ac8b7a1cSSebastian Grimberg double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0; 63acc0bb12SSebastian Grimberg int ierr; 64ac8b7a1cSSebastian Grimberg 65ac8b7a1cSSebastian Grimberg // Interp 66ac8b7a1cSSebastian Grimberg { 67ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, P * N, &u); 68ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, Q * N, &v); 69ac8b7a1cSSebastian Grimberg 70ac8b7a1cSSebastian Grimberg // NoTranspose 71ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 1.0); 72ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 0.0); 73acc0bb12SSebastian Grimberg ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v); 74acc0bb12SSebastian Grimberg if (!ierr) { 75ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 76acc0bb12SSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 77acc0bb12SSebastian Grimberg CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v); 78ac8b7a1cSSebastian Grimberg } 79acc0bb12SSebastian Grimberg data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 80ac8b7a1cSSebastian Grimberg } 81ac8b7a1cSSebastian Grimberg 82ac8b7a1cSSebastian Grimberg // Transpose 83acc0bb12SSebastian Grimberg CeedVectorSetValue(u, 1.0); 84acc0bb12SSebastian Grimberg CeedVectorSetValue(v, 0.0); 85acc0bb12SSebastian Grimberg ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u); 86acc0bb12SSebastian Grimberg if (!ierr) { 87ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 88acc0bb12SSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 89acc0bb12SSebastian Grimberg CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u); 90ac8b7a1cSSebastian Grimberg } 91acc0bb12SSebastian Grimberg data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 92ac8b7a1cSSebastian Grimberg } 93ac8b7a1cSSebastian Grimberg 94ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&u); 95ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&v); 96ac8b7a1cSSebastian Grimberg } 97ac8b7a1cSSebastian Grimberg 98ac8b7a1cSSebastian Grimberg // Grad 99ac8b7a1cSSebastian Grimberg { 100ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, P * N, &u); 101ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, dim * Q * N, &v); 102ac8b7a1cSSebastian Grimberg 103ac8b7a1cSSebastian Grimberg // NoTranspose 104ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 1.0); 105ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 0.0); 106acc0bb12SSebastian Grimberg ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v); 107acc0bb12SSebastian Grimberg if (!ierr) { 108ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 109acc0bb12SSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 110acc0bb12SSebastian Grimberg CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v); 111ac8b7a1cSSebastian Grimberg } 112acc0bb12SSebastian Grimberg data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 113ac8b7a1cSSebastian Grimberg } 114ac8b7a1cSSebastian Grimberg 115ac8b7a1cSSebastian Grimberg // Transpose 116acc0bb12SSebastian Grimberg CeedVectorSetValue(u, 1.0); 117acc0bb12SSebastian Grimberg CeedVectorSetValue(v, 0.0); 118acc0bb12SSebastian Grimberg ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u); 119acc0bb12SSebastian Grimberg if (!ierr) { 120ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 121acc0bb12SSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 122acc0bb12SSebastian Grimberg CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u); 123ac8b7a1cSSebastian Grimberg } 124acc0bb12SSebastian Grimberg data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 125ac8b7a1cSSebastian Grimberg } 126ac8b7a1cSSebastian Grimberg 127ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&u); 128ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&v); 129ac8b7a1cSSebastian Grimberg } 130ac8b7a1cSSebastian Grimberg 131ac8b7a1cSSebastian Grimberg // Postprocess and log the data 132ac8b7a1cSSebastian Grimberg const double interp_flops = P * Q * (double)N; 133ac8b7a1cSSebastian Grimberg const double grad_flops = P * Q * dim * (double)N; 134ac8b7a1cSSebastian Grimberg constexpr int width = 12, precision = 2; 135ac8b7a1cSSebastian Grimberg // clang-format off 136ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 137acc0bb12SSebastian Grimberg width, P, width, Q, width, N, width, 1, width, 0, width, precision, 138acc0bb12SSebastian Grimberg (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0); 139ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 140acc0bb12SSebastian Grimberg width, P, width, Q, width, N, width, 1, width, 1, width, precision, 141acc0bb12SSebastian Grimberg (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0); 142ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 143acc0bb12SSebastian Grimberg width, P, width, Q, width, N, width, dim, width, 0, width, precision, 144acc0bb12SSebastian Grimberg (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0); 145ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 146acc0bb12SSebastian Grimberg width, P, width, Q, width, N, width, dim, width, 1, width, precision, 147acc0bb12SSebastian Grimberg (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0); 148ac8b7a1cSSebastian Grimberg // clang-format on 149ac8b7a1cSSebastian Grimberg } 150ac8b7a1cSSebastian Grimberg 151ac8b7a1cSSebastian Grimberg CeedBasisDestroy(&basis); 152ac8b7a1cSSebastian Grimberg } 153ac8b7a1cSSebastian Grimberg 154ac8b7a1cSSebastian Grimberg CeedDestroy(&ceed); 155ac8b7a1cSSebastian Grimberg return 0; 156ac8b7a1cSSebastian Grimberg } 157