1*ac8b7a1cSSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*ac8b7a1cSSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3*ac8b7a1cSSebastian Grimberg // 4*ac8b7a1cSSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 5*ac8b7a1cSSebastian Grimberg // 6*ac8b7a1cSSebastian Grimberg // This file is part of CEED: http://github.com/ceed 7*ac8b7a1cSSebastian Grimberg 8*ac8b7a1cSSebastian Grimberg #include <ceed.h> 9*ac8b7a1cSSebastian Grimberg #include <algorithm> 10*ac8b7a1cSSebastian Grimberg #include <array> 11*ac8b7a1cSSebastian Grimberg #include <chrono> 12*ac8b7a1cSSebastian Grimberg #include <iostream> 13*ac8b7a1cSSebastian Grimberg #include <random> 14*ac8b7a1cSSebastian Grimberg #include <vector> 15*ac8b7a1cSSebastian Grimberg 16*ac8b7a1cSSebastian Grimberg // XX TODO WIP: Add other quadrature orders, prism/pyramid, ... 17*ac8b7a1cSSebastian Grimberg // clang-format off 18*ac8b7a1cSSebastian Grimberg constexpr static std::array<std::array<int, 3>, 11> PQ_VALUES = { 19*ac8b7a1cSSebastian Grimberg {{3, 1, 2}, {6, 3, 2}, {10, 6, 2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2}, 20*ac8b7a1cSSebastian Grimberg {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {35, 24, 3}} 21*ac8b7a1cSSebastian Grimberg }; 22*ac8b7a1cSSebastian Grimberg // clang-format on 23*ac8b7a1cSSebastian Grimberg 24*ac8b7a1cSSebastian Grimberg constexpr static std::array<int, 7> N_VALUES = {1024, 5120, 10240, 51200, 102400, 512000, 1024000}; 25*ac8b7a1cSSebastian Grimberg 26*ac8b7a1cSSebastian Grimberg constexpr int NUM_TRIALS = 25; 27*ac8b7a1cSSebastian Grimberg 28*ac8b7a1cSSebastian Grimberg using Clock = std::chrono::steady_clock; 29*ac8b7a1cSSebastian Grimberg using Duration = std::chrono::duration<double>; 30*ac8b7a1cSSebastian Grimberg 31*ac8b7a1cSSebastian Grimberg int main(int argc, char **argv) { 32*ac8b7a1cSSebastian Grimberg Ceed ceed; 33*ac8b7a1cSSebastian Grimberg 34*ac8b7a1cSSebastian Grimberg std::random_device rand_device; 35*ac8b7a1cSSebastian Grimberg std::default_random_engine rand_engine(rand_device()); 36*ac8b7a1cSSebastian Grimberg std::uniform_real_distribution<> rand_dist(0.0, 1.0); 37*ac8b7a1cSSebastian Grimberg auto generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); }; 38*ac8b7a1cSSebastian Grimberg 39*ac8b7a1cSSebastian Grimberg CeedInit((argc < 2) ? "/gpu/cuda/magma" : argv[1], &ceed); 40*ac8b7a1cSSebastian Grimberg CeedSetErrorHandler(ceed, CeedErrorStore); 41*ac8b7a1cSSebastian Grimberg 42*ac8b7a1cSSebastian Grimberg for (const auto [P, Q, dim] : PQ_VALUES) { 43*ac8b7a1cSSebastian Grimberg CeedBasis basis; 44*ac8b7a1cSSebastian Grimberg CeedVector u, v; 45*ac8b7a1cSSebastian Grimberg 46*ac8b7a1cSSebastian Grimberg std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim); 47*ac8b7a1cSSebastian Grimberg std::generate(interp.begin(), interp.end(), generate_random); 48*ac8b7a1cSSebastian Grimberg std::generate(grad.begin(), grad.end(), generate_random); 49*ac8b7a1cSSebastian Grimberg 50*ac8b7a1cSSebastian Grimberg CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(), 51*ac8b7a1cSSebastian Grimberg q_weight.data(), &basis); 52*ac8b7a1cSSebastian Grimberg 53*ac8b7a1cSSebastian Grimberg for (const auto N : N_VALUES) { 54*ac8b7a1cSSebastian Grimberg double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0; 55*ac8b7a1cSSebastian Grimberg 56*ac8b7a1cSSebastian Grimberg // Interp 57*ac8b7a1cSSebastian Grimberg { 58*ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, P * N, &u); 59*ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, Q * N, &v); 60*ac8b7a1cSSebastian Grimberg 61*ac8b7a1cSSebastian Grimberg // NoTranspose 62*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 1.0); 63*ac8b7a1cSSebastian Grimberg for (int trial = 0; trial <= NUM_TRIALS; trial++) { 64*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 0.0); 65*ac8b7a1cSSebastian Grimberg 66*ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 67*ac8b7a1cSSebastian Grimberg int ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v); 68*ac8b7a1cSSebastian Grimberg if (ierr) { 69*ac8b7a1cSSebastian Grimberg break; 70*ac8b7a1cSSebastian Grimberg } 71*ac8b7a1cSSebastian Grimberg if (trial > 0) { 72*ac8b7a1cSSebastian Grimberg data_interp_n += std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 73*ac8b7a1cSSebastian Grimberg } 74*ac8b7a1cSSebastian Grimberg } 75*ac8b7a1cSSebastian Grimberg 76*ac8b7a1cSSebastian Grimberg // Transpose 77*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 1.0); 78*ac8b7a1cSSebastian Grimberg for (int trial = 0; trial <= NUM_TRIALS; trial++) { 79*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 0.0); 80*ac8b7a1cSSebastian Grimberg 81*ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 82*ac8b7a1cSSebastian Grimberg int ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u); 83*ac8b7a1cSSebastian Grimberg if (ierr) { 84*ac8b7a1cSSebastian Grimberg break; 85*ac8b7a1cSSebastian Grimberg } 86*ac8b7a1cSSebastian Grimberg if (trial > 0) { 87*ac8b7a1cSSebastian Grimberg data_interp_t += std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 88*ac8b7a1cSSebastian Grimberg } 89*ac8b7a1cSSebastian Grimberg } 90*ac8b7a1cSSebastian Grimberg 91*ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&u); 92*ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&v); 93*ac8b7a1cSSebastian Grimberg } 94*ac8b7a1cSSebastian Grimberg 95*ac8b7a1cSSebastian Grimberg // Grad 96*ac8b7a1cSSebastian Grimberg { 97*ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, P * N, &u); 98*ac8b7a1cSSebastian Grimberg CeedVectorCreate(ceed, dim * Q * N, &v); 99*ac8b7a1cSSebastian Grimberg 100*ac8b7a1cSSebastian Grimberg // NoTranspose 101*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 1.0); 102*ac8b7a1cSSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 103*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 0.0); 104*ac8b7a1cSSebastian Grimberg 105*ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 106*ac8b7a1cSSebastian Grimberg int ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v); 107*ac8b7a1cSSebastian Grimberg if (ierr) { 108*ac8b7a1cSSebastian Grimberg break; 109*ac8b7a1cSSebastian Grimberg } 110*ac8b7a1cSSebastian Grimberg if (trial > 0) { 111*ac8b7a1cSSebastian Grimberg data_grad_n += std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 112*ac8b7a1cSSebastian Grimberg } 113*ac8b7a1cSSebastian Grimberg } 114*ac8b7a1cSSebastian Grimberg 115*ac8b7a1cSSebastian Grimberg // Transpose 116*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(v, 1.0); 117*ac8b7a1cSSebastian Grimberg for (int trial = 0; trial < NUM_TRIALS; trial++) { 118*ac8b7a1cSSebastian Grimberg CeedVectorSetValue(u, 0.0); 119*ac8b7a1cSSebastian Grimberg 120*ac8b7a1cSSebastian Grimberg const auto start = Clock::now(); 121*ac8b7a1cSSebastian Grimberg int ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u); 122*ac8b7a1cSSebastian Grimberg if (ierr) { 123*ac8b7a1cSSebastian Grimberg break; 124*ac8b7a1cSSebastian Grimberg } 125*ac8b7a1cSSebastian Grimberg if (trial > 0) { 126*ac8b7a1cSSebastian Grimberg data_grad_t += std::chrono::duration_cast<Duration>(Clock::now() - start).count(); 127*ac8b7a1cSSebastian Grimberg } 128*ac8b7a1cSSebastian Grimberg } 129*ac8b7a1cSSebastian Grimberg 130*ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&u); 131*ac8b7a1cSSebastian Grimberg CeedVectorDestroy(&v); 132*ac8b7a1cSSebastian Grimberg } 133*ac8b7a1cSSebastian Grimberg 134*ac8b7a1cSSebastian Grimberg // Postprocess and log the data 135*ac8b7a1cSSebastian Grimberg const double interp_flops = P * Q * (double)N; 136*ac8b7a1cSSebastian Grimberg const double grad_flops = P * Q * dim * (double)N; 137*ac8b7a1cSSebastian Grimberg constexpr int width = 12, precision = 2; 138*ac8b7a1cSSebastian Grimberg // clang-format off 139*ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 140*ac8b7a1cSSebastian Grimberg width, P, width, N, width, Q, width, 1, width, 0, width, precision, 141*ac8b7a1cSSebastian Grimberg (data_interp_n > 0.0) ? NUM_TRIALS * interp_flops / data_interp_n * 1.0e-6 : 0.0); 142*ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 143*ac8b7a1cSSebastian Grimberg width, P, width, N, width, Q, width, 1, width, 1, width, precision, 144*ac8b7a1cSSebastian Grimberg (data_interp_t > 0.0) ? NUM_TRIALS * interp_flops / data_interp_t * 1.0e-6 : 0.0); 145*ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 146*ac8b7a1cSSebastian Grimberg width, P, width, N, width, Q, width, dim, width, 0, width, precision, 147*ac8b7a1cSSebastian Grimberg (data_grad_n > 0.0) ? NUM_TRIALS * grad_flops / data_grad_n * 1.0e-6 : 0.0); 148*ac8b7a1cSSebastian Grimberg std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n", 149*ac8b7a1cSSebastian Grimberg width, P, width, N, width, Q, width, dim, width, 1, width, precision, 150*ac8b7a1cSSebastian Grimberg (data_grad_n > 0.0) ? NUM_TRIALS * grad_flops / data_grad_n * 1.0e-6 : 0.0); 151*ac8b7a1cSSebastian Grimberg // clang-format on 152*ac8b7a1cSSebastian Grimberg } 153*ac8b7a1cSSebastian Grimberg 154*ac8b7a1cSSebastian Grimberg CeedBasisDestroy(&basis); 155*ac8b7a1cSSebastian Grimberg } 156*ac8b7a1cSSebastian Grimberg 157*ac8b7a1cSSebastian Grimberg CeedDestroy(&ceed); 158*ac8b7a1cSSebastian Grimberg return 0; 159*ac8b7a1cSSebastian Grimberg } 160