xref: /libCEED/rust/libceed-sys/c-src/backends/magma/tuning/tuning.cpp (revision ac8b7a1c891ad04d71a0a3b312ee8eb2f11e3b62)
1*ac8b7a1cSSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2*ac8b7a1cSSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3*ac8b7a1cSSebastian Grimberg //
4*ac8b7a1cSSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
5*ac8b7a1cSSebastian Grimberg //
6*ac8b7a1cSSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
7*ac8b7a1cSSebastian Grimberg 
8*ac8b7a1cSSebastian Grimberg #include <ceed.h>
9*ac8b7a1cSSebastian Grimberg #include <algorithm>
10*ac8b7a1cSSebastian Grimberg #include <array>
11*ac8b7a1cSSebastian Grimberg #include <chrono>
12*ac8b7a1cSSebastian Grimberg #include <iostream>
13*ac8b7a1cSSebastian Grimberg #include <random>
14*ac8b7a1cSSebastian Grimberg #include <vector>
15*ac8b7a1cSSebastian Grimberg 
16*ac8b7a1cSSebastian Grimberg // XX TODO WIP: Add other quadrature orders, prism/pyramid, ...
17*ac8b7a1cSSebastian Grimberg // clang-format off
18*ac8b7a1cSSebastian Grimberg constexpr static std::array<std::array<int, 3>, 11> PQ_VALUES = {
19*ac8b7a1cSSebastian Grimberg     {{3, 1, 2}, {6, 3,  2}, {10, 6,  2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2},
20*ac8b7a1cSSebastian Grimberg      {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {35, 24, 3}}
21*ac8b7a1cSSebastian Grimberg };
22*ac8b7a1cSSebastian Grimberg // clang-format on
23*ac8b7a1cSSebastian Grimberg 
24*ac8b7a1cSSebastian Grimberg constexpr static std::array<int, 7> N_VALUES = {1024, 5120, 10240, 51200, 102400, 512000, 1024000};
25*ac8b7a1cSSebastian Grimberg 
26*ac8b7a1cSSebastian Grimberg constexpr int NUM_TRIALS = 25;
27*ac8b7a1cSSebastian Grimberg 
28*ac8b7a1cSSebastian Grimberg using Clock    = std::chrono::steady_clock;
29*ac8b7a1cSSebastian Grimberg using Duration = std::chrono::duration<double>;
30*ac8b7a1cSSebastian Grimberg 
31*ac8b7a1cSSebastian Grimberg int main(int argc, char **argv) {
32*ac8b7a1cSSebastian Grimberg   Ceed ceed;
33*ac8b7a1cSSebastian Grimberg 
34*ac8b7a1cSSebastian Grimberg   std::random_device               rand_device;
35*ac8b7a1cSSebastian Grimberg   std::default_random_engine       rand_engine(rand_device());
36*ac8b7a1cSSebastian Grimberg   std::uniform_real_distribution<> rand_dist(0.0, 1.0);
37*ac8b7a1cSSebastian Grimberg   auto                             generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); };
38*ac8b7a1cSSebastian Grimberg 
39*ac8b7a1cSSebastian Grimberg   CeedInit((argc < 2) ? "/gpu/cuda/magma" : argv[1], &ceed);
40*ac8b7a1cSSebastian Grimberg   CeedSetErrorHandler(ceed, CeedErrorStore);
41*ac8b7a1cSSebastian Grimberg 
42*ac8b7a1cSSebastian Grimberg   for (const auto [P, Q, dim] : PQ_VALUES) {
43*ac8b7a1cSSebastian Grimberg     CeedBasis  basis;
44*ac8b7a1cSSebastian Grimberg     CeedVector u, v;
45*ac8b7a1cSSebastian Grimberg 
46*ac8b7a1cSSebastian Grimberg     std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim);
47*ac8b7a1cSSebastian Grimberg     std::generate(interp.begin(), interp.end(), generate_random);
48*ac8b7a1cSSebastian Grimberg     std::generate(grad.begin(), grad.end(), generate_random);
49*ac8b7a1cSSebastian Grimberg 
50*ac8b7a1cSSebastian Grimberg     CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(),
51*ac8b7a1cSSebastian Grimberg                       q_weight.data(), &basis);
52*ac8b7a1cSSebastian Grimberg 
53*ac8b7a1cSSebastian Grimberg     for (const auto N : N_VALUES) {
54*ac8b7a1cSSebastian Grimberg       double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0;
55*ac8b7a1cSSebastian Grimberg 
56*ac8b7a1cSSebastian Grimberg       // Interp
57*ac8b7a1cSSebastian Grimberg       {
58*ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
59*ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, Q * N, &v);
60*ac8b7a1cSSebastian Grimberg 
61*ac8b7a1cSSebastian Grimberg         // NoTranspose
62*ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
63*ac8b7a1cSSebastian Grimberg         for (int trial = 0; trial <= NUM_TRIALS; trial++) {
64*ac8b7a1cSSebastian Grimberg           CeedVectorSetValue(v, 0.0);
65*ac8b7a1cSSebastian Grimberg 
66*ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
67*ac8b7a1cSSebastian Grimberg           int        ierr  = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
68*ac8b7a1cSSebastian Grimberg           if (ierr) {
69*ac8b7a1cSSebastian Grimberg             break;
70*ac8b7a1cSSebastian Grimberg           }
71*ac8b7a1cSSebastian Grimberg           if (trial > 0) {
72*ac8b7a1cSSebastian Grimberg             data_interp_n += std::chrono::duration_cast<Duration>(Clock::now() - start).count();
73*ac8b7a1cSSebastian Grimberg           }
74*ac8b7a1cSSebastian Grimberg         }
75*ac8b7a1cSSebastian Grimberg 
76*ac8b7a1cSSebastian Grimberg         // Transpose
77*ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 1.0);
78*ac8b7a1cSSebastian Grimberg         for (int trial = 0; trial <= NUM_TRIALS; trial++) {
79*ac8b7a1cSSebastian Grimberg           CeedVectorSetValue(u, 0.0);
80*ac8b7a1cSSebastian Grimberg 
81*ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
82*ac8b7a1cSSebastian Grimberg           int        ierr  = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
83*ac8b7a1cSSebastian Grimberg           if (ierr) {
84*ac8b7a1cSSebastian Grimberg             break;
85*ac8b7a1cSSebastian Grimberg           }
86*ac8b7a1cSSebastian Grimberg           if (trial > 0) {
87*ac8b7a1cSSebastian Grimberg             data_interp_t += std::chrono::duration_cast<Duration>(Clock::now() - start).count();
88*ac8b7a1cSSebastian Grimberg           }
89*ac8b7a1cSSebastian Grimberg         }
90*ac8b7a1cSSebastian Grimberg 
91*ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
92*ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
93*ac8b7a1cSSebastian Grimberg       }
94*ac8b7a1cSSebastian Grimberg 
95*ac8b7a1cSSebastian Grimberg       // Grad
96*ac8b7a1cSSebastian Grimberg       {
97*ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
98*ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, dim * Q * N, &v);
99*ac8b7a1cSSebastian Grimberg 
100*ac8b7a1cSSebastian Grimberg         // NoTranspose
101*ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
102*ac8b7a1cSSebastian Grimberg         for (int trial = 0; trial < NUM_TRIALS; trial++) {
103*ac8b7a1cSSebastian Grimberg           CeedVectorSetValue(v, 0.0);
104*ac8b7a1cSSebastian Grimberg 
105*ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
106*ac8b7a1cSSebastian Grimberg           int        ierr  = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
107*ac8b7a1cSSebastian Grimberg           if (ierr) {
108*ac8b7a1cSSebastian Grimberg             break;
109*ac8b7a1cSSebastian Grimberg           }
110*ac8b7a1cSSebastian Grimberg           if (trial > 0) {
111*ac8b7a1cSSebastian Grimberg             data_grad_n += std::chrono::duration_cast<Duration>(Clock::now() - start).count();
112*ac8b7a1cSSebastian Grimberg           }
113*ac8b7a1cSSebastian Grimberg         }
114*ac8b7a1cSSebastian Grimberg 
115*ac8b7a1cSSebastian Grimberg         // Transpose
116*ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 1.0);
117*ac8b7a1cSSebastian Grimberg         for (int trial = 0; trial < NUM_TRIALS; trial++) {
118*ac8b7a1cSSebastian Grimberg           CeedVectorSetValue(u, 0.0);
119*ac8b7a1cSSebastian Grimberg 
120*ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
121*ac8b7a1cSSebastian Grimberg           int        ierr  = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
122*ac8b7a1cSSebastian Grimberg           if (ierr) {
123*ac8b7a1cSSebastian Grimberg             break;
124*ac8b7a1cSSebastian Grimberg           }
125*ac8b7a1cSSebastian Grimberg           if (trial > 0) {
126*ac8b7a1cSSebastian Grimberg             data_grad_t += std::chrono::duration_cast<Duration>(Clock::now() - start).count();
127*ac8b7a1cSSebastian Grimberg           }
128*ac8b7a1cSSebastian Grimberg         }
129*ac8b7a1cSSebastian Grimberg 
130*ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
131*ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
132*ac8b7a1cSSebastian Grimberg       }
133*ac8b7a1cSSebastian Grimberg 
134*ac8b7a1cSSebastian Grimberg       // Postprocess and log the data
135*ac8b7a1cSSebastian Grimberg       const double  interp_flops = P * Q * (double)N;
136*ac8b7a1cSSebastian Grimberg       const double  grad_flops   = P * Q * dim * (double)N;
137*ac8b7a1cSSebastian Grimberg       constexpr int width = 12, precision = 2;
138*ac8b7a1cSSebastian Grimberg       // clang-format off
139*ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
140*ac8b7a1cSSebastian Grimberg                   width, P, width, N, width, Q, width, 1, width, 0, width, precision,
141*ac8b7a1cSSebastian Grimberg                   (data_interp_n > 0.0) ? NUM_TRIALS * interp_flops / data_interp_n * 1.0e-6 : 0.0);
142*ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
143*ac8b7a1cSSebastian Grimberg                   width, P, width, N, width, Q, width, 1, width, 1, width, precision,
144*ac8b7a1cSSebastian Grimberg                   (data_interp_t > 0.0) ? NUM_TRIALS * interp_flops / data_interp_t * 1.0e-6 : 0.0);
145*ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
146*ac8b7a1cSSebastian Grimberg                   width, P, width, N, width, Q, width, dim, width, 0, width, precision,
147*ac8b7a1cSSebastian Grimberg                   (data_grad_n > 0.0) ? NUM_TRIALS * grad_flops / data_grad_n * 1.0e-6 : 0.0);
148*ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
149*ac8b7a1cSSebastian Grimberg                   width, P, width, N, width, Q, width, dim, width, 1, width, precision,
150*ac8b7a1cSSebastian Grimberg                   (data_grad_n > 0.0) ? NUM_TRIALS * grad_flops / data_grad_n * 1.0e-6 : 0.0);
151*ac8b7a1cSSebastian Grimberg       // clang-format on
152*ac8b7a1cSSebastian Grimberg     }
153*ac8b7a1cSSebastian Grimberg 
154*ac8b7a1cSSebastian Grimberg     CeedBasisDestroy(&basis);
155*ac8b7a1cSSebastian Grimberg   }
156*ac8b7a1cSSebastian Grimberg 
157*ac8b7a1cSSebastian Grimberg   CeedDestroy(&ceed);
158*ac8b7a1cSSebastian Grimberg   return 0;
159*ac8b7a1cSSebastian Grimberg }
160