// xref: /libCEED/backends/magma/tuning/tuning.cpp (revision acc0bb127f9d52b89fa0cb7f74c98dc79acc3cb0)
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed

#include <ceed.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <random>
#include <utility>
#include <vector>

// clang-format off
// Triplets of {P, Q, dim}. For now, includes some standard H1 spaces on triangles and tetrahedra, but can be
// expanded to more quadrature rules and element types in the future.
//
// main() passes P and Q to CeedBasisCreateH1 as the node and quadrature point counts of a single-component basis,
// and selects CEED_TOPOLOGY_TRIANGLE when dim < 3 and CEED_TOPOLOGY_TET otherwise.
constexpr static std::array<std::array<int, 3>, 11> PQ_VALUES = {
    {{3, 1, 2}, {6, 3,  2}, {10, 6,  2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2},
     {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {35, 24, 3}}
};
// clang-format on

// Pairs of {N, NUM_TRIALS}: N is the element count passed to CeedBasisApply and NUM_TRIALS is the number of timed
// applications performed for that N. Larger N values are paired with fewer trials.
constexpr static std::array<std::pair<int, int>, 7> N_VALUES = {
    {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}}
};

// Monotonic clock used for all wall-time measurements, and a double-precision duration whose count() is in seconds.
using Clock    = std::chrono::steady_clock;
using Duration = std::chrono::duration<double>;

32ac8b7a1cSSebastian Grimberg int main(int argc, char **argv) {
33ac8b7a1cSSebastian Grimberg   Ceed ceed;
34ac8b7a1cSSebastian Grimberg 
35ac8b7a1cSSebastian Grimberg   std::random_device               rand_device;
36ac8b7a1cSSebastian Grimberg   std::default_random_engine       rand_engine(rand_device());
37ac8b7a1cSSebastian Grimberg   std::uniform_real_distribution<> rand_dist(0.0, 1.0);
38ac8b7a1cSSebastian Grimberg   auto                             generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); };
39ac8b7a1cSSebastian Grimberg 
40*acc0bb12SSebastian Grimberg   if (argc < 2) {
41*acc0bb12SSebastian Grimberg     printf("Usage: ./tuning <CEED_RESOURCE>");
42*acc0bb12SSebastian Grimberg     return 1;
43*acc0bb12SSebastian Grimberg   }
44*acc0bb12SSebastian Grimberg   CeedInit(argv[1], &ceed);
45ac8b7a1cSSebastian Grimberg   CeedSetErrorHandler(ceed, CeedErrorStore);
46ac8b7a1cSSebastian Grimberg 
47ac8b7a1cSSebastian Grimberg   for (const auto [P, Q, dim] : PQ_VALUES) {
48ac8b7a1cSSebastian Grimberg     CeedBasis  basis;
49ac8b7a1cSSebastian Grimberg     CeedVector u, v;
50ac8b7a1cSSebastian Grimberg 
51ac8b7a1cSSebastian Grimberg     std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim);
52ac8b7a1cSSebastian Grimberg     std::generate(interp.begin(), interp.end(), generate_random);
53ac8b7a1cSSebastian Grimberg     std::generate(grad.begin(), grad.end(), generate_random);
54ac8b7a1cSSebastian Grimberg 
55ac8b7a1cSSebastian Grimberg     CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(),
56ac8b7a1cSSebastian Grimberg                       q_weight.data(), &basis);
57ac8b7a1cSSebastian Grimberg 
58*acc0bb12SSebastian Grimberg     for (const auto [N, NUM_TRIALS] : N_VALUES) {
59ac8b7a1cSSebastian Grimberg       double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0;
60*acc0bb12SSebastian Grimberg       int    ierr;
61ac8b7a1cSSebastian Grimberg 
62ac8b7a1cSSebastian Grimberg       // Interp
63ac8b7a1cSSebastian Grimberg       {
64ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
65ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, Q * N, &v);
66ac8b7a1cSSebastian Grimberg 
67ac8b7a1cSSebastian Grimberg         // NoTranspose
68ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
69ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 0.0);
70*acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
71*acc0bb12SSebastian Grimberg         if (!ierr) {
72ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
73*acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
74*acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
75ac8b7a1cSSebastian Grimberg           }
76*acc0bb12SSebastian Grimberg           data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
77ac8b7a1cSSebastian Grimberg         }
78ac8b7a1cSSebastian Grimberg 
79ac8b7a1cSSebastian Grimberg         // Transpose
80*acc0bb12SSebastian Grimberg         CeedVectorSetValue(u, 1.0);
81*acc0bb12SSebastian Grimberg         CeedVectorSetValue(v, 0.0);
82*acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
83*acc0bb12SSebastian Grimberg         if (!ierr) {
84ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
85*acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
86*acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
87ac8b7a1cSSebastian Grimberg           }
88*acc0bb12SSebastian Grimberg           data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
89ac8b7a1cSSebastian Grimberg         }
90ac8b7a1cSSebastian Grimberg 
91ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
92ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
93ac8b7a1cSSebastian Grimberg       }
94ac8b7a1cSSebastian Grimberg 
95ac8b7a1cSSebastian Grimberg       // Grad
96ac8b7a1cSSebastian Grimberg       {
97ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
98ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, dim * Q * N, &v);
99ac8b7a1cSSebastian Grimberg 
100ac8b7a1cSSebastian Grimberg         // NoTranspose
101ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
102ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 0.0);
103*acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
104*acc0bb12SSebastian Grimberg         if (!ierr) {
105ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
106*acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
107*acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
108ac8b7a1cSSebastian Grimberg           }
109*acc0bb12SSebastian Grimberg           data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
110ac8b7a1cSSebastian Grimberg         }
111ac8b7a1cSSebastian Grimberg 
112ac8b7a1cSSebastian Grimberg         // Transpose
113*acc0bb12SSebastian Grimberg         CeedVectorSetValue(u, 1.0);
114*acc0bb12SSebastian Grimberg         CeedVectorSetValue(v, 0.0);
115*acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
116*acc0bb12SSebastian Grimberg         if (!ierr) {
117ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
118*acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
119*acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
120ac8b7a1cSSebastian Grimberg           }
121*acc0bb12SSebastian Grimberg           data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
122ac8b7a1cSSebastian Grimberg         }
123ac8b7a1cSSebastian Grimberg 
124ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
125ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
126ac8b7a1cSSebastian Grimberg       }
127ac8b7a1cSSebastian Grimberg 
128ac8b7a1cSSebastian Grimberg       // Postprocess and log the data
129ac8b7a1cSSebastian Grimberg       const double  interp_flops = P * Q * (double)N;
130ac8b7a1cSSebastian Grimberg       const double  grad_flops   = P * Q * dim * (double)N;
131ac8b7a1cSSebastian Grimberg       constexpr int width = 12, precision = 2;
132ac8b7a1cSSebastian Grimberg       // clang-format off
133ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
134*acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, 1, width, 0, width, precision,
135*acc0bb12SSebastian Grimberg                   (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0);
136ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
137*acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, 1, width, 1, width, precision,
138*acc0bb12SSebastian Grimberg                   (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0);
139ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
140*acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, dim, width, 0, width, precision,
141*acc0bb12SSebastian Grimberg                   (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0);
142ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
143*acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, dim, width, 1, width, precision,
144*acc0bb12SSebastian Grimberg                   (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0);
145ac8b7a1cSSebastian Grimberg       // clang-format on
146ac8b7a1cSSebastian Grimberg     }
147ac8b7a1cSSebastian Grimberg 
148ac8b7a1cSSebastian Grimberg     CeedBasisDestroy(&basis);
149ac8b7a1cSSebastian Grimberg   }
150ac8b7a1cSSebastian Grimberg 
151ac8b7a1cSSebastian Grimberg   CeedDestroy(&ceed);
152ac8b7a1cSSebastian Grimberg   return 0;
153ac8b7a1cSSebastian Grimberg }
154