xref: /libCEED/rust/libceed-sys/c-src/backends/magma/tuning/tuning.cpp (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2ac8b7a1cSSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3ac8b7a1cSSebastian Grimberg //
4ac8b7a1cSSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
5ac8b7a1cSSebastian Grimberg //
6ac8b7a1cSSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
7ac8b7a1cSSebastian Grimberg 
8ac8b7a1cSSebastian Grimberg #include <ceed.h>
9ac8b7a1cSSebastian Grimberg #include <algorithm>
10ac8b7a1cSSebastian Grimberg #include <array>
11ac8b7a1cSSebastian Grimberg #include <chrono>
12ac8b7a1cSSebastian Grimberg #include <iostream>
13ac8b7a1cSSebastian Grimberg #include <random>
14ac8b7a1cSSebastian Grimberg #include <vector>
15ac8b7a1cSSebastian Grimberg 
16ac8b7a1cSSebastian Grimberg // clang-format off
1747cb5789SSebastian Grimberg // Triplets of {P, Q, dim}. For now, includes standard H1 spaces on triangles and tetrahedra with p such that P <= 40,
1847cb5789SSebastian Grimberg // and quadrature rules of degree 2p and 2p - 2. These can be expanded to more element types and quadrature rules in the
1947cb5789SSebastian Grimberg // future.
2047cb5789SSebastian Grimberg constexpr static std::array<std::array<int, 3>, 22> PQ_VALUES = {
21ac8b7a1cSSebastian Grimberg     {{3, 1, 2}, {6,  3,  2}, {10, 6,  2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2},
2247cb5789SSebastian Grimberg      {3, 3, 2}, {6,  6,  2}, {10, 12, 2}, {15, 16, 2}, {21, 25, 2}, {28, 33, 2},
2347cb5789SSebastian Grimberg      {4, 1, 3}, {10, 4,  3}, {20, 11, 3}, {20, 14, 3}, {35, 24, 3},
2447cb5789SSebastian Grimberg      {4, 4, 3}, {10, 11, 3}, {10, 14, 3}, {20, 24, 3}}
25ac8b7a1cSSebastian Grimberg };
26ac8b7a1cSSebastian Grimberg // clang-format on
27ac8b7a1cSSebastian Grimberg 
28acc0bb12SSebastian Grimberg constexpr static std::array<std::pair<int, int>, 7> N_VALUES = {
29acc0bb12SSebastian Grimberg     {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}}
30acc0bb12SSebastian Grimberg };
31ac8b7a1cSSebastian Grimberg 
32ac8b7a1cSSebastian Grimberg using Clock    = std::chrono::steady_clock;
33ac8b7a1cSSebastian Grimberg using Duration = std::chrono::duration<double>;
34ac8b7a1cSSebastian Grimberg 
main(int argc,char ** argv)35ac8b7a1cSSebastian Grimberg int main(int argc, char **argv) {
36ac8b7a1cSSebastian Grimberg   Ceed ceed;
37ac8b7a1cSSebastian Grimberg 
38ac8b7a1cSSebastian Grimberg   std::random_device               rand_device;
39ac8b7a1cSSebastian Grimberg   std::default_random_engine       rand_engine(rand_device());
40ac8b7a1cSSebastian Grimberg   std::uniform_real_distribution<> rand_dist(0.0, 1.0);
41ac8b7a1cSSebastian Grimberg   auto                             generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); };
42ac8b7a1cSSebastian Grimberg 
43acc0bb12SSebastian Grimberg   if (argc < 2) {
44acc0bb12SSebastian Grimberg     printf("Usage: ./tuning <CEED_RESOURCE>");
45acc0bb12SSebastian Grimberg     return 1;
46acc0bb12SSebastian Grimberg   }
47acc0bb12SSebastian Grimberg   CeedInit(argv[1], &ceed);
48ac8b7a1cSSebastian Grimberg   CeedSetErrorHandler(ceed, CeedErrorStore);
49ac8b7a1cSSebastian Grimberg 
50ac8b7a1cSSebastian Grimberg   for (const auto [P, Q, dim] : PQ_VALUES) {
51ac8b7a1cSSebastian Grimberg     CeedBasis  basis;
52ac8b7a1cSSebastian Grimberg     CeedVector u, v;
53ac8b7a1cSSebastian Grimberg 
54ac8b7a1cSSebastian Grimberg     std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim);
55ac8b7a1cSSebastian Grimberg     std::generate(interp.begin(), interp.end(), generate_random);
56ac8b7a1cSSebastian Grimberg     std::generate(grad.begin(), grad.end(), generate_random);
57ac8b7a1cSSebastian Grimberg 
58ac8b7a1cSSebastian Grimberg     CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(),
59ac8b7a1cSSebastian Grimberg                       q_weight.data(), &basis);
60ac8b7a1cSSebastian Grimberg 
61acc0bb12SSebastian Grimberg     for (const auto [N, NUM_TRIALS] : N_VALUES) {
62ac8b7a1cSSebastian Grimberg       double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0;
63acc0bb12SSebastian Grimberg       int    ierr;
64ac8b7a1cSSebastian Grimberg 
65ac8b7a1cSSebastian Grimberg       // Interp
66ac8b7a1cSSebastian Grimberg       {
67ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
68ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, Q * N, &v);
69ac8b7a1cSSebastian Grimberg 
70ac8b7a1cSSebastian Grimberg         // NoTranspose
71ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
72ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 0.0);
73acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
74acc0bb12SSebastian Grimberg         if (!ierr) {
75ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
76acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
77acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
78ac8b7a1cSSebastian Grimberg           }
79acc0bb12SSebastian Grimberg           data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
80ac8b7a1cSSebastian Grimberg         }
81ac8b7a1cSSebastian Grimberg 
82ac8b7a1cSSebastian Grimberg         // Transpose
83acc0bb12SSebastian Grimberg         CeedVectorSetValue(u, 1.0);
84acc0bb12SSebastian Grimberg         CeedVectorSetValue(v, 0.0);
85acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
86acc0bb12SSebastian Grimberg         if (!ierr) {
87ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
88acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
89acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
90ac8b7a1cSSebastian Grimberg           }
91acc0bb12SSebastian Grimberg           data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
92ac8b7a1cSSebastian Grimberg         }
93ac8b7a1cSSebastian Grimberg 
94ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
95ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
96ac8b7a1cSSebastian Grimberg       }
97ac8b7a1cSSebastian Grimberg 
98ac8b7a1cSSebastian Grimberg       // Grad
99ac8b7a1cSSebastian Grimberg       {
100ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, P * N, &u);
101ac8b7a1cSSebastian Grimberg         CeedVectorCreate(ceed, dim * Q * N, &v);
102ac8b7a1cSSebastian Grimberg 
103ac8b7a1cSSebastian Grimberg         // NoTranspose
104ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(u, 1.0);
105ac8b7a1cSSebastian Grimberg         CeedVectorSetValue(v, 0.0);
106acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
107acc0bb12SSebastian Grimberg         if (!ierr) {
108ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
109acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
110acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
111ac8b7a1cSSebastian Grimberg           }
112acc0bb12SSebastian Grimberg           data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
113ac8b7a1cSSebastian Grimberg         }
114ac8b7a1cSSebastian Grimberg 
115ac8b7a1cSSebastian Grimberg         // Transpose
116acc0bb12SSebastian Grimberg         CeedVectorSetValue(u, 1.0);
117acc0bb12SSebastian Grimberg         CeedVectorSetValue(v, 0.0);
118acc0bb12SSebastian Grimberg         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
119acc0bb12SSebastian Grimberg         if (!ierr) {
120ac8b7a1cSSebastian Grimberg           const auto start = Clock::now();
121acc0bb12SSebastian Grimberg           for (int trial = 0; trial < NUM_TRIALS; trial++) {
122acc0bb12SSebastian Grimberg             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
123ac8b7a1cSSebastian Grimberg           }
124acc0bb12SSebastian Grimberg           data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
125ac8b7a1cSSebastian Grimberg         }
126ac8b7a1cSSebastian Grimberg 
127ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&u);
128ac8b7a1cSSebastian Grimberg         CeedVectorDestroy(&v);
129ac8b7a1cSSebastian Grimberg       }
130ac8b7a1cSSebastian Grimberg 
131ac8b7a1cSSebastian Grimberg       // Postprocess and log the data
132ac8b7a1cSSebastian Grimberg       const double  interp_flops = P * Q * (double)N;
133ac8b7a1cSSebastian Grimberg       const double  grad_flops   = P * Q * dim * (double)N;
134ac8b7a1cSSebastian Grimberg       constexpr int width = 12, precision = 2;
135ac8b7a1cSSebastian Grimberg       // clang-format off
136ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
137acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, 1, width, 0, width, precision,
138acc0bb12SSebastian Grimberg                   (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0);
139ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
140acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, 1, width, 1, width, precision,
141acc0bb12SSebastian Grimberg                   (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0);
142ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
143acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, dim, width, 0, width, precision,
144acc0bb12SSebastian Grimberg                   (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0);
145ac8b7a1cSSebastian Grimberg       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
146acc0bb12SSebastian Grimberg                   width, P, width, Q, width, N, width, dim, width, 1, width, precision,
147acc0bb12SSebastian Grimberg                   (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0);
148ac8b7a1cSSebastian Grimberg       // clang-format on
149ac8b7a1cSSebastian Grimberg     }
150ac8b7a1cSSebastian Grimberg 
151ac8b7a1cSSebastian Grimberg     CeedBasisDestroy(&basis);
152ac8b7a1cSSebastian Grimberg   }
153ac8b7a1cSSebastian Grimberg 
154ac8b7a1cSSebastian Grimberg   CeedDestroy(&ceed);
155ac8b7a1cSSebastian Grimberg   return 0;
156ac8b7a1cSSebastian Grimberg }
157