xref: /libCEED/backends/magma/tuning/tuning.cpp (revision 51d50b591dd93a8ae376a17a79659a99c85c5407)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <algorithm>
10 #include <array>
11 #include <chrono>
12 #include <iostream>
13 #include <random>
14 #include <vector>
15 
16 // clang-format off
17 // Triplets of {P, Q, dim}. For now, includes standard H1 spaces on triangles and tetrahedra with p such that P <= 40,
18 // and quadrature rules of degree 2p and 2p - 2. These can be expanded to more element types and quadrature rules in the
19 // future.
20 constexpr static std::array<std::array<int, 3>, 22> PQ_VALUES = {
21     {{3, 1, 2}, {6,  3,  2}, {10, 6,  2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2},
22      {3, 3, 2}, {6,  6,  2}, {10, 12, 2}, {15, 16, 2}, {21, 25, 2}, {28, 33, 2},
23      {4, 1, 3}, {10, 4,  3}, {20, 11, 3}, {20, 14, 3}, {35, 24, 3},
24      {4, 4, 3}, {10, 11, 3}, {10, 14, 3}, {20, 24, 3}}
25 };
26 // clang-format on
27 
28 constexpr static std::array<std::pair<int, int>, 7> N_VALUES = {
29     {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}}
30 };
31 
32 using Clock    = std::chrono::steady_clock;
33 using Duration = std::chrono::duration<double>;
34 
35 int main(int argc, char **argv) {
36   Ceed ceed;
37 
38   std::random_device               rand_device;
39   std::default_random_engine       rand_engine(rand_device());
40   std::uniform_real_distribution<> rand_dist(0.0, 1.0);
41   auto                             generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); };
42 
43   if (argc < 2) {
44     printf("Usage: ./tuning <CEED_RESOURCE>");
45     return 1;
46   }
47   CeedInit(argv[1], &ceed);
48   CeedSetErrorHandler(ceed, CeedErrorStore);
49 
50   for (const auto [P, Q, dim] : PQ_VALUES) {
51     CeedBasis  basis;
52     CeedVector u, v;
53 
54     std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim);
55     std::generate(interp.begin(), interp.end(), generate_random);
56     std::generate(grad.begin(), grad.end(), generate_random);
57 
58     CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(),
59                       q_weight.data(), &basis);
60 
61     for (const auto [N, NUM_TRIALS] : N_VALUES) {
62       double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0;
63       int    ierr;
64 
65       // Interp
66       {
67         CeedVectorCreate(ceed, P * N, &u);
68         CeedVectorCreate(ceed, Q * N, &v);
69 
70         // NoTranspose
71         CeedVectorSetValue(u, 1.0);
72         CeedVectorSetValue(v, 0.0);
73         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
74         if (!ierr) {
75           const auto start = Clock::now();
76           for (int trial = 0; trial < NUM_TRIALS; trial++) {
77             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
78           }
79           data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
80         }
81 
82         // Transpose
83         CeedVectorSetValue(u, 1.0);
84         CeedVectorSetValue(v, 0.0);
85         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
86         if (!ierr) {
87           const auto start = Clock::now();
88           for (int trial = 0; trial < NUM_TRIALS; trial++) {
89             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
90           }
91           data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
92         }
93 
94         CeedVectorDestroy(&u);
95         CeedVectorDestroy(&v);
96       }
97 
98       // Grad
99       {
100         CeedVectorCreate(ceed, P * N, &u);
101         CeedVectorCreate(ceed, dim * Q * N, &v);
102 
103         // NoTranspose
104         CeedVectorSetValue(u, 1.0);
105         CeedVectorSetValue(v, 0.0);
106         ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
107         if (!ierr) {
108           const auto start = Clock::now();
109           for (int trial = 0; trial < NUM_TRIALS; trial++) {
110             CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
111           }
112           data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
113         }
114 
115         // Transpose
116         CeedVectorSetValue(u, 1.0);
117         CeedVectorSetValue(v, 0.0);
118         ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
119         if (!ierr) {
120           const auto start = Clock::now();
121           for (int trial = 0; trial < NUM_TRIALS; trial++) {
122             CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
123           }
124           data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
125         }
126 
127         CeedVectorDestroy(&u);
128         CeedVectorDestroy(&v);
129       }
130 
131       // Postprocess and log the data
132       const double  interp_flops = P * Q * (double)N;
133       const double  grad_flops   = P * Q * dim * (double)N;
134       constexpr int width = 12, precision = 2;
135       // clang-format off
136       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
137                   width, P, width, Q, width, N, width, 1, width, 0, width, precision,
138                   (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0);
139       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
140                   width, P, width, Q, width, N, width, 1, width, 1, width, precision,
141                   (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0);
142       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
143                   width, P, width, Q, width, N, width, dim, width, 0, width, precision,
144                   (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0);
145       std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
146                   width, P, width, Q, width, N, width, dim, width, 1, width, precision,
147                   (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0);
148       // clang-format on
149     }
150 
151     CeedBasisDestroy(&basis);
152   }
153 
154   CeedDestroy(&ceed);
155   return 0;
156 }
157