1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include <ceed.h>
9 #include <algorithm>
10 #include <array>
11 #include <chrono>
12 #include <iostream>
13 #include <random>
14 #include <vector>
15
16 // clang-format off
17 // Triplets of {P, Q, dim}. For now, includes standard H1 spaces on triangles and tetrahedra with p such that P <= 40,
18 // and quadrature rules of degree 2p and 2p - 2. These can be expanded to more element types and quadrature rules in the
19 // future.
20 constexpr static std::array<std::array<int, 3>, 22> PQ_VALUES = {
21 {{3, 1, 2}, {6, 3, 2}, {10, 6, 2}, {15, 12, 2}, {21, 16, 2}, {28, 25, 2}, {36, 33, 2},
22 {3, 3, 2}, {6, 6, 2}, {10, 12, 2}, {15, 16, 2}, {21, 25, 2}, {28, 33, 2},
23 {4, 1, 3}, {10, 4, 3}, {20, 11, 3}, {20, 14, 3}, {35, 24, 3},
24 {4, 4, 3}, {10, 11, 3}, {10, 14, 3}, {20, 24, 3}}
25 };
26 // clang-format on
27
28 constexpr static std::array<std::pair<int, int>, 7> N_VALUES = {
29 {{1024, 200}, {5120, 200}, {10240, 100}, {51200, 100}, {102400, 50}, {512000, 50}, {1024000, 25}}
30 };
31
32 using Clock = std::chrono::steady_clock;
33 using Duration = std::chrono::duration<double>;
34
main(int argc,char ** argv)35 int main(int argc, char **argv) {
36 Ceed ceed;
37
38 std::random_device rand_device;
39 std::default_random_engine rand_engine(rand_device());
40 std::uniform_real_distribution<> rand_dist(0.0, 1.0);
41 auto generate_random = [&rand_dist, &rand_engine]() { return rand_dist(rand_engine); };
42
43 if (argc < 2) {
44 printf("Usage: ./tuning <CEED_RESOURCE>");
45 return 1;
46 }
47 CeedInit(argv[1], &ceed);
48 CeedSetErrorHandler(ceed, CeedErrorStore);
49
50 for (const auto [P, Q, dim] : PQ_VALUES) {
51 CeedBasis basis;
52 CeedVector u, v;
53
54 std::vector<double> q_ref(dim * Q, 0.0), q_weight(Q, 0.0), interp(P * Q), grad(P * Q * dim);
55 std::generate(interp.begin(), interp.end(), generate_random);
56 std::generate(grad.begin(), grad.end(), generate_random);
57
58 CeedBasisCreateH1(ceed, (dim < 3) ? CEED_TOPOLOGY_TRIANGLE : CEED_TOPOLOGY_TET, 1, P, Q, interp.data(), grad.data(), q_ref.data(),
59 q_weight.data(), &basis);
60
61 for (const auto [N, NUM_TRIALS] : N_VALUES) {
62 double data_interp_n = 0.0, data_interp_t = 0.0, data_grad_n = 0.0, data_grad_t = 0.0;
63 int ierr;
64
65 // Interp
66 {
67 CeedVectorCreate(ceed, P * N, &u);
68 CeedVectorCreate(ceed, Q * N, &v);
69
70 // NoTranspose
71 CeedVectorSetValue(u, 1.0);
72 CeedVectorSetValue(v, 0.0);
73 ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
74 if (!ierr) {
75 const auto start = Clock::now();
76 for (int trial = 0; trial < NUM_TRIALS; trial++) {
77 CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_INTERP, u, v);
78 }
79 data_interp_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
80 }
81
82 // Transpose
83 CeedVectorSetValue(u, 1.0);
84 CeedVectorSetValue(v, 0.0);
85 ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
86 if (!ierr) {
87 const auto start = Clock::now();
88 for (int trial = 0; trial < NUM_TRIALS; trial++) {
89 CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_INTERP, v, u);
90 }
91 data_interp_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
92 }
93
94 CeedVectorDestroy(&u);
95 CeedVectorDestroy(&v);
96 }
97
98 // Grad
99 {
100 CeedVectorCreate(ceed, P * N, &u);
101 CeedVectorCreate(ceed, dim * Q * N, &v);
102
103 // NoTranspose
104 CeedVectorSetValue(u, 1.0);
105 CeedVectorSetValue(v, 0.0);
106 ierr = CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
107 if (!ierr) {
108 const auto start = Clock::now();
109 for (int trial = 0; trial < NUM_TRIALS; trial++) {
110 CeedBasisApply(basis, N, CEED_NOTRANSPOSE, CEED_EVAL_GRAD, u, v);
111 }
112 data_grad_n = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
113 }
114
115 // Transpose
116 CeedVectorSetValue(u, 1.0);
117 CeedVectorSetValue(v, 0.0);
118 ierr = CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
119 if (!ierr) {
120 const auto start = Clock::now();
121 for (int trial = 0; trial < NUM_TRIALS; trial++) {
122 CeedBasisApply(basis, N, CEED_TRANSPOSE, CEED_EVAL_GRAD, v, u);
123 }
124 data_grad_t = std::chrono::duration_cast<Duration>(Clock::now() - start).count();
125 }
126
127 CeedVectorDestroy(&u);
128 CeedVectorDestroy(&v);
129 }
130
131 // Postprocess and log the data
132 const double interp_flops = P * Q * (double)N;
133 const double grad_flops = P * Q * dim * (double)N;
134 constexpr int width = 12, precision = 2;
135 // clang-format off
136 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
137 width, P, width, Q, width, N, width, 1, width, 0, width, precision,
138 (data_interp_n > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_n : 0.0);
139 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
140 width, P, width, Q, width, N, width, 1, width, 1, width, precision,
141 (data_interp_t > 0.0) ? 1e-6 * NUM_TRIALS * interp_flops / data_interp_t : 0.0);
142 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
143 width, P, width, Q, width, N, width, dim, width, 0, width, precision,
144 (data_grad_n > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_n : 0.0);
145 std::printf("%-*d%-*d%-*d%-*d%-*d%*.*f\n",
146 width, P, width, Q, width, N, width, dim, width, 1, width, precision,
147 (data_grad_t > 0.0) ? 1e-6 * NUM_TRIALS * grad_flops / data_grad_t : 0.0);
148 // clang-format on
149 }
150
151 CeedBasisDestroy(&basis);
152 }
153
154 CeedDestroy(&ceed);
155 return 0;
156 }
157