xref: /libCEED/backends/magma/tuning/generate_tuning.py (revision 26bdecf31bec54c17c7af5a516affbb56f2e8d19)
#!/usr/bin/env python3

# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
# All Rights reserved. See files LICENSE and NOTICE for details.
#
# This file is part of CEED, a collection of benchmarks, miniapps, software
# libraries and APIs for efficient high-order finite element and spectral
# element discretizations for exascale applications. For more information and
# source code availability see http://github.com/ceed
#
# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
# a collaborative effort of two U.S. Department of Energy organizations (Office
# of Science and the National Nuclear Security Administration) responsible for
# the planning and preparation of a capable exascale ecosystem, including
# software, applications, hardware, advanced system engineering and early
# testbed platforms, in support of the nation's exascale computing imperative.
18*26bdecf3SSebastian Grimberg
import argparse
import io
import os
import re
import shlex
import subprocess
import time

import pandas as pd
26*26bdecf3SSebastian Grimberg
# Directory that contains this script (symbolic links resolved); all paths
# used by the build and benchmark steps are derived from it.
script_dir = os.path.split(os.path.realpath(__file__))[0]
28*26bdecf3SSebastian Grimberg
29*26bdecf3SSebastian Grimberg
def build(nb, build_cmd):
    """Set the autotuning block size in the GEMM selector and rebuild.

    Rewrites every line of ``../ceed-magma-gemm-selector.cpp`` (relative to
    this script) that contains ``#define CEED_AUTOTUNE_RTC_NB`` so that it
    reads ``#define CEED_AUTOTUNE_RTC_NB <nb>``, then runs ``build_cmd`` three
    directories above this script and ``make tuning`` in the script directory.

    Args:
        nb: Value written after the ``CEED_AUTOTUNE_RTC_NB`` macro name.
        build_cmd: Build command. A string is split with shell-like quoting
            rules (so ``"make -j8"`` works); a sequence is passed through
            unchanged.

    Raises:
        subprocess.CalledProcessError: If either build step exits with a
            non-zero status.
    """
    selector_path = f"{script_dir}/../ceed-magma-gemm-selector.cpp"
    with open(selector_path, 'r') as f:
        source = f.read()
    # '.' does not match newlines, so only the line holding the macro is
    # replaced; anything before or after the macro on that line is dropped.
    source = re.sub(
        r'.*(#define CEED_AUTOTUNE_RTC_NB).*',
        r'\1' + f" {nb}",
        source)
    with open(selector_path, 'w') as f:
        f.write(source)

    # Without a shell, a plain string is treated as a single executable name,
    # so a command carrying arguments has to be split into an argument list.
    if isinstance(build_cmd, str):
        build_cmd = shlex.split(build_cmd)

    # check=True: stop instead of benchmarking a stale binary after a failed
    # build.
    subprocess.run(build_cmd, cwd=f"{script_dir}/../../..", check=True)
    subprocess.run(["make", "tuning"], cwd=f"{script_dir}", check=True)
40*26bdecf3SSebastian Grimberg    subprocess.run(["make", "tuning"], cwd=f"{script_dir}")
41*26bdecf3SSebastian Grimberg
42*26bdecf3SSebastian Grimberg
def benchmark(backend):
    """Run the ``tuning`` executable for one backend and parse its output.

    The executable is looked up next to this script and started with the
    script directory as its working directory, so the result does not depend
    on where the script was launched from.

    Args:
        backend: Ceed resource specifier passed as the only argument to the
            ``tuning`` executable.

    Returns:
        pandas.DataFrame: One row per whitespace-separated record printed on
        the executable's standard output, with columns ``P``, ``N``, ``Q``,
        ``Q_COMP``, ``TRANS`` and ``MFLOPS``.

    Raises:
        subprocess.CalledProcessError: If the executable exits with a
            non-zero status.
    """
    result = subprocess.run(
        [os.path.join(script_dir, "tuning"), f"{backend}"],
        cwd=script_dir,
        capture_output=True,
        check=True)
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    return pd.read_csv(io.StringIO(result.stdout.decode('utf-8')), header=None,
                       sep=r'\s+', names=['P', 'N', 'Q', 'Q_COMP', 'TRANS', 'MFLOPS'])
47*26bdecf3SSebastian Grimberg
48*26bdecf3SSebastian Grimberg
def format_rtc_table(data, trans, name):
    """Return the C++ definition of one RTC tuning table as text.

    Rows are built directly from the values instead of from
    ``DataFrame.to_string()`` output, so padding in the rendered frame can
    never produce a leading comma and an empty selection yields an empty
    initializer list.

    Args:
        data: DataFrame with at least the columns ``P``, ``N``, ``Q``,
            ``Q_COMP``, ``NB`` and ``TRANS``.
        trans: Value of the ``TRANS`` column selecting the rows to emit.
        name: Name of the generated C++ variable.

    Returns:
        str: A separator comment line, the variable declaration, one
        ``{P, N, Q, Q_COMP, NB}`` initializer per selected row, and the
        closing ``};`` line.
    """
    columns = ['P', 'N', 'Q', 'Q_COMP', 'NB']
    selected = data.loc[data['TRANS'] == trans, columns]
    rows = [
        "    {" + ", ".join(str(int(value)) for value in record) + "}"
        for record in selected.itertuples(index=False, name=None)
    ]
    parts = [
        "////////////////////////////////////////////////////////////////////////////////\n",
        f"std::vector<std::array<int, RECORD_LENGTH_RTC> > {name}" + " = {\n",
    ]
    if rows:
        # Every row but the last is followed by a comma.
        parts.append(",\n".join(rows) + "\n")
    parts.append("};\n")
    return "".join(parts)


if __name__ == "__main__":
    # Command line arguments
    parser = argparse.ArgumentParser("MAGMA RTC autotuning")
    parser.add_argument(
        "-arch",
        help="Device architecture name for tuning data",
        required=True)
    parser.add_argument(
        "-max-nb",
        help="Maximum block size NB to consider for autotuning",
        default=32,
        type=int)
    parser.add_argument(
        "-ceed",
        help="Ceed resource specifier",
        default="/cpu/self")
    parser.add_argument(
        "-build-cmd",
        help="Command used to build libCEED from the source root directory",
        default="make")
    args = parser.parse_args()

    # With no block size to try there would be no data to write below.
    if args.max_nb < 1:
        parser.error("-max-nb must be at least 1")

    for nb in range(1, args.max_nb + 1):
        # Rebuild the code for the given value of NB
        build(nb, args.build_cmd)

        # Run the benchmarks
        start = time.perf_counter()
        data_nb = benchmark(args.ceed)
        print(
            f"Finished benchmarks for NB = {nb}, backend = {args.ceed} ({time.perf_counter() - start} s)")

        # Save the data for the highest performing NB
        if nb == 1:
            data = pd.DataFrame(data_nb)
            data['NB'] = nb
        else:
            # Rows where this NB beats the best MFLOPS recorded so far
            idx = data_nb['MFLOPS'] > data['MFLOPS']
            data.loc[idx, 'NB'] = nb
            data.loc[idx, 'MFLOPS'] = data_nb.loc[idx, 'MFLOPS']

    # Print the results: one table for TRANS == 1 and one for TRANS == 0
    with open(f"{script_dir}/{args.arch}_rtc.h", 'w') as f:
        f.write(
            "////////////////////////////////////////////////////////////////////////////////\n")
        f.write(f"// auto-generated from data on {args.arch}\n\n")
        f.write(format_rtc_table(data, 1, f"drtc_t_{args.arch}"))
        f.write("\n")
        f.write(format_rtc_table(data, 0, f"drtc_n_{args.arch}"))
123