#!/usr/bin/env python3

# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
# All Rights reserved. See files LICENSE and NOTICE for details.
#
# This file is part of CEED, a collection of benchmarks, miniapps, software
# libraries and APIs for efficient high-order finite element and spectral
# element discretizations for exascale applications. For more information and
# source code availability see http://github.com/ceed
#
# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
# a collaborative effort of two U.S. Department of Energy organizations (Office
# of Science and the National Nuclear Security Administration) responsible for
# the planning and preparation of a capable exascale ecosystem, including
# software, applications, hardware, advanced system engineering and early
# testbed platforms, in support of the nation's exascale computing imperative.

"""Autotune the MAGMA RTC block size NB.

For every NB in 1..max-nb the script patches CEED_AUTOTUNE_RTC_NB in the GEMM
selector source, rebuilds, runs the ``tuning`` benchmark, and finally writes
``<arch>_rtc.h`` holding, for each benchmarked case, the NB with the highest
MFLOPS.
"""

import argparse
import os
import io
import re
import shlex
import subprocess
import pandas as pd
import time

script_dir = os.path.dirname(os.path.realpath(__file__))

# Columns emitted per record in the generated header, in output order.
_RECORD_COLUMNS = ['P', 'N', 'Q', 'Q_COMP', 'NB']

# Separator comment line written above each section of the generated header.
_RULE = "////////////////////////////////////////////////////////////////////////////////\n"


def build(nb, build_cmd):
    """Patch CEED_AUTOTUNE_RTC_NB to ``nb`` and rebuild the library and benchmark.

    Args:
        nb: Block size written after ``#define CEED_AUTOTUNE_RTC_NB``.
        build_cmd: Build command run from three directories above this
            script. Either a command string (tokenised with ``shlex.split``
            so arguments such as ``"make -j8"`` work) or an argument list.

    Raises:
        subprocess.CalledProcessError: If either build step exits non-zero.
            Continuing would benchmark a stale or missing binary.
    """
    selector = os.path.join(script_dir, "..", "ceed-magma-gemm-selector.cpp")
    with open(selector, 'r') as f:
        data = f.read()
    # Replace the whole line holding the define with "<define> <nb>".
    data = re.sub(
        '.*(#define CEED_AUTOTUNE_RTC_NB).*',
        r'\1' + f" {nb}",
        data)
    with open(selector, 'w') as f:
        f.write(data)

    # Without a shell, a plain string is treated as the program name, so a
    # command carrying arguments has to be split into an argument list first.
    if isinstance(build_cmd, str):
        cmd = shlex.split(build_cmd)
    else:
        cmd = list(build_cmd)
    subprocess.run(cmd, cwd=os.path.join(script_dir, "..", "..", ".."),
                   check=True)
    subprocess.run(["make", "tuning"], cwd=script_dir, check=True)


def benchmark(backend):
    """Run the ``tuning`` executable for ``backend`` and parse its output.

    The executable is looked up in (and run from) this script's directory,
    which is where ``build`` runs ``make tuning``, so the result does not
    depend on the caller's working directory.

    Args:
        backend: Resource specifier passed as the only argument to ``tuning``.

    Returns:
        A DataFrame with columns P, N, Q, Q_COMP, TRANS and MFLOPS parsed from
        the whitespace-separated standard output of the benchmark.
    """
    data = subprocess.run([os.path.join(script_dir, "tuning"), f"{backend}"],
                          cwd=script_dir, capture_output=True)
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    return pd.read_csv(io.StringIO(data.stdout.decode('utf-8')), header=None,
                       sep=r'\s+',
                       names=['P', 'N', 'Q', 'Q_COMP', 'TRANS', 'MFLOPS'])


def _keep_best(best, data_nb, nb):
    """Fold the results for block size ``nb`` into the best-so-far table.

    Args:
        best: Table returned by a previous call, or None for the first NB.
        data_nb: Benchmark results for ``nb`` (same row order as ``best``).
        nb: Block size that produced ``data_nb``.

    Returns:
        The table with NB and MFLOPS updated on every row where ``data_nb``
        is strictly faster.
    """
    if best is None:
        best = data_nb.copy()
        best['NB'] = nb
        return best
    idx = data_nb['MFLOPS'] > best['MFLOPS']
    best.loc[idx, 'NB'] = nb
    best.loc[idx, 'MFLOPS'] = data_nb.loc[idx, 'MFLOPS']
    return best


def _format_table(data, trans, name):
    """Return the C++ source for one tuning table.

    Args:
        data: Best-NB table holding the record columns and a TRANS column.
        trans: Value of TRANS selecting the rows to emit.
        name: Name of the generated ``std::vector`` variable.

    Returns:
        The separator line, the variable definition and one brace-enclosed
        record per selected row, as a single string ending in ``};\\n``.
    """
    subset = data.loc[data['TRANS'] == trans, _RECORD_COLUMNS]
    # to_string() on an empty frame yields an "Empty DataFrame" message, which
    # must not end up in the generated header.
    if subset.empty:
        rows = []
    else:
        rows = subset.to_string(header=False, index=False).split('\n')
    # Columns are right-justified, so a row may start with padding. Only put a
    # comma after a value, never in front of the first one.
    records = [" {" + re.sub(r'(?<=\S)(\s+)', r',\1', row) + "}"
               for row in rows]
    body = ",\n".join(records) + "\n" if records else ""
    return (_RULE +
            f"std::vector<std::array<int, RECORD_LENGTH_RTC> > {name}" +
            " = {\n" + body + "};\n")


def main():
    """Parse the command line, sweep NB and write ``<arch>_rtc.h``."""
    # Command line arguments
    parser = argparse.ArgumentParser("MAGMA RTC autotuning")
    parser.add_argument(
        "-arch",
        help="Device architecture name for tuning data",
        required=True)
    parser.add_argument(
        "-max-nb",
        help="Maximum block size NB to consider for autotuning",
        default=32,
        type=int)
    parser.add_argument(
        "-ceed",
        help="Ceed resource specifier",
        default="/cpu/self")
    parser.add_argument(
        "-build-cmd",
        help="Command used to build libCEED from the source root directory",
        default="make")
    args = parser.parse_args()
    if args.max_nb < 1:
        parser.error("-max-nb must be at least 1")

    data = None
    for nb in range(1, args.max_nb + 1):
        # Rebuild the code for the given value of NB
        build(nb, args.build_cmd)

        # Run the benchmarks
        start = time.perf_counter()
        data_nb = benchmark(args.ceed)
        print(
            f"Finished benchmarks for NB = {nb}, backend = {args.ceed} ({time.perf_counter() - start} s)")

        # Save the data for the highest performing NB
        data = _keep_best(data, data_nb, nb)

    # Print the results
    with open(os.path.join(script_dir, f"{args.arch}_rtc.h"), 'w') as f:
        f.write(_RULE)
        f.write(f"// auto-generated from data on {args.arch}\n\n")
        f.write(_format_table(data, 1, f"drtc_t_{args.arch}"))
        f.write("\n")
        f.write(_format_table(data, 0, f"drtc_n_{args.arch}"))


if __name__ == "__main__":
    main()