#!/usr/bin/env python3
# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
# All Rights reserved. See files LICENSE and NOTICE for details.
#
# This file is part of CEED, a collection of benchmarks, miniapps, software
# libraries and APIs for efficient high-order finite element and spectral
# element discretizations for exascale applications. For more information and
# source code availability see http://github.com/ceed.
#
# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
# a collaborative effort of two U.S. Department of Energy organizations (Office
# of Science and the National Nuclear Security Administration) responsible for
# the planning and preparation of a capable exascale ecosystem, including
# software, applications, hardware, advanced system engineering and early
# testbed platforms, in support of the nation's exascale computing imperative.

import pandas as pd
import fileinput
import pprint

# Read all input files specified on the command line (or stdin if none are
# given), parse their content, and store it as a pandas DataFrame.


def read_logs(files=None):
    """Parse benchmark log output into a pandas DataFrame.

    Every line containing "CEED Benchmark Problem" starts a new run record.
    The record begins as a copy of the values gathered so far, so header
    information (and any field a later run does not override) is carried
    forward, and is then updated by the recognised lines that follow.

    Args:
        files: Passed straight to ``fileinput.input``: a single file name, a
            sequence of file names, or ``None`` to read the files named in
            ``sys.argv[1:]`` (falling back to stdin).

    Returns:
        A ``pandas.DataFrame`` with one row per benchmark run, in the order
        the runs appear in the input. It is empty if no run header was seen.

    Raises:
        ValueError: If a recognised line carries a value that cannot be
            converted to a number.
        IndexError: If a recognised line lacks the expected ``:`` separator
            or value.
    """
    # Defaults inherited by every run until a log line overrides them.
    data = dict(
        file='unknown',
        backend='unknown',
        test='unknown',
        num_procs=0,
        num_procs_node=0,
        degree=0,
        quadrature_pts=0,
        code='libCEED',
    )

    runs = []
    # The context manager closes the input stream even when a malformed line
    # raises; otherwise the module-level fileinput state stays active and the
    # next call fails with "input() already active".
    with fileinput.input(files) as lines:
        for line in lines:
            # Legacy header contains number of MPI tasks
            if 'Running the tests using a total of' in line:
                data['num_procs'] = int(
                    line.split('a total of ', 1)[1].split(None, 1)[0])
            # MPI tasks per node
            elif 'tasks per node' in line:
                data['num_procs_node'] = int(
                    line.split(' tasks per', 1)[0].rsplit(None, 1)[1])
            # New Benchmark Problem
            elif "CEED Benchmark Problem" in line:
                # Starting a new block: copy first so earlier rows stay
                # untouched; later lines mutate the dict already in `runs`.
                data = data.copy()
                runs.append(data)
                data['file'] = fileinput.filename()
                data['test'] = line.split()[-2] + " " + line.split('-- ')[1]
                is_scalar = (('Problem 1' in line) or ('Problem 3' in line)
                             or ('Problem 5' in line))
                data['case'] = 'scalar' if is_scalar else 'vector'
            elif "Hostname" in line:
                data['hostname'] = line.split(':')[1].strip()
            elif "Total ranks" in line:
                data['num_procs'] = int(line.split(':')[1].strip())
            elif "Ranks per node" in line:
                data['num_procs_node'] = int(line.split(':')[1].strip())
            # Backend (the more specific MemType key must be tested first)
            elif 'libCEED Backend MemType' in line:
                data['backend_memtype'] = line.split(':')[1].strip()
            elif 'libCEED Backend' in line:
                data['backend'] = line.split(':')[1].strip()
            # P
            elif 'Basis Nodes' in line:
                data['degree'] = int(line.split(':')[1]) - 1
            # Q
            elif 'Quadrature Points' in line:
                qpts = int(line.split(':')[1])
                data['quadrature_pts'] = qpts**3
            # Total DOFs
            elif 'Global nodes' in line:
                data['num_unknowns'] = int(line.split(':')[1])
                # 'case' is unset until the first benchmark header is seen;
                # treat that the same as a non-vector case instead of raising.
                if data.get('case') == 'vector':
                    data['num_unknowns'] *= 3
            # Number of elements
            elif 'Local Elements' in line:
                data['num_elem'] = int(
                    line.split(':')[1].split()[0]) * data['num_procs']
            # CG Solve Time
            elif 'Total KSP Iterations' in line:
                data['ksp_its'] = int(line.split(':')[1].split()[0])
            elif 'CG Solve Time' in line:
                solve_time = float(line.split(':')[1].split()[0])
                iterations = data.get('ksp_its', 0)
                # Time per iteration is undefined without a positive
                # iteration count; record NaN rather than aborting the parse.
                if iterations:
                    data['time_per_it'] = solve_time / iterations
                else:
                    data['time_per_it'] = float('nan')
            # CG DOFs/Sec
            elif 'DoFs/Sec in CG' in line:
                data['cg_iteration_dps'] = 1e6 * \
                    float(line.split(':')[1].split()[0])

    return pd.DataFrame(runs)


if __name__ == "__main__":
    # Script entry point: parse the logs named on the command line (or
    # stdin), then report how many runs were found and dump the table.
    runs = read_logs()
    num_runs = len(runs)
    print('Number of test runs read: %i' % num_runs)
    print(runs)