xref: /petsc/lib/petsc/bin/petsc_tas_analysis.py (revision 6996bd1a6dda9216f11f3a1c5d2357ea301aa80d)
1#!/usr/bin/env python3
2import numpy as np
3import os
4import sys
5import importlib
6import datetime as date
7
# Check to ensure that the environmental variable PETSC_DIR has been assigned.
# MPLCONFIGDIR is needed for matplotlib
# NOTE: MPLCONFIGDIR must be set before `import matplotlib` below, which is
# why this check sits in the middle of the import section.
try:
    if os.environ.get('PETSC_DIR') is None:
        # Raised purely as a local control-flow signal; caught immediately below.
        raise NotADirectoryError()
    os.environ['MPLCONFIGDIR'] = os.environ.get(
        'PETSC_DIR')+'/share/petsc/xml/'
except NotADirectoryError:
    sys.exit('The environmental variable PETSC_DIR was not found.\n'
             'Please add this variable with the base directory for PETSc or the base directory that MPLCONFIGDIR resides')
18
19import matplotlib.pyplot as plt
20import argparse
21import math
22import configureTAS as config
23import pandas as pd
24from tasClasses import File
25from tasClasses import Field
26
def main(cmdLineArgs):
    """
    Entry point: either applies one of the configureTAS.py maintenance options
    given on the command line (each of those paths exits when done), or
    collects the data files to analyse, parses them, and generates the
    requested graphs.

    :param cmdLineArgs: Contains the parsed command line arguments.

    :returns: None; the function always terminates the program via exit().
    """
    data = []

    # --- Command-line options that edit configureTAS.py; each exits when done. ---
    if cmdLineArgs.setDefaultGraphDir is not None:
        newPath = cmdLineArgs.setDefaultGraphDir[0]
        aliasInConfig, editConfig = checkAlias('defaultGraphs', newPath)

        # BUG FIX: `result` was previously unbound (NameError) when the user
        # declined the edit or the directory check failed.
        result = False
        if editConfig and checkDirforFilePath('defaultGraphs', newPath):
            result = editConfigureTasFile(
                'defaultGraphs', 'add', aliasInConfig, newPath)

        if result:
            print(f'\nconfigureTAS.py defaultGraphs was updated with path {newPath}\n')

        exit()

    if cmdLineArgs.setDefaultFileDir is not None:
        newPath = cmdLineArgs.setDefaultFileDir[0]
        aliasInConfig, editConfig = checkAlias('defaultData', newPath)

        # BUG FIX: same unbound-`result` guard as above.
        result = False
        if editConfig and checkDirforFilePath('defaultData', newPath):
            result = editConfigureTasFile(
                'defaultData', 'add', aliasInConfig, newPath)

        if result:
            print(f'\nconfigureTAS.py defaultData was updated with path {newPath}\n')

        exit()

    if cmdLineArgs.addEditAliasDir is not None:
        listToAdd = cmdLineArgs.addEditAliasDir
        if len(listToAdd) % 2 == 0:
            # Arguments arrive as flat (alias, path) pairs.
            for counter in range(0, len(listToAdd)-1, 2):
                newAlias = listToAdd[counter]
                newPath = listToAdd[counter+1]
                aliasInConfig, editConfig = checkAlias(newAlias, newPath)

                if editConfig and checkDirforFilePath(newAlias, newPath):
                    result = editConfigureTasFile(
                        newAlias, 'add', aliasInConfig, newPath)
                    if result:
                        print(f'\nconfigureTAS.py was updated with\n\talias: {newAlias}\n\tpath {newPath}\n')
                    else:
                        print(f'\nconfigureTAS.py was NOT updated with \n\talias: {newAlias}\n\tpath {newPath}\n')
        else:
            print(f'\nWhen using the command line option to add or edit an alias path pair, both must be included.\n'
                  f'Your input was:')
            for item in cmdLineArgs.addEditAliasDir:
                print(item)
        exit()

    if cmdLineArgs.removeAliasDir is not None:
        for alias in cmdLineArgs.removeAliasDir:
            if alias in config.filePath:
                result = editConfigureTasFile(alias, 'remove')
                if result:
                    print(f'\n{alias} was successfully removed from configureTAS.py\n')
                else:
                    print(f'\n{alias} was not successful removed from configureTAS.py\n')
            else:
                print(f'\n{alias} was not found in configureTAS.py\nList of valid aliases is:\n')
                # BUG FIX: the inner loop previously reused the name `alias`,
                # clobbering the outer loop variable mid-iteration.
                for validAlias in config.filePath:
                    print(validAlias)
        exit()

    # --- Collect input files, parse them, and draw the graphs. ---
    if cmdLineArgs.file is None:
        if cmdLineArgs.pathAliasData is None:
            files = getFiles(cmdLineArgs, 'defaultData')
        else:
            files = getFiles(cmdLineArgs, cmdLineArgs.pathAliasData[0])
    else:
        files = getFiles(cmdLineArgs, None)

    # Iterating an empty list is a no-op, so no explicit len checks are needed.
    for fileName in files['module']:
        data.append(dataProces(cmdLineArgs, fileName))

    for fileName in files['csv']:
        data.append(dataProcesCSV(cmdLineArgs, fileName))

    for item in data:
        graphGen(item, cmdLineArgs.enable_graphs,
                 cmdLineArgs.graph_flops_scaling, cmdLineArgs.dim)

    exit()
113
114
def checkAlias(alias, path):
    """
    Decide whether configureTAS.py should be edited for *alias*.

    When *alias* already exists as a key in the config.filePath dictionary,
    the current alias/path pair is displayed and the user is prompted to
    confirm replacing it; the answer decides whether the edit proceeds.
    An alias not yet in the config needs no confirmation.

    :param alias: Key to look up in the Dictionary filePath of configureTAS.py.
    :param path: Replacement path as entered on the command line.

    :returns:   a tuple of True/False. The first is True if the alias exists in
                configureTAS.py and the second is True if the edit should proceed.
    """

    if alias not in config.filePath:
        # Brand-new alias: nothing to overwrite, no confirmation needed.
        return False, True

    print(f'\nalias: {alias}\nalready has path: {config.filePath[alias]}\n\n'
          f'Do you wish to replace with \npath: {path}\n')

    answer = input('(y/n) to continue\n')
    # Alias exists; proceed only on an explicit 'y'.
    return True, answer.lower() == 'y'
144
145
def checkDirforFilePath(alias, path):
    """
    This function checks to see if the path to be added/updated, as entered on the command line, already exists
    in the file system. If the path exists the function returns True.

    If it does not, the user is prompted with the option of creating the directory.  If the user responds
    yes an attempt will be made to create the directory.  If it is successful a message that it was created is
    displayed and the function returns True.

    If there is an error then a message is displayed and the user is asked if they wish to continue.  If they
    respond yes the function will return True, otherwise it will return False.

    If during any of the other prompts the user responds with no, the function returns False.

    :param alias: Contains the string to compare to the keys in Dictionary filePath.
    :param path: Contains the new path as entered on the command line.

    :returns:   True/False, True if the program should edit configureTAS.py and False to not edit it.
    """

    if os.path.isdir(path):
        return True

    print(f'\nDirectory: {path}\nfor alias: {alias}\nDoes not exist.\n'
          f'Do you wish to create the directory\n')
    response = input('(y/n) to continue\n')
    if response.lower() == 'y':
        try:
            os.mkdir(path)
            print('Directory successfully created.\n')
            return True
        except OSError:
            # BUG FIX: user-facing typo "Do you with" corrected to "Do you wish".
            print(f'\nAn error occurred while attempting to create directory:\n{path}\n\n'
                  f'Please check to make sure that you have permission to create directories\n\n'
                  f'Do you wish to continue with adding {path} to configureTAS.py?')
            response = input('(y/n) to continue\n')
            return response.lower() == 'y'
    else:
        print('\nDo you wish to continue adding the alias path pair to configureTAS.py?\n')
        response = input('(y/n) to continue\n')
        return response.lower() == 'y'
193
194
def editConfigureTasFile(alias, mode, aliasInConfig=False, path=None):
    """
    This function edits configureTAS.py by updating an alias path pair, adding a new one, or removing one,
    then reloads the config module to verify the change took effect.

    :param alias: Contains the string to compare to the keys in Dictionary filePath.
    :param mode: Contains a string, add or remove.  If it is add then an alias will be added
                    or edited.  If it is remove an alias and path will be removed.
    :param aliasInConfig: True when the alias already exists and its line should be
                    replaced in place; False to insert a brand-new line.
    :param path: Contains the new path as entered on the command line (unused in remove mode).

    :returns:   True/False, True if the file is updated, False if not.
    """

    # BUG FIX: the original matched `alias in line`, a bare substring test, so
    # editing/removing alias 'data' would also clobber the 'defaultData' line.
    # Match the exact dictionary-key assignment instead (the same form this
    # function writes; assumes configureTAS.py uses single quotes — matches
    # the lines generated below).
    aliasKey = "filePath['" + alias + "']"
    newLine = aliasKey + '=\'' + str(path) + '\'\n'

    linesToWrite = []
    with open('configureTAS.py', 'r') as configureTASFile:
        for line in configureTASFile:
            if mode == 'add':
                if aliasInConfig:
                    # Replace the existing assignment for this alias in place.
                    linesToWrite.append(newLine if aliasKey in line else line)
                else:
                    linesToWrite.append(line)
                    # Insert the new alias directly after the defaultData entry.
                    if "filePath['defaultData']" in line:
                        linesToWrite.append(newLine)
            elif aliasKey in line:
                # Remove mode: drop this alias's assignment line.
                continue
            else:
                linesToWrite.append(line)

    with open('configureTAS.py', 'w') as configureTASFile:
        configureTASFile.writelines(linesToWrite)

    # Reload so the in-memory config reflects the file we just rewrote.
    importlib.reload(config)

    # Success means the reloaded dictionary reflects the requested change.
    if mode == 'add':
        return alias in config.filePath
    return alias not in config.filePath
246
247
def getFiles(cmdLineArgs, alias):
    """
    This function first determines if it should look in the pathway specified in filePath[alias]
    in the configureTAS.py file (alias is not None) or at the file names given as command line
    arguments using -f/-file (alias is None).  It then builds lists of file names and stores them
    in a dictionary, where the keys correspond to the type of file, ie, module (ASCII type) or CSV.

    :param cmdLineArgs: Contains command line arguments.
    :param alias: Key into config.filePath naming the data directory, or None to use cmdLineArgs.file.

    :returns:   files, a dictionary with keys whose values are lists of file names, grouped by type
                of file.
    """

    files = {'module': [], 'csv': []}

    if alias is not None:
        try:
            if not config.filePath[alias]:
                raise NotADirectoryError()
            dataPath = config.filePath[alias]
            for f in os.listdir(dataPath):
                # Module files are importable, so the extension is stripped;
                # CSV files keep their full name for pd.read_csv.
                if f.endswith('.py'):
                    files['module'].append(f[:-3])
                elif f.endswith('.pyc'):
                    files['module'].append(f[:-4])
                elif f.endswith('.csv'):
                    files['csv'].append(f)
            # BUG FIX: the original tested `len(files) == 0`, which is never
            # true for a dict that always has two keys; test the collected
            # lists instead so an empty/irrelevant directory is reported.
            if not files['module'] and not files['csv']:
                raise IOError()
        except NotADirectoryError:
            print(f'The path for {alias} in configureTAS.py is empty and no valid file was specified using the -file/-f argument. \n'
                  f'Please either specify a path in configureTAS.py or use the command line argument -file/-f')
        except IOError:
            sys.exit('No valid data files in ' + dataPath + ' and -file/-f argument is empty. \n'
                     'Please check for .py, .pyc, or .csv files in '
                     + dataPath + ' or specify one with the -file/-f '
                     'argument.')
    else:
        # Explicit file list from the command line; skip names that don't exist.
        for file in cmdLineArgs.file:
            if not os.path.exists(file):
                print(f'{file} is not a valid path or file name')
            elif file.endswith('.csv'):
                print('csv file')
                files['csv'].append(file)
            else:
                files['module'].append(file)

    for key in files:
        print(f'key: {key}, items: {files[key]}')
    return files
299
300
def dataProcesCSV(cmdLineArgs, fileName):
    """
    This function takes a data file in CSV format and parses it into a tasClasses
    File object, whose top level key is the file name, followed by data type, i.e. dofs, times,
    flops, errors, and the final value is a NumPy array of the data to plot.

    :param cmdLineArgs: Contains command line arguments.
    :param fileName: Contains the CSV file name (may include a path component).
    :type string:

    :returns:   data a tasClasses file object containing the parsed data from the files specified on the command line.
    """
    data = {}
    results = []  # NOTE(review): never used in this function

    # When -f/-file was not given, the file lives in a configured data
    # directory; chdir there so pd.read_csv can find it by bare name.
    if(cmdLineArgs.file == None):
        if cmdLineArgs.pathAliasData == None:
            os.chdir(config.filePath['defaultData'])
        else:
            os.chdir(config.filePath[cmdLineArgs.pathAliasData[0]])

    # A path was embedded in the name: chdir to the directory and keep only
    # the bare file name.
    if('/' in fileName):
        path_fileName = os.path.split(fileName)
        os.chdir(path_fileName[0])
        fileName = path_fileName[1]

    df = pd.read_csv(fileName)
    Nf = getNfCSV(df)
    # Number of MPI ranks; assumes header column index 25 of the CSV holds the
    # process count -- TODO confirm against the -log_view CSV layout.
    nProcs = int(df.columns.tolist()[25])
    dofs = []
    errors = []

    times = []
    timesMin = []
    meanTime = []
    timeGrowthRate = []

    flops = []
    flopsMax = []
    flopsMin = []
    meanFlop = []
    flopGrowthRate = []

    luFactor = []
    luFactorMin = []
    luFactorMean = []
    luFactorGrowthRate = []

    # Strip the trailing '.csv' to name the File object.
    file = File(fileName[0:len(fileName)-4])

    # filters for using in df.loc[]

    # Needed for SNES problems
    SNESSolveFilter = (df['Event Name'] == 'SNESSolve')
    # Needed for Time Step problems
    TSStepFilter = (df['Event Name'] == 'TSStep')
    MatLUFactorFilter = ((df['Event Name'] == 'MatLUFactorNum')
                         | (df['Event Name'] == 'MatLUFactorSym'))
    ConvEstErrorFilter = (df['Event Name'] == 'ConvEst Error')
    rankFilter = (df['Rank'] == 0)

    if cmdLineArgs.timestep == 0:
        SolverFilter = SNESSolveFilter
        # Added a check to make sure the problem is truly a SNES rather than TS
        # NOTE(review): Series.bool() is deprecated/removed in recent pandas;
        # requires the selection to be exactly one element -- verify pandas version.
        if (df.loc[SNESSolveFilter & (df['Stage Name'] == 'ConvEst Refinement Level 0') & rankFilter, 'Time'] == 0).bool():
            print(f'The sampled time value for SNESSolve is 0.  This most commonly happens if the problem'
                  f' is a Time Step problem.\n If this is a Time Step problem hit (y) to apply the Time Step filter'
                  f'(This can also be done on the command line using -ts 1).\nOtherwise hit (n) to continue using the SNES Solver filter')
            response = input()
            if response.lower() == 'y':
                SolverFilter = TSStepFilter
    else:
        SolverFilter = TSStepFilter

    # One list per field; each collects one value per refinement level.
    for f in range(Nf):
        errors.append([])
    for f in range(Nf):
        dofs.append([])

    # Walk refinement levels 0, 1, 2, ... until a level is missing.
    level = 0
    while level >= 0:
        if ('ConvEst Refinement Level ' + str(level) in df['Stage Name'].values):
            stageName = 'ConvEst Refinement Level '+str(level)
            #Level dependent filters
            stageNameFilter = (df['Stage Name'] == stageName)
            fieldFilter = stageNameFilter & ConvEstErrorFilter & rankFilter

            SolverDf = df.loc[stageNameFilter & SolverFilter]

            MatLUFactorDf = df.loc[(stageNameFilter & MatLUFactorFilter), [
                                    'Time', 'Rank']]
            # groupby done in order to get the sum of MatLUFactorNum and MatLUFactorSym
            # For each Rank/CPU
            MatLUFactorDf = MatLUFactorDf.groupby(['Rank']).sum()

            meanTime.append((SolverDf['Time'].sum())/nProcs)
            times.append(SolverDf['Time'].max())
            timesMin.append(SolverDf['Time'].min())

            meanFlop.append((SolverDf['FLOP'].sum())/nProcs)
            flops.append(SolverDf['FLOP'].sum())
            flopsMax.append(SolverDf['FLOP'].max())
            flopsMin.append(SolverDf['FLOP'].min())

            # Growth rate of mean statistics between consecutive levels.
            if level >= 1:
                timeGrowthRate.append(meanTime[level]/meanTime[level-1])
                flopGrowthRate.append(meanFlop[level]/meanFlop[level-1])

            luFactorMean.append(MatLUFactorDf.sum()/nProcs)
            luFactor.append(MatLUFactorDf.max())
            luFactorMin.append(MatLUFactorDf.min())

            # Per-field dofs/errors from the rank-0 ConvEst Error row.
            for f in range(Nf):
                dofs[f].append((df.loc[fieldFilter])['dof'+str(f)].values[0])
                errors[f].append((df.loc[fieldFilter])['e'+str(f)].values[0])

            level = level + 1
        else:
            level = -1

    # dtype=object because the per-field columns can hold mixed types.
    dofs = np.array(dofs, dtype=object)
    errors = np.array(errors, dtype=object)

    times = np.array(times)
    meanTime = np.array(meanTime)
    timesMin = np.array(timesMin)
    timeGrowthRate = np.array(timeGrowthRate)

    flops = np.array(flops)
    meanFlop = np.array(meanFlop)
    flopsMax = np.array(flopsMax)
    flopsMin = np.array(flopsMin)
    flopGrowthRate = np.array(flopGrowthRate)

    luFactor = np.array(luFactor)
    luFactorMin = np.array(luFactorMin)
    luFactorMean = np.array(luFactorMean)
    luFactorGrowthRate = np.array(luFactorGrowthRate)

    data['Times'] = times
    data['Mean Time'] = meanTime
    data['Times Range'] = times-timesMin
    data['Time Growth Rate'] = timeGrowthRate

    data['Flops'] = flops
    data['Mean Flops'] = meanFlop
    data['Flop Range'] = flopsMax - flopsMin
    data['Flop Growth Rate'] = flopGrowthRate

    data['LU Factor'] = luFactor
    data['LU Factor Mean'] = luFactorMean
    data['LU Factor Range'] = luFactor-luFactorMin
    data['LU Factor Growth Rate'] = luFactorGrowthRate

    # Attach a Field (with a human-readable name when available) for each field.
    for f in range(Nf):
        try:
            if cmdLineArgs.fieldList is not None:
                # Command-line field names must match the field count in the log.
                if len(cmdLineArgs.fieldList) != Nf:
                    print(f'\nYou specified {len(cmdLineArgs.fieldList)} from the command line, while the log file has {Nf} fields.\n\n'
                          f'The fields you specified were:\n{cmdLineArgs.fieldList}\n\n')

                    response = input('(y/n) to continue without field names\n')

                    if response.lower() == 'n':
                        exit()
                    else:
                        # Fall back to numeric field names for the rest of the run.
                        cmdLineArgs.fieldList = None
                        file.addField(Field(file.fileName, str(f)))
                else:
                    file.addField(
                        Field(file.fileName, cmdLineArgs.fieldList[f]))
            elif cmdLineArgs.problem != 'NULL':
                # Field names come from configureTAS.py for the named problem.
                file.addField(
                    Field(file.fileName, config.fieldNames[cmdLineArgs.problem]['field '+str(f)]))
            else:
                file.addField(Field(file.fileName, str(f)))
        except KeyError:
            sys.exit('The problem you specified on the command line: ' + cmdLineArgs.problem + ' \ncould not be found'
                     ' please check ' + config.__file__ + ' to ensure that you are using the correct name/have defined the fields for the problem.')

    file.fileData = data
    for f in range(Nf):
        print(f)
        file.fieldList[f].fieldData['dofs'] = dofs[f]
        file.fieldList[f].fieldData['Errors'] = errors[f]

    file.printFile()

    return file
490
491
def dataProces(cmdLineArgs, fileName):
    """
    This function takes a data file, ASCII type, supplied as a command line argument and parses it
    into a tasClasses File object.  The log is an importable Python module; its Stages dictionary
    is walked one 'ConvEst Refinement Level <n>' stage at a time, accumulating per-level max/min/mean
    time, flop, and LU-factorization statistics across all MPI ranks, plus per-field dofs and errors:

        data[<data type>]:<numpy array>

    :param cmdLineArgs: Contains the command line arguments.
    :param fileName: Contains the name of file (importable module) to be processed
    :type string:

    :returns:   data a tasClasses File object containing the parsed data from the file specified on the command line.
    """

    data = {}

    # The ASCII log is itself an importable Python module.
    module = importlib.import_module(fileName)
    Nf = getNf(module.Stages['ConvEst Refinement Level 1']
               ['ConvEst Error'][0]['error'])
    nProcs = module.size

    # One list per field; each collects one value per refinement level.
    dofs = [[] for _ in range(Nf)]
    errors = [[] for _ in range(Nf)]

    times = []
    timesMin = []
    meanTime = []
    timeGrowthRate = []

    flops = []
    flopsMax = []
    flopsMin = []
    meanFlop = []
    flopGrowthRate = []

    luFactor = []
    luFactorMin = []
    luFactorMean = []
    luFactorGrowthRate = []

    file = File(module.__name__)

    for f in range(Nf):
        try:
            if cmdLineArgs.problem != 'NULL':
                file.addField(
                    Field(file.fileName, config.fieldNames[cmdLineArgs.problem]['field '+str(f)]))
            else:
                file.addField(Field(file.fileName, str(f)))
        except KeyError:
            # Narrowed from a bare `except:` -- only a missing problem/field key
            # in configureTAS.py is expected here (matches dataProcesCSV).
            sys.exit('The problem you specified on the command line: ' + cmdLineArgs.problem + ' \ncould not be found'
                     ' please check ' + config.__file__ + ' to ensure that you are using the correct name/have defined the fields for the problem.')

    # Walk refinement levels 0, 1, 2, ... until a level is missing.
    level = 0
    while level >= 0:
        stageName = 'ConvEst Refinement Level '+str(level)
        if stageName in module.Stages:
            solve = module.Stages[stageName]['SNESSolve']
            luNum = module.Stages[stageName]['MatLUFactorNum']
            luSym = module.Stages[stageName]['MatLUFactorSym']

            # Seed max/min/total with rank 0's values.
            timeTempMax = timeTempMin = totalTime = solve[0]['time']
            flopsTempMax = flopsTempMin = totalFlop = solve[0]['flop']

            luFactorTempMax = luNum[0]['time'] + luSym[0]['time']
            luFactorTempMin = luFactorTempMax
            totalLuFactor = luFactorTempMax
            # BUG FIX: the original tested the loop variable n AFTER the rank
            # loop, a NameError for serial runs (nProcs == 1).  Rank 0 is used
            # instead to decide whether LU factorization ran at all --
            # TODO confirm rank 0 is always representative.
            luFactorRan = luNum[0]['time'] != 0

            # Accumulate max/min/total statistics across the remaining ranks.
            for n in range(1, nProcs):
                rankTime = solve[n]['time']
                rankFlop = solve[n]['flop']

                timeTempMax = max(timeTempMax, rankTime)
                timeTempMin = min(timeTempMin, rankTime)
                totalTime = totalTime + rankTime

                flopsTempMax = max(flopsTempMax, rankFlop)
                flopsTempMin = min(flopsTempMin, rankFlop)
                totalFlop = totalFlop + rankFlop

                if luNum[n]['time'] != 0:
                    # Sum of MatLUFactorNum and MatLUFactorSym for this rank.
                    luFactorCur = luNum[n]['time'] + luSym[n]['time']
                    luFactorTempMax = max(luFactorTempMax, luFactorCur)
                    luFactorTempMin = min(luFactorTempMin, luFactorCur)
                    totalLuFactor = totalLuFactor + luFactorCur

            meanTime.append(totalTime/nProcs)
            times.append(timeTempMax)
            timesMin.append(timeTempMin)

            meanFlop.append(totalFlop/nProcs)
            flops.append(totalFlop)
            flopsMax.append(flopsTempMax)
            # BUG FIX: the original appended timeTempMin here, recording time
            # data in the flop minimums.
            flopsMin.append(flopsTempMin)

            if luFactorRan:
                luFactor.append(luFactorTempMax)
                luFactorMin.append(luFactorTempMin)
                luFactorMean.append(totalLuFactor/nProcs)

            #Calculates the growth rate of statistics between levels
            if level >= 1:
                timeGrowthRate.append(meanTime[level]/meanTime[level-1])
                flopGrowthRate.append(meanFlop[level]/meanFlop[level-1])

            #TODO FOR SNES
            #if luFactorRan:
            #    luFactorGrowthRate.append(luFactorMean[level-1]/luFactorMean[level-2])

            # Per-field dofs/errors from rank 0's ConvEst Error record.
            for f in range(Nf):
                dofs[f].append(module.Stages[stageName]
                               ['ConvEst Error'][0]['dof'][f])
                errors[f].append(module.Stages[stageName]
                                 ['ConvEst Error'][0]['error'][f])

            level = level + 1
        else:
            level = -1

    dofs = np.array(dofs)
    errors = np.array(errors)

    times = np.array(times)
    meanTime = np.array(meanTime)
    timesMin = np.array(timesMin)
    timeGrowthRate = np.array(timeGrowthRate)

    flops = np.array(flops)
    meanFlop = np.array(meanFlop)
    flopsMax = np.array(flopsMax)
    flopsMin = np.array(flopsMin)
    flopGrowthRate = np.array(flopGrowthRate)

    luFactor = np.array(luFactor)
    luFactorMin = np.array(luFactorMin)
    luFactorMean = np.array(luFactorMean)
    luFactorGrowthRate = np.array(luFactorGrowthRate)

    data['Times'] = times
    data['Mean Time'] = meanTime
    data['Times Range'] = times-timesMin
    data['Time Growth Rate'] = timeGrowthRate

    data['Flops'] = flops
    data['Mean Flops'] = meanFlop
    data['Flop Range'] = flopsMax - flopsMin
    data['Flop Growth Rate'] = flopGrowthRate

    data['LU Factor'] = luFactor
    data['LU Factor Mean'] = luFactorMean
    data['LU Factor Range'] = luFactor-luFactorMin
    data['LU Factor Growth Rate'] = luFactorGrowthRate

    file.fileData = data
    for f in range(Nf):
        file.fieldList[f].fieldData['dofs'] = dofs[f]
        file.fieldList[f].fieldData['Errors'] = errors[f]

    file.printFile()

    return file
675
676
def getNf(errorList):
    """
    This simple function counts the number of fields in the supplied error list.  The default
    convention is that each field from the problem has an entry in the error list with at most 8 fields.
    If there are less than 8 fields those entries are set to -1, so the field count is the index of
    the first -1.
    Example:
      A problem with 4 fields would have a list of the form [.01, .003, .2, .04, -1, -1, -1, -1]

    ROBUSTNESS FIX: a list with no -1 sentinel (all entries are real fields)
    previously ran off the end with an IndexError; it now returns len(errorList).

    :param errorList: contains a list of floating point numbers with the errors from each level of refinement.
    :type errorList: List containing Floating point numbers.
    :returns: Nf an integer that represents the number of fields.
    """
    for Nf, err in enumerate(errorList):
        if err == -1:
            return Nf
    return len(errorList)
695
696
def getNfCSV(df):
    """
    This simple function is the same as getNf, except it is for the CSV files.  It inspects the
    dofx columns, where x is an integer, of the row where Stage Name = ConvEst Refinement Level 0,
    Event Name = ConvEst Error, and Rank = 0, counting fields until it encounters -1.  The default
    convention is that each field from the problem has an entry with at most 8 fields; unused
    entries are set to -1.

    Example:
      A problem with 4 fields would have a list of the form [.01, .003, .2, .04, -1, -1, -1, -1]

    :param df: Contains a Pandas Data Frame.
    :type df: A Pandas Data Frame object.
    :returns: Nf an integer that represents the number of fields.
    """
    # Reduce to the single row that carries the per-field dof columns.
    fieldRow = df.loc[(df['Event Name'] == 'ConvEst Error')
                      & (df['Stage Name'] == 'ConvEst Refinement Level 0')
                      & (df['Rank'] == 0)].reset_index()
    # dof0 is assumed present; count upward until the -1 sentinel appears.
    count = 1
    while fieldRow.loc[0, 'dof' + str(count)] != -1:
        count += 1
    return count
723
724
def graphGen(file, enable_graphs, graph_flops_scaling, dim):
    """
    This function takes the supplied File object and plots the data from each field on the Mesh Convergence, Static Scaling, and
    Efficacy graphs.

    :param file: Contains the data to be plotted on the graphs, assumes the format -- file[<file name>][<data type>]:<numpy array>
    :type file: Dictionary
    :param enable_graphs: Controls whether the graphs are generated.  1 generates and saves the graphs, 0 only prints the
                              least-squares/convergence numbers.  This option is specified on the command line.
    :type enable_graphs: Integer
    :param graph_flops_scaling: Controls creating the scaling graph that uses flops/second.  The default is not to.  This option
                                    is specified on the command line.
    :type graph_flops_scaling: Integer
    :param dim: Contains the number of dimension of the mesh.  This is specified on the command line.
    :type dim: Integer


    :returns: None
    """
    # Per-field least-squares fits keyed by field name.  The original reused one
    # two-element array, so the plotting loop always drew the fit line of the
    # *last* field; storing (alpha, beta) per field fixes that.
    fits = {}

    #Loop through each field: compute the least-squares fit of Error vs DoFs and record
    #the convergence rate on the Field object.
    for field in file.fieldList:
        #Least squares solution for Mesh Convergence.  String/-1 placeholders in the
        #Errors column are replaced with 1 so the fit can still be computed.
        if isinstance(field.fieldData['Errors'][0], str) or field.fieldData['Errors'][0] == -1:
            print('Mesh Convergence can not be calculated, nan values in Error field will change to 1')
            for x in range(len(field.fieldData['Errors'])):
                field.fieldData['Errors'][x] = 1

        alpha, beta = leastSquares(field.fieldData['dofs'], field.fieldData['Errors'])
        fits[field.fieldName] = (alpha, beta)
        print('Least Squares Data')
        print('==================')
        print('Mesh Convergence')
        print('Alpha: {} \n  {}'.format(alpha, beta))

        # Convergence rate follows from the fitted slope and the mesh dimension.
        convRate = alpha * -dim
        print('convRate: {} of {} field'.format(convRate, field.fieldName))

        field.setConvergeRate(convRate)
        field.setAlpha(alpha)
        field.setBeta(beta)

    # Use the enable_graphs parameter; the original read the module-level
    # cmdLineArgs.enable_graphs, which breaks when graphGen is imported rather
    # than run through this script.
    if enable_graphs == 1:
        #Uses the specified style sheet for generating the plots
        styleDir = os.path.join(os.environ.get('PETSC_DIR'), 'lib/petsc/bin')
        plt.style.use(os.path.join(styleDir, 'petsc_tas_style.mplstyle'))

        # Only build the Mesh Convergence graph when the (last) field has usable
        # error data.  The flag guards every later use of these axes so missing
        # errors no longer cause a NameError at legend()/savefig().
        plotMeshConv = not pd.isna(field.fieldData['Errors'][0])
        if plotMeshConv:
            meshConvFig = plt.figure()
            axMeshConv = meshConvFig.add_subplot(1, 1, 1)
            # Raw strings so \l in \log is not read as a (deprecated) escape sequence.
            axMeshConv.set(xlabel=r'Problem Size $\log N$', ylabel=r'Error $\log |x - x^*|$', title='Mesh Convergence')

        # Static Scaling graph for the flop rate.  The original created this figure
        # and then immediately rebound the same variables to the DoF-rate figure, so
        # the flop-rate curves landed on the DoF-rate axes and this figure was
        # orphaned; it is now kept separate and saved on its own.
        if graph_flops_scaling == 1:
            flopsScaleFig = plt.figure()
            axFlopsScale = flopsScaleFig.add_subplot(1, 1, 1)
            axFlopsScale.set(xlabel='Time(s)', ylabel='Flop Rate (F/s)', title='Static Scaling')

        # Static Scaling graph for the DoF rate.
        statScaleFig = plt.figure()
        axStatScale = statScaleFig.add_subplot(1, 1, 1)
        axStatScale.set(xlabel='Time(s)', ylabel='DoF Rate (DoF/s)', title='Static Scaling')

        efficFig = plt.figure()
        axEffic = efficFig.add_subplot(1, 1, 1)
        axEffic.set(xlabel='Time(s)', ylabel='Error Time', title='Efficacy')
        axEffic.set_ylim(0, 10)

        #Loop through each field and add the data/line for that field to the Mesh Convergence, Static Scaling, and Efficacy Graphs
        for field in file.fieldList:
            ##Start Mesh Convergence graph
            convRate = str(round(field.cRate, 3))
            alpha, beta = fits[field.fieldName]

            if plotMeshConv:
                axMeshConv.loglog(field.fieldData['dofs'], field.fieldData['Errors'],
                                  label='Field ' + field.fieldName + ' Orig Data', marker='^')

                # Fit line error ~ 10**beta * dofs**alpha using *this* field's coefficients.
                axMeshConv.loglog(field.fieldData['dofs'], ((field.fieldData['dofs']**alpha * 10**beta)),
                                  label=field.fieldName + ' Convergence rate =  ' + convRate, marker='x')

            ##Start Static Scaling Graph, only if graph_flops_scaling equals 1.  Specified on the command line.
            if graph_flops_scaling == 1:
                axFlopsScale.loglog(file.fileData['Times'], file.fileData['Flops']/file.fileData['Times'],
                                    label='Field ' + field.fieldName, marker='^')

            ##Start Static Scaling with DoFs Graph
            axStatScale.loglog(file.fileData['Times'], field.fieldData['dofs']/file.fileData['Times'],
                               label='Field ' + field.fieldName, marker='^')

            ##Start Efficacy graph
            axEffic.semilogx(file.fileData['Times'], -np.log10((field.fieldData['Errors']*file.fileData['Times']).astype(np.float64)),
                             label='Field ' + field.fieldName, marker='^')

        # Figure-level legends pick up the labels passed to the plot calls above.
        if plotMeshConv:
            meshConvFig.legend()
        statScaleFig.legend()
        if graph_flops_scaling == 1:
            flopsScaleFig.legend()
        efficFig.legend()

        # bottom= is the non-deprecated spelling of ymin= (removed in matplotlib 3.5).
        axStatScale.set_ylim(bottom=0.1)

        #code for determining if the default path for graphs, in configureTAS.py should be used or
        #if an alias was given for a different path on the command line.
        # NOTE(review): this still reads the module-level cmdLineArgs for the alias;
        # passing it in would change the interface, so it is left as-is.
        if cmdLineArgs.pathAliasGraph is None:
            if config.filePath['defaultGraphs'] is None:
                print(f'The defaultGraphs alias is not set.  \nPlease either specify an alias using the'
                      f' -pag command line option or set a defaultGraphs path using -dgd')
                exit()
            else:
                pathAlias = 'defaultGraphs'
        else:
            if cmdLineArgs.pathAliasGraph[0] in config.filePath:
                pathAlias = cmdLineArgs.pathAliasGraph[0]

            elif 'defaultGraphs' in config.filePath and config.filePath['defaultGraphs'] is not None:
                defGraphPath = config.filePath['defaultGraphs']
                print(f'\nAlias {cmdLineArgs.pathAliasGraph[0]} was not found in configureTAS.py\n'
                      f'Do you wish to use the default path of {defGraphPath}')

                response = input('y/n to continue')

                if response.lower() == 'y':
                    pathAlias = 'defaultGraphs'
                else:
                    exit()
            else:
                print(f'\nAlias {cmdLineArgs.pathAliasGraph[0]} was not found in configureTAS.py'
                      f'and defaultGraphs path is empty.\nPlease set these through the command line options:\n'
                      f'-dgd to set the default path for graphs or\n-aefd to set additional aliases\n')
                # Without this exit() the savefig calls below raised a NameError on pathAlias.
                exit()

        # field here is the last field of the loop above; it carries the source file
        # name used in the output file names (presumably identical for all fields).
        if plotMeshConv:
            meshConvFig.savefig(
                config.filePath[pathAlias]+'meshConvergenceField_' + field.fileName + '.png')
        statScaleFig.savefig(
            config.filePath[pathAlias]+'staticScalingField_' + field.fileName + '.png')
        if graph_flops_scaling == 1:
            flopsScaleFig.savefig(
                config.filePath[pathAlias]+'staticScalingFlopsField_' + field.fileName + '.png')
        efficFig.savefig(
            config.filePath[pathAlias]+'efficacyField_' + field.fileName + '.png')

    return
883
884
def leastSquares(x, y):
    """
    This function takes 2 numpy arrays of data and outputs the least squares solution,
       y = m*x + c, in log10 space.  The solution is obtained by finding the result of
       y = Ap, where A is the matrix of the form [[1 x]] and p = [[c], [m]].

    :param x: Contains the x values for the data.
    :type x: numpy array
    :param y: Contains the y values for the data.
    :type y: numpy array

    :returns: alpha -- the convRate (slope) of the least squares solution
    :returns: c -- the constant (intercept) of the least squares solution.
    """
    # The fit is done on log10 of both axes, i.e. a power-law fit in linear space.
    x = np.log10(x.astype(np.float64))
    y = np.log10(y.astype(np.float64))

    # Design matrix with an intercept column: X = [[1, x_i]].
    X = np.hstack((np.ones((x.shape[0], 1)), x.reshape((x.shape[0], 1))))

    # Normal equations via the pseudo-inverse: beta = (X^T X)^+ X^T y.
    # (The original also built a second, unused copy of X and its pseudo-inverse;
    # that dead code has been removed.)
    beta = np.dot(np.linalg.pinv(np.dot(X.transpose(), X)), X.transpose())
    beta = np.dot(beta, y.reshape((y.shape[0], 1)))

    # beta[0] is the intercept, beta[1] the slope; return (slope, intercept).
    return beta[1][0], beta[0][0]
913
914
if __name__ == '__main__':
    # Command-line entry point: build the argument parser, parse sys.argv, and
    # hand the namespace to main().
    cmdLine = argparse.ArgumentParser(
           description='This is part of the PETSc toolkit for evaluating solvers using\n\
                   Time-Accuracy-Size(TAS) spectrum analysis.')

    # Input selection.
    cmdLine.add_argument('-f', '--file', metavar='<filename>',
                         nargs='*', help='List of files to import for TAS analysis.')

    cmdLine.add_argument('-output_base', '--output_base',
                         default=os.getcwd(), help='Base directory for output.')

    cmdLine.add_argument(
        '-v', '--version', action='version', version='%(prog)s 1.0')

    # Graphing behaviour.
    cmdLine.add_argument('-gfs', '--graph_flops_scaling', type=int, default=0, choices=[0, 1],
                         help='Enables graphing flop rate static scaling graph. Default: %(default)s  do not print the graph. 1 to print the graph.')

    cmdLine.add_argument('-d', '--dim', type=int, default=2, help='Specifies the number of dimensions of the mesh. \
        Default: %(default)s.')

    cmdLine.add_argument('-eg', '--enable_graphs', type=int, default=1, choices=[0, 1],
                         help='Enables graphing. Default: %(default)s  print the graphs. 0 to disable printing the graphs.')

    cmdLine.add_argument('-vv', '--view_variance', type=int, default=0, choices=[0, 1],
                         help='Enables calculating and outputting the Variance. Default: %(default)s does not print the variance. 1 to enable \
        printing the graphs.')

    cmdLine.add_argument('-p', '--problem', default='NULL', help='Enables searching for the names of fields in \
        configureTAS.py. Default: %(default)s does not look for the names.  Instead identifies the fields using \
        a number, 0, 1, 2,...n')

    cmdLine.add_argument('-ts', '--timestep', type=int, default=0, choices=[0, 1],
                         help='Enable if solving a time step problem.')

    # Options that edit configureTAS.py (handled at the top of main(), which
    # exits after applying them).
    cmdLine.add_argument('-dfd', '--setDefaultFileDir', type=str, nargs=1, help='Sets the default path for log \
        files to be processed.')

    cmdLine.add_argument('-dgd', '--setDefaultGraphDir', type=str, nargs=1, help='Sets the default path for graph \
        files to be saved.')

    cmdLine.add_argument('-aefd', '--addEditAliasDir', metavar='<alias> <path>', type=str, nargs='*',
                         help='Add a new alias and path, for log files to be processed or graphs to be saved, or edits an existing one.')

    cmdLine.add_argument('-rad', '--removeAliasDir', metavar='<alias>', type=str, nargs='*',
                         help='Remove an alias and path for log files to be processed or edits an existing one.')

    # Path-alias selection for data and graph output.
    cmdLine.add_argument('-pad', '--pathAliasData', type=str,
                         nargs=1, help='Specify path alias to use for data.')

    # Fixed copy-paste defect: this help text previously said "for data",
    # duplicating -pad, but this option selects the graph output path.
    cmdLine.add_argument('-pag', '--pathAliasGraph', type=str,
                         nargs=1, help='Specify path alias to use for graphs.')

    cmdLine.add_argument('-fl', '--fieldList', type=str,
                         nargs='*', help='List of field names.')

    cmdLineArgs = cmdLine.parse_args()

    main(cmdLineArgs)