#!/usr/bin/env python3 """ Parse the PETSc tutorial or test file (example) and return a dictionary containing information about the tests to be run for the example Quick usage:: lib/petsc/bin/maint/testparse.py -t src/ksp/ksp/tutorials/ex1.c From the command line, it prints out the dictionary. This is meant to be used by other scripts, but it is useful to debug individual files. Example language ---------------- /*TEST build: requires: moab # This is equivalent to test: testset: args: -pc_type mg -ksp_type fgmres -da_refine 2 -ksp_monitor_short -mg_levels_ksp_monitor_short -mg_levels_ksp_norm_type unpreconditioned -ksp_view -pc_mg_type full testset: suffix: 2 nsize: 2 args: -pc_type mg -ksp_type fgmres -da_refine 2 -ksp_monitor_short -mg_levels_ksp_monitor_short -mg_levels_ksp_norm_type unpreconditioned -ksp_view -pc_mg_type full testset: suffix: 2 nsize: 2 args: -pc_type mg -ksp_type fgmres -da_refine 2 -ksp_monitor_short -mg_levels_ksp_monitor_short -mg_levels_ksp_norm_type unpreconditioned -ksp_view -pc_mg_type full test: TEST*/ """ from __future__ import print_function import os, re, glob, types import sys import logging from gmakegen import * sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) import inspect thisscriptdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) maintdir=os.path.join(os.path.join(os.path.dirname(thisscriptdir),'bin'),'maint') sys.path.insert(0,maintdir) # These are special keys describing build buildkeys="requires TODO SKIP depends".split() acceptedkeys=set("test nsize requires command suffix diff_args args filter filter_output localrunfiles comments TODO SKIP output_file timeoutfactor env temporaries".split()) appendlist="args diff_args requires comments env".split() import re def getDefaultOutputFileRoot(testname): """ Given testname, give DefaultRoot and DefaultOutputFilename e.g., runex1 gives ex1_1, output/ex1_1.out """ defroot=(re.sub("run","",testname) if testname.startswith("run") else testname) if not "_" in defroot: defroot=defroot+"_1" return defroot def _stripIndent(block,srcfile,entireBlock=False,fileNums=[]): """ Go through and remove a level of indentation Also strip of trailing whitespace """ # The first entry should be test: but it might be indented. ext = getlangext(srcfile) stripstr=" " if len(fileNums)>0: lineNum=fileNums[0]-1 for lline in block.split("\n"): if len(fileNums)>0: lineNum+=1 line=lline[1:] if lline.startswith("!") else lline if not line.strip(): continue if line.strip().startswith('#'): continue if entireBlock: var=line.split(":")[0].strip() if not var in ['test','testset','build']: raise Exception("Formatting error: Cannot find test in file: "+srcfile+" at line: "+str(lineNum)+"\n") nspace=len(line)-len(line.lstrip(stripstr)) newline=line[nspace:] break # Strip off any indentation for the whole string and any trailing # whitespace for convenience newTestStr="\n" if len(fileNums)>0: lineNum=fileNums[0]-1 firstPass=True for lline in block.split("\n"): if len(fileNums)>0: lineNum+=1 line=lline[1:] if lline.startswith("!") else lline if not line.strip(): continue if line.strip().startswith('#'): newTestStr+=line+'\n' else: newline=line[nspace:] newTestStr+=newline.rstrip()+"\n" # Do some basic indentation checks if entireBlock: # Don't need to check comment lines if line.strip().startswith('#'): continue if not newline.startswith(" "): var=newline.split(":")[0].strip() if not var in ['test','testset','build']: err="Formatting error in file "+srcfile+" at line: " +line+"\n" if len(fileNums)>0: raise Exception(err+"Check indentation at line number: "+str(lineNum)) else: raise Exception(err) else: var=line.split(":")[0].strip() if var in ['test','testset','build']: subnspace=len(line)-len(line.lstrip(stripstr)) if firstPass: firstsubnspace=subnspace firstPass=False else: if firstsubnspace!=subnspace: err="Formatting subtest error in file "+srcfile+" at line: " +line+"\n" if len(fileNums)>0: raise Exception(err+"Check indentation at line number: "+str(lineNum)) else: raise Exception(err) # Allow line continuation character '\' return newTestStr.replace('\\\n', ' ') def parseLoopArgs(varset): """ Given: String containing loop variables Return: tuple containing separate/shared and string of loop vars """ keynm=varset.split("{{")[0].strip().lstrip('-') if not keynm.strip(): keynm='nsize' lvars=varset.split('{{')[1].split('}')[0] suffx=varset.split('{{')[1].split('}')[1] ftype='separate' if suffx.startswith('separate') else 'shared' return keynm,lvars,ftype def _getLoopVars(testDict): """ Given: dictionary that may have Return: Variables that cause a test split """ vals=None loopVars={} loopVars['separate']=[] loopVars['shared']=[] # Check nsize if 'nsize' in testDict: varset=testDict['nsize'] if '{{' in varset: keynm,lvars,ftype=parseLoopArgs(varset) if ftype=='separate': loopVars['separate'].append(keynm) # Now check args if 'args' not in testDict: return loopVars for varset in re.split(r'(^|\W)-(?=[a-zA-Z])',testDict['args']): if not varset.strip(): continue if '{{' in varset: # Assuming only one for loop per var specification keynm,lvars,ftype=parseLoopArgs(varset) loopVars[ftype].append(keynm) return loopVars def _getNewArgs(args,separate=True): """ Given: String that has args that might have loops in them Return: All of the arguments/values that do not have for 'separate output' in for loops unless separate=False """ newargs='' if not args.strip(): return args for varset in re.split(r'(^|\W)-(?=[a-zA-Z])',args): if not varset.strip(): continue if '{{' in varset: if separate: if 'separate' in varset: continue else: if 'separate' not in varset: continue newargs+="-"+varset.strip()+" " return newargs def _getVarVals(findvar,testDict): """ Given: variable that is either nsize or in args Return: Values to loop over and the other arguments Note that we keep the other arguments even if they have for loops to enable stepping through all of the for lops """ save_vals=None if findvar=='nsize': varset=testDict[findvar] keynm,save_vals,ftype=parseLoopArgs('nsize '+varset) else: varlist=[] for varset in re.split('-(?=[a-zA-Z])',testDict['args']): if not varset.strip(): continue if '{{' not in varset: continue keyvar,vals,ftype=parseLoopArgs(varset) if keyvar==findvar: save_vals=vals if not save_vals: raise Exception("Could not find separate_testvar: "+findvar) return save_vals def genTestsSeparateTestvars(intests,indicts,final=False): """ Given: testname, sdict with 'separate_testvars Return: testnames,sdicts: List of generated tests The tricky part here is the {{ ... }separate output} that can be used multiple times """ testnames=[]; sdicts=[] for i in range(len(intests)): testname=intests[i]; sdict=indicts[i]; i+=1 loopVars=_getLoopVars(sdict) if len(loopVars['shared'])>0 and not final: # Need to remove shared loop vars and push down to subtests if 'subtests' in sdict: for varset in re.split(r'(^|\W)-(?=[a-zA-Z])',sdict['args']): if '{{' in varset: for stest in sdict['subtests']: if 'args' in sdict[stest]: sdict[stest]['args']+=' -'+varset else: sdict[stest]['args']="-"+varset sdict['args']=_getNewArgs(sdict['args'],separate=False) if len(loopVars['separate'])>0: sep_dicts=[sdict.copy()] if 'args' in sep_dicts[0]: sep_dicts[0]['args']=_getNewArgs(sdict['args']) sep_testnames=[testname] for kvar in loopVars['separate']: kvals=_getVarVals(kvar,sdict) # Have to do loop over previous var/val combos as well # and accumulate as we go val_testnames=[]; val_dicts=[] for val in kvals.split(): gensuffix="_"+kvar+"-"+val.replace(',','__') for kvaltestnm in sep_testnames: val_testnames.append(kvaltestnm+gensuffix) for kv in sep_dicts: kvardict=kv.copy() # If the last var then we have the final version if 'suffix' in sdict: kvardict['suffix']+=gensuffix else: kvardict['suffix']=gensuffix if kvar=='nsize': kvardict[kvar]=val else: kvardict['args']+="-"+kvar+" "+val+" " val_dicts.append(kvardict) sep_testnames=val_testnames sep_dicts=val_dicts testnames+=sep_testnames sdicts+=sep_dicts else: # These are plain vanilla tests (no subtests, no loops) that # do not have a suffix. This makes the targets match up with # the output file (testname_1.out) if final: if '_' not in testname: testname+='_1' testnames.append(testname) sdicts.append(sdict) return testnames,sdicts def genTestsSubtestSuffix(testnames,sdicts): """ Given: testname, sdict with separate_testvars Return: testnames,sdicts: List of generated tests """ tnms=[]; sdcts=[] for i in range(len(testnames)): testname=testnames[i] rmsubtests=[]; keepSubtests=False if 'subtests' in sdicts[i]: for stest in sdicts[i]["subtests"]: if 'suffix' in sdicts[i][stest]: rmsubtests.append(stest) gensuffix="_"+sdicts[i][stest]['suffix'] newtestnm=testname+gensuffix tnms.append(newtestnm) newsdict=sdicts[i].copy() del newsdict['subtests'] # Have to hand update # Append for kup in appendlist: if kup in sdicts[i][stest]: if kup in sdicts[i]: newsdict[kup]=sdicts[i][kup]+" "+sdicts[i][stest][kup] else: newsdict[kup]=sdicts[i][stest][kup] # Promote for kup in acceptedkeys: if kup in appendlist: continue if kup in sdicts[i][stest]: newsdict[kup]=sdicts[i][stest][kup] # Cleanup for st in sdicts[i]["subtests"]: del newsdict[st] sdcts.append(newsdict) else: keepSubtests=True else: tnms.append(testnames[i]) sdcts.append(sdicts[i]) # If a subtest without a suffix exists, then save it if keepSubtests: tnms.append(testnames[i]) newsdict=sdicts[i].copy() # Prune the tests to prepare for keeping for rmtest in rmsubtests: newsdict['subtests'].remove(rmtest) del newsdict[rmtest] sdcts.append(newsdict) i+=1 return tnms,sdcts def splitTests(testname,sdict): """ Given: testname and dictionary generated from the YAML-like definition Return: list of names and dictionaries corresponding to each test given that the YAML-like language allows for multiple tests """ # Order: Parent sep_tv, subtests suffix, subtests sep_tv testnames,sdicts=genTestsSeparateTestvars([testname],[sdict]) testnames,sdicts=genTestsSubtestSuffix(testnames,sdicts) testnames,sdicts=genTestsSeparateTestvars(testnames,sdicts,final=True) # Because I am altering the list, I do this in passes. Inelegant return testnames, sdicts def testSplit(striptest): """ Split up a test into lines, but use a shell parser to detect when newlines are within quotation marks and keep those together """ import shlex sl = shlex.shlex() sl.whitespace_split = True # only split at whitespace sl.commenters = '' sl.push_source(striptest) last_pos = sl.instream.tell() try: last_token = sl.read_token() except ValueError: print(striptest) raise ValueError last_line = '' while last_token != '': new_pos = sl.instream.tell() block = striptest[last_pos:new_pos] token_start = block.find(last_token) leading = block[0:token_start] trailing = block[(token_start + len(last_token)):] leading_split = leading.split('\n') if len(leading_split) > 1: yield last_line last_line = '' last_line += leading_split[-1] last_line += last_token trailing_split = trailing.split('\n') last_line += trailing_split[0] if len(trailing_split) > 1: yield last_line last_line = '' last_pos = new_pos try: last_token = sl.read_token() except ValueError: print(striptest) raise ValueError yield last_line def parseTest(testStr,srcfile,verbosity): """ This parses an individual test Our YAML-like language is hierarchial so should use a state machine in the general case, but in practice we only support two levels of test: """ basename=os.path.basename(srcfile) # Handle the new at the beginning bn=re.sub("new_","",basename) # This is the default testname="run"+getlangsplit(bn) # Tests that have default everything (so empty effectively) if len(testStr)==0: if '_' not in testname: testname+='_1' return [testname], [{}] striptest=_stripIndent(testStr,srcfile) # go through and parse subtestnum=0 subdict={} comments=[] indentlevel=0 for ln in testSplit(striptest): line=ln.split('#')[0].rstrip() if verbosity>2: print(line) comment=("" if len(ln.split("#"))>0 else " ".join(ln.split("#")[1:]).strip()) if comment: comments.append(comment) if not line.strip(): continue lsplit=line.split(':') if len(lsplit)==0: raise Exception("\n\nError in test harness parsing file: "+srcfile+"\nMissing : in line: "+line) indentcount=lsplit[0].count(" ") var=lsplit[0].strip() val=line[line.find(':')+1:].strip() if not var in acceptedkeys: raise Exception("\n\nError in test harness parsing file: "+srcfile+"\n"+var+" from: "+line+" is not a valid keyword") # Start by seeing if we are in a subtest if line.startswith(" "): if not 'subtestname' in locals(): raise Exception("\n\nError in test harness parsing file: "+srcfile+"\nInvalid indentation at line:"+line) if var in subdict[subtestname]: subdict[subtestname][var]+=" "+val else: subdict[subtestname][var]=val if not indentlevel: indentlevel=indentcount #if indentlevel!=indentcount: print("Error in indentation:", ln) # Determine subtest name and make dict elif var=="test": subtestname="test"+str(subtestnum) subdict[subtestname]={} if "subtests" not in subdict: subdict["subtests"]=[] subdict["subtests"].append(subtestname) subtestnum=subtestnum+1 # The rest are easy else: # For convenience, it is sometimes convenient to list twice if var in subdict: if var in appendlist: subdict[var]+=" "+val else: raise Exception("\n\nError in test harness parsing file: "+srcfile+"\n"+var+" entered twice: "+line) else: subdict[var]=val if var=="suffix": if len(val)>0: testname+="_"+val if var == "env" and len(val) == 0: raise Exception("\n\nError in test harness parsing file: "+srcfile+"\nvalue for {}: directive cannot be empty!".format(var)) if len(comments): subdict['comments']="\n".join(comments).lstrip("\n") # add in error check: if 'requires' is added to subtests, make sure 'suffix' is also added. if 'subtests' in subdict: for stest in subdict['subtests']: if 'requires' in subdict[stest] and not 'suffix' in subdict[stest]: raise Exception("\n\nError in test harness parsing file: "+srcfile+"\nA test in a testset with 'requires' directive should also have a 'suffix' directive!") # A test block can create multiple tests. This does that logic testnames,subdicts=splitTests(testname,subdict) return testnames,subdicts def parseTests(testStr,srcfile,fileNums,verbosity): """ Parse the YAML-like string describing tests and return a dictionary with the info in the form of: testDict[test][subtest] """ testDict={} # The first entry should be test: but it might be indented. newTestStr=_stripIndent(testStr,srcfile,entireBlock=True,fileNums=fileNums) if verbosity>2: print(srcfile) ## Check and see if we have build requirements addToRunRequirements=None if "\nbuild:" in newTestStr: testDict['build']={} # The file info is already here and need to append Part1=newTestStr.split("build:")[1] fileInfo=re.split("\ntest(?:set)?:",newTestStr)[0] for bkey in buildkeys: if bkey+":" in fileInfo: testDict['build'][bkey]=fileInfo.split(bkey+":")[1].split("\n")[0].strip() #if verbosity>1: bkey+": "+testDict['build'][bkey] # If a runtime requires are put into build, push them down to all run tests # At this point, we are working with strings and not lists if 'requires' in testDict['build']: if 'todo' in testDict['build']['requires'] or 'TODO' in testDict['build']['requires']: raise Exception("Error: Do not list 'TODO' in requires: list it as a field, for example, TODO: XXX is currently unstable\n"+srcfile) addToRunRequirements=testDict['build']['requires'] # People put datafilespath into build, but it needs to be a runtime if 'datafilespath' in testDict['build']['requires']: newreqs=re.sub('datafilespath','',testDict['build']['requires']) testDict['build']['requires']=newreqs.strip() # Now go through each test. First elem in split is blank for test in re.split("\ntest(?:set)?:",newTestStr)[1:]: testnames,subdicts=parseTest(test,srcfile,verbosity) for i in range(len(testnames)): if testnames[i] in testDict: raise RuntimeError("Multiple test names specified: "+testnames[i]+" in file: "+srcfile) # Add in build requirements that need to be moved if addToRunRequirements: if 'requires' in subdicts[i]: subdicts[i]['requires']+=' '+addToRunRequirements else: subdicts[i]['requires']=addToRunRequirements testDict[testnames[i]]=subdicts[i] return testDict def parseTestFile(srcfile,verbosity): """ Parse single example files and return dictionary of the form: testDict[srcfile][test][subtest] """ debug=False basename=os.path.basename(srcfile) if basename=='makefile': return {} curdir=os.path.realpath(os.path.curdir) basedir=os.path.dirname(os.path.realpath(srcfile)) os.chdir(basedir) testDict={} sh=open(basename,"r"); fileStr=sh.read(); sh.close() # Handle Windows issues if not os.linesep == "\n": fileStr=fileStr.replace(os.linesep,"\n") ## Start with doing the tests # fsplit=fileStr.split("/*TEST\n")[1:] fstart=len(fileStr.split("/*TEST\n")[0].split("\n"))+1 # Allow for multiple "/*TEST" blocks even though it really should be # one srcTests=[] for t in fsplit: srcTests.append(t.split("TEST*/")[0]) testString=" ".join(srcTests) flen=len(testString.split("\n")) fend=fstart+flen-1 fileNums=range(fstart,fend) testDict[basename]=parseTests(testString,srcfile,fileNums,verbosity) # Massage dictionary for build requirements if 'build' in testDict[basename]: testDict[basename].update(testDict[basename]['build']) del testDict[basename]['build'] os.chdir(curdir) return testDict def parseTestDir(directory,verbosity): """ Parse single example files and return dictionary of the form: testDict[srcfile][test][subtest] """ curdir=os.path.realpath(os.path.curdir) basedir=os.path.realpath(directory) os.chdir(basedir) tDict={} for test_file in sorted(glob.glob("new_ex*.*")): tDict.update(parseTestFile(test_file,verbosity)) os.chdir(curdir) return tDict def printExParseDict(rDict): """ This is useful for debugging """ indent=" " for sfile in rDict: print(sfile) sortkeys=list(rDict[sfile].keys()) sortkeys.sort() for runex in sortkeys: if runex == 'requires': print(indent+runex+':'+str(rDict[sfile][runex])) continue print(indent+runex) if type(rDict[sfile][runex])==bytes: print(indent*2+rDict[sfile][runex]) else: for var in rDict[sfile][runex]: if var.startswith("test"): continue print(indent*2+var+": "+str(rDict[sfile][runex][var])) if 'subtests' in rDict[sfile][runex]: for var in rDict[sfile][runex]['subtests']: print(indent*2+var) for var2 in rDict[sfile][runex][var]: print(indent*3+var2+": "+str(rDict[sfile][runex][var][var2])) print("\n") return def main(directory='',test_file='',verbosity=0): if directory: tDict=parseTestDir(directory,verbosity) else: tDict=parseTestFile(test_file,verbosity) if verbosity>0: printExParseDict(tDict) return if __name__ == '__main__': import optparse parser = optparse.OptionParser() parser.add_option('-d', '--directory', dest='directory', default="", help='Directory containing files to parse') parser.add_option('-t', '--test_file', dest='test_file', default="", help='Test file, e.g., ex1.c, to parse') parser.add_option('-v', '--verbosity', dest='verbosity', help='Verbosity of output by level: 1, 2, or 3', default=0) opts, extra_args = parser.parse_args() if extra_args: import sys sys.stderr.write('Unknown arguments: %s\n' % ' '.join(extra_args)) exit(1) if not opts.test_file and not opts.directory: print("test file or directory is required") parser.print_usage() sys.exit() # Need verbosity to be an integer try: verbosity=int(opts.verbosity) except: raise Exception("Error: Verbosity must be integer") main(directory=opts.directory,test_file=opts.test_file,verbosity=verbosity)