xref: /petsc/config/query_tests.py (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
1#!/usr/bin/env python3
2import fnmatch
3import glob
4import inspect
5import os
6import optparse
7import pickle
8import re
9import sys
10
# Absolute path of this script file.
thisfile = os.path.abspath(inspect.getfile(inspect.currentframe()))
# Directory five levels above this file; pathToLabel() strips this prefix from paths.
# NOTE(review): when this file lives in <petsc>/config/, the PETSc root is only two
# levels up -- confirm that five levels matches the intended (installed?) layout.
petscdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(thisfile)))))
# Search <petscdir>/config first, presumably so the testparse and gmakegentest
# imports that follow can be resolved.
sys.path.insert(0, os.path.join(petscdir, 'config'))
14
15import testparse
16from gmakegentest import nameSpace
17
18"""
19   This is used by gmakefile.test for the following searches
20
21  - make test search=X (or s=X)
22  - make test query=X (or q=X) queryval=Y (or qv=Y)
23
24  Which tests to query?  Two options:
25      1. Query only the tests that are run for a given configuration.
26      2. Query all of the test files in the source directory
27  For #1:
28     Use dataDict as written out by gmakegentest.py in $PETSC_ARCH/$TESTBASE
29  For #2:
30     Walk the entire tree parsing the files as we go along using testparse.
31     The tree walker is simpler than what is in gmakegentest.py
32
33  The dataDict follows that generated by testparse.  gmakegentest.py does
34  further manipulations of the dataDict to handle things like for loops
35  so if using #2, those modifications are not included.
36
37  Querying:
      The dataDict dictionary is then "inverted" to create a dictionary with the
      range of field values as keys and lists of test names as the values.  This
      allows fast searching.
41
42"""
43
def isFile(maybeFile):
  """
  Return True if maybeFile has the extension of a test source file.

  Only the extension is examined; the filesystem is not consulted.
  Recognized extensions: .c .cxx .cpp .F90 .F .cu
  """
  # splitext() returns the extension *with* its leading dot (or '' if none),
  # so every entry here must carry the dot as well.
  ext=os.path.splitext(maybeFile)[1]
  return ext in ('.c','.cxx','.cpp','.F90','.F','.cu')
49
def pathToLabel(path):
  """
  Convert a source path into the label pattern used to match test names.

  The test scripts are not uniquely named, so the label has to carry both
  the source directory and the source file.  Two forms are produced:
    - a source file (per isFile):  <dir with '/' -> '_'>-<file stem>_*
    - anything else (a directory): '/' -> '_', with 'tests_' and
      'tutorials_' rewritten to 'tests-' and 'tutorials-'
  """
  # Drop surrounding whitespace, the petscdir prefix and any 'src/' pieces
  trimmed=path.strip().replace(petscdir,'').replace('src/','')

  if not isFile(trimmed):
    dirlabel=trimmed.rstrip('/').replace("/","_")
    return dirlabel.replace('tests_','tests-').replace('tutorials_','tutorials-')

  dirpart,filepart=os.path.split(trimmed)
  stem=os.path.splitext(filepart)[0]
  return dirpart.replace("/","_")+"-"+stem+'_*'
66
def get_value(varset):
  """
  Reduce one option (plus its value) from an args string to a search term.

  Searching args is a bit funky.  Consider
      args: -ksp_monitor_short -pc_type ml -ksp_max_it 3
  The search terms are:
      ksp_monitor_short, 'pc_type ml', ksp_max_it
  Loops are ignored, so
      -pc_fieldsplit_diag_use_amat {{0 1}}
  gives pc_fieldsplit_diag_use_amat as the search term.
  -f ... (use matrices from file) is ignored as well, on the assumption that
  this information is not needed for testing; grep for it if it is.

  Returns the search term with leading dashes, {{...}} loops, numbers and
  surrounding whitespace removed, or None for the -f option.
  """
  # The option may arrive with or without its leading dash (a caller that
  # splits an args string on '-' has already consumed it), so test both forms.
  if varset.lstrip("-").startswith('f '): return None

  # First remove loops (non-greedy so text between two loops survives)
  value=re.sub(r'\{\{.*?\}\}','',varset)
  # Next remove - ; operate on the loop-free string, not on varset again
  value=value.lstrip("-")
  # Get rid of numbers
  value=re.sub(r"[+-]? *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?",'',value)
  # return without spaces
  return value.strip()
91
def query(invDict,fields,labels):
    """
    Search the keys using fnmatch to find matching names and return a sorted
    list of the matching test names without duplicates.

    invDict: inverse dictionary; invDict['name'] is a list of test names and
             invDict[<other field>] maps a field value to a list of test names
    fields:  field names separated by ',' (results are unioned) and/or
             '|' (results within the group are intersected)
    labels:  one fnmatch pattern per field, using the same separators.
             A pattern starting with '!' only matches keys that start
             with '!', and vice versa.

    Raises IndexError if there are fewer labels than fields.
    """
    fieldlist=fields.replace('|',',').split(',')
    llist=labels.replace('|',',').split(',')
    if len(llist) < len(fieldlist):
        raise IndexError("query: %d field(s) given but only %d label(s)"
                         % (len(fieldlist), len(llist)))

    # setlist holds exactly ONE list of test names per field so that the
    # set operations below can index it by field position
    setlist=[]
    for field,label in zip(fieldlist,llist):
        if field == 'name':
            if '/' in label:
                label=pathToLabel(label)
            elif label.startswith('src'):
                # Remove the literal 'src' prefix (lstrip('src') would strip
                # any run of the characters s, r, c) and then leading wildcards
                label=label[len('src'):].lstrip('*')
            setlist.append(fnmatch.filter(invDict['name'],label))
            continue

        label=label.lower()
        negated=label.startswith('!')
        matches=[]   # stays empty if nothing matches, e.g., a misspelled label
        for key,tests in invDict[field].items():
            if not fnmatch.fnmatch(key.lower(),label): continue
            # Do not return values with not unless label itself has not
            if key.startswith('!') != negated: continue
            # A wildcard may match several keys: collect the tests of all of them
            matches.extend(tests)
        setlist.append(matches)

    # Now process the union and intersection operators based on setlist
    allresults=[]
    i=0
    for ufield in fields.split(','):
        # Each ','-separated group owns one setlist entry per '|'-separated field
        nterms=ufield.count('|')+1
        group=setlist[i:i+nterms]
        i+=nterms
        # Intersection within the group
        results=set(group[0])
        for other in group[1:]:
            results&=set(other)
        # Union across groups
        allresults.extend(results)

    # remove duplicate entries and sort to give consistent results
    return sorted(set(allresults))
145
def get_inverse_dictionary(dataDict,fields,srcdir):
    """
    Invert dataDict so that it can be searched by field value.

    For every field listed in fields (separated by ',' or '|') the result has
    an entry:
      - 'name':        a list with the name of every test
      - anything else: a dictionary mapping each value found for that field
                       to the list of test names that have it
    Test names are built with nameSpace() from the test's default output file
    root and the path of its directory relative to srcdir.
    """
    invDict={}
    # Both separators just delimit field names here
    for field in fields.replace('|',',').split(','):
        wantNames=(field == 'name')
        if field not in invDict:
            invDict[field]=[] if wantNames else {}
        isArgField=field in ('args','diff_args')

        for root,exfiles in dataDict.items():
            for exfile,tests in exfiles.items():
                for test in tests:
                    if test in testparse.buildkeys: continue
                    defroot=testparse.getDefaultOutputFileRoot(test)
                    fname=nameSpace(defroot,os.path.relpath(root,srcdir))
                    if wantNames:
                        invDict['name'].append(fname)
                        continue

                    testinfo=tests[test]
                    if field not in testinfo: continue
                    values=testinfo[field]

                    if isArgField:
                        # Args are funky: split into one chunk per option and
                        # let get_value() reduce each chunk to a search term
                        chunks=re.split(r'(^|\W)-(?=[a-zA-Z])',values)
                        terms=[term for term in map(get_value,chunks) if term]
                    else:
                        terms=values.split()

                    for term in terms:
                        invDict[field].setdefault(term,[]).append(fname)

        # A test name can be recorded several times for one value
        # (multiple test/file); keep each only once
        if not wantNames:
            bucket=invDict[field]
            for term in bucket:
                bucket[term]=list(set(bucket[term]))

    return invDict
192
def get_gmakegentest_data(srcdir,testdir,petsc_dir,petsc_arch):
    """
    Read the dataDict from the pickle file in testdir and return it.

    If testdir/datatest.pkl does not exist, config/gmakegentest.py is first run
    from petsc_dir with the given srcdir, testdir, petsc_dir and petsc_arch,
    which is expected to create it.

    Raises an OSError (e.g., FileNotFoundError) if gmakegentest.py cannot be
    started or the pickle file is still missing afterwards.
    """
    # This needs to be consistent with gmakegentest.py of course
    pkl_file=os.path.join(testdir,'datatest.pkl')
    # If it doesn't exist, then we need to regenerate
    if not os.path.exists(pkl_file):
        import subprocess
        petsc_root=os.path.abspath(petsc_dir)
        # An argument list (no shell) keeps paths containing spaces or shell
        # characters intact; cwd= replaces the chdir/chdir-back pair so this
        # process' working directory is never left changed by an error.
        cmd=[os.path.join(petsc_root,'config','gmakegentest.py'),
             '--petsc-dir='+petsc_dir,
             '--petsc-arch='+petsc_arch,
             '--testdir='+testdir,
             '--srcdir='+srcdir]
        # The generator's stdout is not used; its exit status is not checked
        # (a failure shows up as a missing pickle file below).
        subprocess.run(cmd,cwd=petsc_root,stdout=subprocess.DEVNULL)

    # NOTE: unpickling can execute arbitrary code.  The file is expected to be
    # the one written by gmakegentest.py; do not point testdir at untrusted data.
    with open(pkl_file,'rb') as fd:
        dataDict=pickle.load(fd)
    return dataDict
211
def walktree(top):
    """
    Walk a directory tree, starting from 'top', and parse every test source.

    Returns a dictionary with one entry per visited directory (keyed by the
    directory path as produced by os.walk) holding the merged results of
    testparse.parseTestFile() for the source files found directly in it.
    Directories named 'output' and paths containing '.dSYM' are skipped.
    """
    verbose = False
    dataDict = {}
    for root, _dirs, files in os.walk(top, topdown=False):
        # root is a full path, so compare its last component (comparing the
        # whole path against 'output' would never skip anything)
        if os.path.basename(root) == 'output': continue
        if '.dSYM' in root: continue
        if verbose: print(root)

        dataDict[root] = {}

        for exfile in files:
            # Ignore emacs files
            if exfile.startswith("#") or exfile.startswith(".#"): continue
            ext=os.path.splitext(exfile)[1]
            # ext includes the leading dot; drop it before comparing
            if ext[1:] not in ['c','cxx','cpp','cu','F90','F']: continue

            # Convenience
            fullex = os.path.join(root, exfile)
            if verbose: print('   --> '+fullex)
            dataDict[root].update(testparse.parseTestFile(fullex, 0))

    return dataDict
238
def do_query(use_source, startdir, srcdir, testdir, petsc_dir, petsc_arch,
             fields, labels, searchin):
    """
    Run a query from start to finish and print the matching test names.

    The steps are kept out of main() so that they can be replayed one at a
    time from an ipython/jupyter session for more advanced queries:
      1. obtain dataDict, either by walking startdir (use_source) or from
         the data written by gmakegentest.py
      2. invert it for the requested fields
      3. query it with the labels
      4. optionally filter the names with the searchin pattern; a leading
         '!' keeps only the names that do NOT match the rest of the pattern
    The result is printed as a single space-separated line suitable for use
    by gmakefile.test.
    """
    # Get dictionary
    dataDict=(walktree(startdir) if use_source
              else get_gmakegentest_data(srcdir,testdir, petsc_dir, petsc_arch))

    # Invert it and run the query
    invDict=get_inverse_dictionary(dataDict, fields, srcdir)
    matches=query(invDict, fields, labels)

    # Filter results using searchin
    if searchin.strip():
        if searchin.startswith('!'):
            excluded=searchin[1:]
            matches=[name for name in matches
                     if not fnmatch.fnmatch(name,excluded)]
        else:
            matches=fnmatch.filter(matches,searchin)

    # Print in flat list suitable for use by gmakefile.test
    print(' '.join(matches))

    return
276
def expand_path_like(petscdir,petscarch,pathlike):
    """
    Normalize a label that may be a path (or a glob on a path).

    Environment variables and '~' are expanded.  If the parent directory of
    the result exists, the label is treated as a path: the parent directory
    is rewritten relative to petscdir (and, when petscarch is '', the
    share/petsc/examples/ component is dropped) and whatever followed the
    parent directory is appended unchanged.  Finally every 'diff-' is removed.

    Returns the normalized label; a label that is not path-like is returned
    with only the 'diff-' removal applied.
    """
    def remove_prefix(text,prefix):
        return text[len(prefix):] if text.startswith(prefix) else text

    # expand user second, as expandvars may insert a '~'
    string = os.path.expanduser(os.path.expandvars(pathlike))
    # if the dirname check succeeds then likely we have a glob expression
    pardir = os.path.dirname(string)
    if os.path.exists(pardir):
        # get whatever is left over; dirname() is always a prefix of its
        # argument, so slice it off (str.replace would also delete any later
        # occurrence of the same text, e.g. 'a/ab*' -> '/b*')
        suffix   = string[len(pardir):]
        pathlike = remove_prefix(os.path.relpath(os.path.abspath(pardir),petscdir),'.'+os.path.sep)
        if petscarch == '':
            pathlike = pathlike.replace(os.path.join('share','petsc','examples',''),'')
        pathlike += suffix
    pathlike = pathlike.replace('diff-','')
    return pathlike
293
def main():
    """
    Command-line driver: parse the options, resolve the PETSc paths, check
    them, and run the query.

    Two positional arguments are required (field and match_pattern); if they
    are missing the usage is printed and the function returns.  If a required
    directory does not exist a message is printed and the function returns.
    Otherwise the matching test names are printed by do_query().
    """
    parser = optparse.OptionParser(usage="%prog [options] field match_pattern")
    parser.add_option('-s', '--startdir', dest='startdir',
                      help='Where to start the recursion if not srcdir',
                      default='')
    parser.add_option('-p', '--petsc-dir', dest='petsc_dir',
                      help='Set PETSC_DIR different from environment',
                      default=os.environ.get('PETSC_DIR'))
    parser.add_option('-a', '--petsc-arch', dest='petsc_arch',
                      help='Set PETSC_ARCH different from environment',
                      default=os.environ.get('PETSC_ARCH'))
    parser.add_option('--srcdir', dest='srcdir',
                      help='Set location of sources different from PETSC_DIR/src.  Must be full path.',
                      default='src')
    parser.add_option('-t', '--testdir', dest='testdir',
                      help='Test directory if not PETSC_ARCH/tests.  Must be full path',
                      default='tests')
    # store_true with an explicit default: the flag has to switch source
    # walking ON (store_false without a default could never yield True)
    parser.add_option('-u', '--use-source', action="store_true",
                      dest='use_source', default=False,
                      help='Query all sources rather than those configured in PETSC_ARCH')
    parser.add_option('-i', '--searchin', dest='searchin',
                      help='Filter results from the arguments',
                      default='')

    opts, args = parser.parse_args()

    # Argument Sanity checks
    if len(args) != 2:
        parser.print_usage()
        print('Arguments: ')
        print('  field:          Field to search for; e.g., requires')
        print('                  To just match names, use "name"')
        print('  match_pattern:  Matching pattern for field; e.g., cuda')
        return

    def shell_unquote(string):
        """
        Remove quotes from STRING. Useful in the case where you need to bury escaped quotes in a query
        string in order to escape shell characters. For example:

        $ make test query='foo,bar' queryval='requires|name'
        /usr/bin/bash: line 1: name: command not found

        While the original shell does not see the pipe character, the actual query is done via a second
        shell, which is (literally) passed '$(queryval)', i.e. 'queryval='requires|name'' when expanded.
        Note the fact that the expansion cancels out the quoting!!!

        You can fix this by doing:

        $ make test query='foo,bar' queryval='"requires|name"'

        However this then shows up here as labels = 'queryval="requires|name"'. So we need to remove the
        '"'. Applying shlex.split() on this returns:

        >>> shlex.split('queryval="requires|name"')
        ['queryval=requires|name']

        And voila. Note also that:

        >>> shlex.split('queryval=requires|name')
        ['queryval=requires|name']

        Raises ValueError if STRING does not unquote to exactly one word.
        """
        import shlex

        if string:
            ret = shlex.split(string)
            # Explicit check rather than assert, which is stripped under -O
            if len(ret) != 1:
                raise ValueError("Dont know what to do if shlex.split() produces more than 1 value? Got %r from %r" % (ret, string))
            string = ret[0]
        return string

    def alternate_command_preprocess(string):
        """
        Replace the alternate versions in STRING with the regular variants
        """
        return string.replace('%OR%', '|').replace('%AND%', ',').replace('%NEG%', '!')

    # Process arguments and options -- mostly just paths here
    field=alternate_command_preprocess(shell_unquote(args[0]))
    labels=alternate_command_preprocess(shell_unquote(args[1]))
    searchin=opts.searchin

    petsc_dir = opts.petsc_dir
    # PETSC_ARCH may be unset; treat that like the empty arch handled below
    petsc_arch = opts.petsc_arch or ''

    # Check PETSC_DIR before it is used to build any path (it is None when
    # neither the option nor the environment variable is set)
    if not petsc_dir or not os.path.isdir(petsc_dir):
        print("PETSC_DIR must be a directory")
        return

    petsc_full_arch = os.path.join(petsc_dir, petsc_arch)

    if petsc_arch == '':
        petsc_full_src = os.path.join(petsc_dir, 'share', 'petsc', 'examples', 'src')
    elif opts.srcdir == 'src':
        petsc_full_src = os.path.join(petsc_dir, 'src')
    else:
        petsc_full_src = opts.srcdir

    if opts.testdir == 'tests':
        petsc_full_test = os.path.join(petsc_full_arch, 'tests')
    else:
        petsc_full_test = opts.testdir

    # Honor --startdir when given; otherwise start at the source directory
    startdir = opts.startdir if opts.startdir else petsc_full_src

    # Options Sanity checks
    if not opts.use_source:
        if not os.path.isdir(petsc_full_arch):
            print("PETSC_DIR/PETSC_ARCH must be a directory")
            return
        elif not os.path.isdir(petsc_full_test):
            print("Testdir must be a directory: "+petsc_full_test)
            return
    else:
        if not os.path.isdir(petsc_full_src):
            print("Source directory must be a directory: "+petsc_full_src)
            return

    labels = expand_path_like(petsc_dir,petsc_arch,labels)

    # Do the actual query
    do_query(opts.use_source, startdir, petsc_full_src, petsc_full_test,
             petsc_dir, petsc_arch, field, labels, searchin)

    return
419
# Run the query only when executed as a script, not when imported as a module.
if __name__ == "__main__":
        main()
422