xref: /petsc/config/query_tests.py (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1#!/usr/bin/env python3
2import fnmatch
3import glob
4import inspect
5import os
6import optparse
7import pickle
8import re
9import sys
10
# Absolute path of this script.
thisfile = os.path.abspath(inspect.getfile(inspect.currentframe()))

# Climb five directory levels up from the script.
# NOTE(review): this depth assumes the script lives four directories below
# the directory that holds 'config' -- confirm it still matches the layout.
pdir = thisfile
for _ in range(5):
    pdir = os.path.dirname(pdir)

# Make the modules under <pdir>/config importable (testparse, gmakegentest).
sys.path.insert(0, os.path.join(pdir, 'config'))
14
15import testparse
16from gmakegentest import nameSpace
17
18
19"""
20  Tool for querying the tests.
21
22  Which tests to query?  Two options:
23      1. Query only the tests that are run for a given configuration.
24      2. Query all of the test files in the source directory
25  For #1:
26     Use dataDict as written out by gmakegentest.py in $PETSC_ARCH/$TESTBASE
27  For #2:
28     Walk the entire tree parsing the files as we go along using testparse.
29     The tree walker is simpler than what is in gmakegentest.py
30
31  The dataDict follows that generated by testparse.  gmakegentest.py does
32  further manipulations of the dataDict to handle things like for loops
33  so if using #2, those modifications are not included.
34
35  Querying:
      The dataDict dictionary is then "inverted" to create a dictionary with the
      range of field values as keys and lists of test names as the values.  This
      allows fast searching.
39
40"""
41
def isFile(maybeFile):
  """
  Return True if maybeFile is named like a test source file.

  Only the extension is inspected (the path does not have to exist).  The
  recognized extensions are .c, .cxx, .cpp, .cu, .F90 and .F; the comparison
  is case-sensitive.  Names without an extension (e.g. directories) give
  False.
  """
  # os.path.splitext() keeps the leading dot on the extension, so every
  # entry must carry the dot as well or it can never match.
  ext=os.path.splitext(maybeFile)[1]
  return ext in ('.c','.cxx','.cpp','.cu','.F90','.F')
47
def pathToLabel(path):
  """
  Turn a source path into a glob-style label for matching test names.

  Surrounding whitespace, every occurrence of pdir and every 'src/' are
  removed first.  Then:
    * a source file (see isFile) becomes  <dir with / -> _>-<file stem>_*
    * anything else has trailing '/' removed and '/' replaced by '_', with
      'tests_' and 'tutorials_' rewritten to 'tests-' and 'tutorials-'.
  """
  trimmed=path.strip().replace(pdir,'').replace('src/','')

  if not isFile(trimmed):
    # Directory-like path
    flat=trimmed.rstrip('/').replace("/","_")
    return flat.replace('tests_','tests-').replace('tutorials_','tutorials-')

  # Source file: keep directory and file stem apart with a '-'
  dirpart,filepart=os.path.split(trimmed)
  stem=os.path.splitext(filepart)[0]
  return dirpart.replace("/","_")+"-"+stem+'_*'
64
def get_value(varset):
  """
  Reduce one option of an args string to the term used as a search key.

  Searching args is a bit funky.  Consider
      args:  -ksp_monitor_short -pc_type ml -ksp_max_it 3
  The search terms are:
    ksp_monitor_short, 'pc_type ml', ksp_max_it
  Loops are dropped, so
    -pc_fieldsplit_diag_use_amat {{0 1}}
  gives pc_fieldsplit_diag_use_amat as the search term.  Numeric values are
  dropped as well.

  Options starting with '-f ' (use matrices from file) return None because
  that kind of information is assumed not to be needed for testing; grep
  for it if it is.
  NOTE(review): get_inverse_dictionary() splits the args string on the
  leading '-', so the pieces it passes in look like 'f <file>' and this
  guard does not fire for them -- confirm whether that is intended.

  Returns the cleaned-up string (possibly empty), or None for '-f ' options.
  """
  if varset.startswith('-f '): return None

  # First remove loops
  value=re.sub(r'\{\{.*\}\}','',varset)
  # Next remove leading '-'.  This must continue from the loop-free string;
  # restarting from varset would bring the {{...}} loop back.
  value=value.lstrip("-")
  # Get rid of numbers (ints, floats, exponents, with optional sign)
  value=re.sub(r"[+-]? *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?",'',value)
  # return without surrounding spaces
  return value.strip()
89
def query(invDict,fields,labels):
    """
    Search the inverse dictionary and return the sorted list of unique test
    names that match.

    fields and labels are parallel strings: ',' separates terms whose results
    are unioned and '|' separates terms whose results are intersected, e.g.
    fields='requires|suffix', labels='cuda*|a'.  The i-th label is the
    fnmatch pattern applied to the i-th field.

    For the 'name' field the pattern is matched against the list
    invDict['name']; a label containing '/' is first converted with
    pathToLabel(), and a leading 'src' (plus any '*' following it) is
    removed otherwise.

    For every other field the pattern is matched case-insensitively against
    the keys of invDict[field] and the tests of all matching keys are
    collected.  Keys starting with '!' are only returned when the label
    itself starts with '!' (and the other way around).

    Raises ValueError when fewer labels than fields are given.
    """
    fieldlist=fields.replace('|',',').split(',')
    llist=labels.replace('|',',').split(',')
    if len(llist) < len(fieldlist):
        raise ValueError("Need one label per field: got %d field(s) but %d label(s)"
                         % (len(fieldlist),len(llist)))

    # setlist holds exactly ONE list of test names per field, in field order.
    # The union/intersection pass below indexes it by field position, so a
    # label matching several keys has to be merged into a single entry.
    setlist=[]
    for field,label in zip(fieldlist,llist):
        if field == 'name':
            if '/' in label:
                label=pathToLabel(label)
            elif label.startswith('src'):
                # Drop the literal prefix; str.lstrip('src') would strip any
                # run of the characters 's', 'r' and 'c' instead.
                label=label[len('src'):].lstrip('*')
            setlist.append(fnmatch.filter(invDict['name'],label))
            continue

        label=label.lower()
        negated=label.startswith('!')
        matches=[]
        for key in invDict[field]:
            if not fnmatch.fnmatch(key.lower(),label): continue
            # Do not return values with not unless label itself has not
            if negated != key.startswith('!'): continue
            matches.extend(invDict[field][key])
        # A misspelled label simply contributes an empty list
        setlist.append(matches)

    # Now process the union and intersection operators based on setlist
    allresults=[]
    start=0
    for ufield in fields.split(','):
        nterms=ufield.count('|')+1
        group=setlist[start:start+nterms]
        start+=nterms
        if nterms > 1:
            # Intersection of all '|'-joined terms
            allresults+=list(set(group[0]).intersection(*group[1:]))
        else:
            # Union: just accumulate
            allresults+=group[0]

    # remove duplicate entries and sort to give consistent results
    return sorted(set(allresults))
143
def get_inverse_dictionary(dataDict,fields,srcdir):
    """
    Create a dictionary with the values of each field as the keys, and the
    names of the tests having that value as the results.

    dataDict is indexed as dataDict[root][exfile][test][field]; fields is a
    ','/'|' separated list of field names.  The returned dictionary has one
    entry per requested field:
      * invDict['name'] is a flat list of all test names
      * invDict[field] maps each value to a duplicate-free, sorted list of
        test names
    Test names are built with nameSpace() from the default output file root
    of the test and the path of root relative to srcdir.  Entries whose key
    is in testparse.buildkeys are skipped.

    Values of 'args' and 'diff_args' are split into individual options and
    normalized with get_value(); values of all other fields are split on
    whitespace.
    """
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    # The leading group is non-capturing so that re.split() returns only the
    # options and not the separator characters in front of each '-'.
    argsplit=re.compile(r'(?:^|\W)-(?=[a-zA-Z])')

    invDict={}
    # Comma-delimited lists denote union
    for field in fields.replace('|',',').split(','):
        if field not in invDict:
            # 'name' is a plain list for ease; everything else maps value->tests
            invDict[field]=[] if field == 'name' else {}
        for root in dataDict:
            relroot=os.path.relpath(root,srcdir)
            for exfile in dataDict[root]:
                for test in dataDict[root][exfile]:
                    if test in testparse.buildkeys: continue
                    defroot = testparse.getDefaultOutputFileRoot(test)
                    fname=nameSpace(defroot,relroot)
                    if field == 'name':
                        invDict['name'].append(fname)
                        continue
                    testdata=dataDict[root][exfile][test]
                    if field not in testdata: continue
                    values=testdata[field]

                    if field == 'args' or field == 'diff_args':
                        # Args are funky: one search term per option
                        candidates=[get_value(varset) for varset in argsplit.split(values)]
                    else:
                        candidates=values.split()
                    for val in candidates:
                        if not val: continue
                        invDict[field].setdefault(val,[]).append(fname)
        # remove duplicate entries (multiple test/file)
        if not field == 'name':
            for val in invDict[field]:
                invDict[field][val]=sorted(set(invDict[field][val]))

    return invDict
190
def get_gmakegentest_data(testdir,petsc_dir,petsc_arch):
    """
    Read the dataDict from the pickle file in testdir and return it.

    The file is testdir/datatest.pkl.  If it does not exist,
    config/gmakegentest.py is run from petsc_dir (with --petsc-dir,
    --petsc-arch and --testdir) to regenerate it; its standard output is
    discarded and its exit status is not checked.  If the file is still
    missing afterwards, opening it raises the usual OSError.
    """
    import subprocess

    # This needs to be consistent with gmakegentest.py of course
    pkl_file=os.path.join(testdir,'datatest.pkl')
    # If it doesn't exist, then we need to regenerate
    if not os.path.exists(pkl_file):
        # Argument list instead of a shell string so that paths containing
        # spaces survive; cwd= instead of chdir so that the working
        # directory of this process is never left changed.
        script=os.path.join(os.path.abspath(petsc_dir),'config','gmakegentest.py')
        cmd=[script,
             '--petsc-dir='+petsc_dir,
             '--petsc-arch='+petsc_arch,
             '--testdir='+testdir]
        subprocess.call(cmd,cwd=petsc_dir,stdout=subprocess.DEVNULL)

    # NOTE: unpickling executes whatever the file describes; only point
    # testdir at directories whose datatest.pkl you trust.
    with open(pkl_file,'rb') as fd:
        return pickle.load(fd)
209
def walktree(top):
    """
    Walk a directory tree, starting from 'top', and parse every test source
    file found with testparse.parseTestFile().

    Returns a dictionary keyed by directory path; each value is the merge of
    the dictionaries returned by parseTestFile() for the source files
    (.c, .cxx, .cpp, .cu, .F90, .F) in that directory.  Directories named
    'output' and paths containing '.dSYM' are skipped, as are emacs
    autosave/lock files.
    """
    verbose = False
    source_exts = ('.c','.cxx','.cpp','.cu','.F90','.F')
    dataDict = {}
    for root, dirs, files in os.walk(top, topdown=False):
        # root is the full path of the directory, so compare its last
        # component; comparing root itself only matches when top=='output'
        if os.path.basename(root) == 'output': continue
        if '.dSYM' in root: continue
        if verbose: print(root)

        dataDict[root] = {}

        for exfile in files:
            # Ignore emacs files
            if exfile.startswith("#") or exfile.startswith(".#"): continue
            if os.path.splitext(exfile)[1] not in source_exts: continue

            # Convenience
            fullex = os.path.join(root, exfile)
            if verbose: print('   --> '+fullex)
            dataDict[root].update(testparse.parseTestFile(fullex, 0))

    return dataDict
236
def do_query(use_source, startdir, srcdir, testdir, petsc_dir, petsc_arch,
             fields, labels, searchin):
    """
    Run one query from start to finish and print the matching test names.

    This lives outside of main() so that the same steps can be replayed from
    an ipython/jupyter session for more advanced queries:
      1. build the dataDict, either by walking startdir (use_source) or by
         loading what gmakegentest.py wrote for testdir
      2. invert it for the requested fields
      3. run the query
      4. optionally filter the names with the searchin pattern; a leading
         '!' keeps the names that do NOT match the rest of the pattern
    The result is printed as one space-separated line, suitable for use by
    gmakefile.test.
    """
    # Step 1: where does the data come from?
    if use_source:
        dataDict=walktree(startdir)
    else:
        dataDict=get_gmakegentest_data(testdir, petsc_dir, petsc_arch)

    # Steps 2 and 3: invert, then search
    matches=query(get_inverse_dictionary(dataDict, fields, srcdir), fields, labels)

    # Step 4: post-filter on the test names
    if searchin.strip():
        exclude=searchin.startswith('!')
        pattern=searchin[1:] if exclude else searchin
        matches=[name for name in matches
                 if bool(fnmatch.filter([name],pattern)) != exclude]

    print(' '.join(matches))
274
def expand_path_like(petscdir,petscarch,pathlike):
    """
    Rewrite a path-like match pattern so that it is relative to petscdir.

    Environment variables and '~' in pathlike are expanded.  If the
    directory part of the result exists, the pattern is assumed to be a
    path or glob expression: the directory part is made relative to
    petscdir and the remainder (separator plus last component) is appended
    unchanged.  When petscarch is the empty string, 'share/petsc/examples/'
    is additionally removed from the relative directory.  Anything else is
    returned untouched.
    """
    def remove_prefix(text,prefix):
        return text[len(prefix):] if text.startswith(prefix) else text

    # expand user second, as expandvars may insert a '~'
    string = os.path.expanduser(os.path.expandvars(pathlike))
    # if the dirname check succeeds then likely we have a glob expression
    pardir = os.path.dirname(string)
    if os.path.exists(pardir):
        # pardir is a prefix of string, so slice it off.  str.replace()
        # would also delete later occurrences, e.g. with pardir == '.' every
        # dot of './ex1.c' including the one of the extension.
        suffix   = string[len(pardir):] # get whatever is left over
        pathlike = remove_prefix(os.path.relpath(os.path.abspath(pardir),petscdir),'.'+os.path.sep)
        if petscarch == '':
            pathlike = pathlike.replace(os.path.sep.join(('share','petsc','examples','')),'')
        pathlike += suffix
    return pathlike
290
def main():
    """
    Command-line entry point.

    Parses the options and the two positional arguments (field and
    match_pattern), derives the source and test directories from PETSC_DIR
    and PETSC_ARCH (an unset or empty PETSC_ARCH selects the examples under
    PETSC_DIR/share/petsc/examples/src), checks that the required
    directories exist and then calls do_query().  Problems are reported with
    a printed message followed by a plain return.
    """
    parser = optparse.OptionParser(usage="%prog [options] field match_pattern")
    parser.add_option('-s', '--startdir', dest='startdir',
                      help='Where to start the recursion if not srcdir',
                      default='')
    parser.add_option('-p', '--petsc-dir', dest='petsc_dir',
                      help='Set PETSC_DIR different from environment',
                      default=os.environ.get('PETSC_DIR'))
    parser.add_option('-a', '--petsc-arch', dest='petsc_arch',
                      help='Set PETSC_ARCH different from environment',
                      default=os.environ.get('PETSC_ARCH'))
    parser.add_option('--srcdir', dest='srcdir',
                      help='Set location of sources different from PETSC_DIR/src.  Must be full path.',
                      default='src')
    parser.add_option('-t', '--testdir', dest='testdir',
                      help='Test directory if not PETSC_ARCH/tests.  Must be full path',
                      default='tests')
    # store_true with an explicit default: with store_false (and no default)
    # the value is None without the flag and False with it, i.e. never true.
    parser.add_option('-u', '--use-source', action="store_true",
                      dest='use_source', default=False,
                      help='Query all sources rather than those configured in PETSC_ARCH')
    parser.add_option('-i', '--searchin', dest='searchin',
                      help='Filter results from the arguments',
                      default='')

    opts, args = parser.parse_args()

    # Argument Sanity checks
    if len(args) != 2:
        parser.print_usage()
        print('Arguments: ')
        print('  field:          Field to search for; e.g., requires')
        print('                  To just match names, use "name"')
        print('  match_pattern:  Matching pattern for field; e.g., cuda')
        return

    def shell_unquote(string):
        """
        Remove quotes from STRING. Useful in the case where you need to bury escaped quotes in a query
        string in order to escape shell characters. For example:

        $ make test query='foo,bar' queryval='requires|name'
        /usr/bin/bash: line 1: name: command not found

        While the original shell does not see the pipe character, the actual query is done via a second
        shell, which is (literally) passed '$(queryval)', i.e. 'queryval='requires|name'' when expanded.
        Note the fact that the expansion cancels out the quoting!!!

        You can fix this by doing:

        $ make test query='foo,bar' queryval='"requires|name"'

        However this then shows up here as labels = 'queryval="requires|name"'. So we need to remove the
        '"'. Applying shlex.split() on this returns:

        >>> shlex.split('queryval="requires|name"')
        ['queryval=requires|name']

        And voila. Note also that:

        >>> shlex.split('queryval=requires|name')
        ['queryval=requires|name']

        Raises ValueError if STRING splits into more than one shell word.
        """
        import shlex

        if string:
            ret = shlex.split(string)
            # Explicit raise rather than assert: asserts vanish under python -O
            if len(ret) != 1:
                raise ValueError("Dont know what to do if shlex.split() produces more than 1 value?")
            string = ret[0]
        return string

    def alternate_command_preprocess(string):
        """
        Replace the alternate versions in STRING with the regular variants
        """
        return string.replace('%OR%', '|').replace('%AND%', ',').replace('%NEG%', '!')

    # Process arguments and options -- mostly just paths here
    field=alternate_command_preprocess(shell_unquote(args[0]))
    match=alternate_command_preprocess(shell_unquote(args[1]))
    searchin=opts.searchin

    # Check PETSC_DIR before building any path from it: it is None when
    # neither the option nor the environment variable is set.
    petsc_dir = opts.petsc_dir
    if not petsc_dir or not os.path.isdir(petsc_dir):
        print("PETSC_DIR must be a directory")
        return

    # An unset PETSC_ARCH is handled like an empty one
    petsc_arch = opts.petsc_arch or ''
    petsc_full_arch = os.path.join(petsc_dir, petsc_arch)

    if petsc_arch == '':
        petsc_full_src = os.path.join(petsc_dir, 'share', 'petsc', 'examples', 'src')
    elif opts.srcdir == 'src':
        petsc_full_src = os.path.join(petsc_dir, 'src')
    else:
        petsc_full_src = opts.srcdir

    if opts.testdir == 'tests':
        petsc_full_test = os.path.join(petsc_full_arch, 'tests')
    else:
        petsc_full_test = opts.testdir

    # Honor --startdir when given, otherwise start at the source directory
    startdir = opts.startdir if opts.startdir else petsc_full_src

    # Options Sanity checks
    if not opts.use_source:
        if not os.path.isdir(petsc_full_arch):
            print("PETSC_DIR/PETSC_ARCH must be a directory")
            return
        elif not os.path.isdir(petsc_full_test):
            print("Testdir must be a directory: "+petsc_full_test)
            return
    else:
        if not os.path.isdir(petsc_full_src):
            print("Source directory must be a directory: "+petsc_full_src)
            return

    match = expand_path_like(petsc_dir,petsc_arch,match)

    # Do the actual query
    do_query(opts.use_source, startdir, petsc_full_src, petsc_full_test,
             petsc_dir, petsc_arch, field, match, searchin)

    return


if __name__ == "__main__":
    main()
420