xref: /petsc/config/query_tests.py (revision 6dd63270497ad23dcf16ae500a87ff2b2a0b7474)
1#!/usr/bin/env python3
2import fnmatch
3import glob
4import inspect
5import os
6import optparse
7import pickle
8import re
9import sys
10
# Absolute location of this script on disk.
thisfile = os.path.abspath(inspect.getfile(inspect.currentframe()))
# Climb five directory levels from the script to get the directory used as
# the PETSc root.
# NOTE(review): five levels presumes a specific install depth for this
# script -- confirm it matches where the file actually lives.
petscdir = thisfile
for _level in range(5):
    petscdir = os.path.dirname(petscdir)
# Make the helper modules in <petscdir>/config importable by the imports below.
sys.path.insert(0, os.path.join(petscdir, 'config'))
14
15import testparse
16from gmakegentest import nameSpace
17
18
19"""
20   This is used by gmakefile.test for the following searches
21
22  - make test search=X (or s=X)
23  - make test query=X (or q=X) queryval=Y (or qv=Y)
24
25
26  Which tests to query?  Two options:
27      1. Query only the tests that are run for a given configuration.
28      2. Query all of the test files in the source directory
29  For #1:
30     Use dataDict as written out by gmakegentest.py in $PETSC_ARCH/$TESTBASE
31  For #2:
32     Walk the entire tree parsing the files as we go along using testparse.
33     The tree walker is simpler than what is in gmakegentest.py
34
35  The dataDict follows that generated by testparse.  gmakegentest.py does
36  further manipulations of the dataDict to handle things like for loops
37  so if using #2, those modifications are not included.
38
39  Querying:
      The dataDict dictionary is then "inverted" to create a dictionary with the
      range of field values as keys and lists of test names as the values.  This
      allows fast searching.
43
44"""
45
def isFile(maybeFile):
  """
  Return True if maybeFile looks like a test source file, judged only by its
  extension.

  Recognized extensions are .c, .cxx, .cpp, .F90, .F and .cu.  Anything else,
  including a name with no extension at all (e.g. a directory), gives False.
  The file system is never consulted.
  """
  # os.path.splitext() keeps the leading dot on the extension, so every
  # entry in the list must carry the dot as well.
  ext=os.path.splitext(maybeFile)[1]
  return ext in ('.c','.cxx','.cpp','.F90','.F','.cu')
51
def pathToLabel(path):
  """
  Convert a source path into the label pattern used to match test names.

  The module-level petscdir and every 'src/' are removed from the path first.
  A path that isFile() accepts becomes '<dir with / as _>-<file stem>_*';
  any other path is treated as a directory: trailing slashes are dropped,
  '/' becomes '_', and 'tests_' / 'tutorials_' become 'tests-' / 'tutorials-'.
  """
  # Strip surrounding blanks, the top-level directory and the src/ component
  trimmed=path.strip().replace(petscdir,'').replace('src/','')

  if not isFile(trimmed):
    # Directory-like input
    flattened=trimmed.rstrip('/').replace("/","_")
    return flattened.replace('tests_','tests-').replace('tutorials_','tutorials-')

  # File input: directory part and file stem are joined by a dash
  dirpart,filepart=os.path.split(trimmed)
  stem=os.path.splitext(filepart)[0]
  return dirpart.replace("/","_")+"-"+stem+'_*'
68
def get_value(varset):
  """
  Reduce one command-line argument group to its search term.

  Searching args is a bit funky.  Consider
      args: -ksp_monitor_short -pc_type ml -ksp_max_it 3
  Search terms are:
    ksp_monitor_short, 'pc_type ml', ksp_max_it
  Loops are ignored:
    -pc_fieldsplit_diag_use_amat {{0 1}}
  gives pc_fieldsplit_diag_use_amat as the search term.
  Numeric values are removed as well.

  -f ... (use matrices from file) is ignored entirely because that kind of
  information is assumed not to be needed for testing; if it is, just grep
  for it.

  varset may be given with or without its leading dash (the splitter in
  get_inverse_dictionary consumes the dash before calling this function).

  Returns the search term as a string (possibly empty), or None for a
  -f argument.
  """
  # First remove loops; the result must feed the following steps
  value=re.sub(r'\{\{.*\}\}','',varset)
  # Next remove leading blanks and the leading -
  value=value.lstrip().lstrip("-")
  # Ignore "-f <file>" whether or not the dash is still attached
  if value.startswith('f '): return None
  # Get rid of numbers
  value=re.sub(r"[+-]? *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?",'',value)
  # return without spaces
  return value.strip()
93
def query(invDict,fields,labels):
    """
    Search the keys using fnmatch to find matching names and return a sorted
    list of unique test names.

    invDict: inverse dictionary as built by get_inverse_dictionary();
             invDict['name'] is a list of test names, every other field maps
             a field value to a list of test names.
    fields:  field names; ',' separates alternatives whose results are
             united, '|' joins fields whose results are intersected.
    labels:  one fnmatch pattern per field, using the same separators.  A
             pattern starting with '!' only matches keys starting with '!'.

    Every key of a field that matches the pattern contributes to that
    field's result, so wildcard patterns return the tests of all matches.

    Raises IndexError if fewer labels than fields are given.
    """
    llist=labels.replace('|',',').split(',')
    fieldlist=fields.replace('|',',').split(',')
    if len(llist) < len(fieldlist):
        raise IndexError("query needs one label per field: got %d field(s) but %d label(s)"
                         % (len(fieldlist), len(llist)))

    # setlist holds exactly one list of test names per field, in field order,
    # so that the set operations below can index it by field position
    setlist=[]
    for field,label in zip(fieldlist,llist):
        if field == 'name':
            if '/' in label:
                label=pathToLabel(label)
            elif label.startswith('src'):
                # remove the literal 'src' prefix (not a character set)
                label=label[len('src'):].lstrip('*')
            setlist.append(fnmatch.filter(invDict['name'],label))
            continue

        label=label.lower()
        negated=label.startswith('!')
        matches=[]
        for key in invDict[field]:
            if not fnmatch.filter([key.lower()],label): continue
            # Do not return values with not unless label itself has not
            if negated != key.startswith('!'): continue
            matches.extend(invDict[field][key])
        # An empty list records "nothing found" (easy to do if you misspell
        # an argument search) while keeping setlist aligned with the fields
        setlist.append(matches)

    # Now process the union and intersection operators based on setlist
    allresults=set()
    perfield=iter(setlist)
    for ufield in fields.split(','):
        # A group 'a|b|c' consumes one entry per member and intersects them;
        # a plain field is a group of one
        members=[set(next(perfield)) for _ in ufield.split('|')]
        allresults.update(set.intersection(*members))

    # sort to give consistent results
    return sorted(allresults)
147
def get_inverse_dictionary(dataDict,fields,srcdir):
    """
    Invert dataDict so that it can be searched by field value.

    For every field named in fields (',' and '|' both act as separators) the
    result holds:
      - for 'name': a plain list of all test names;
      - otherwise:  a dictionary mapping each value found for that field to
                    the list of test names that carry it.
    Test names are built with nameSpace() from the default output file root
    and the test's directory relative to srcdir.
    """
    invDict={}
    for field in fields.replace('|',',').split(','):
        wantNames = (field == 'name')
        if field not in invDict:
            # 'name' is kept as a list for ease; other fields are dictionaries
            invDict[field] = [] if wantNames else {}

        for root in dataDict:
            for exfile in dataDict[root]:
                tests=dataDict[root][exfile]
                for test in tests:
                    # Build directives are not tests
                    if test in testparse.buildkeys:
                        continue
                    defroot = testparse.getDefaultOutputFileRoot(test)
                    fname=nameSpace(defroot,os.path.relpath(root,srcdir))
                    if wantNames:
                        invDict['name'].append(fname)
                        continue
                    if field not in tests[test]:
                        continue
                    values=tests[test][field]

                    if field == 'args' or field == 'diff_args':
                        # Args are funky: split on option dashes and reduce
                        # each piece to its search term, dropping empty ones
                        terms=[get_value(varset)
                               for varset in re.split(r'(^|\W)-(?=[a-zA-Z])',values)]
                        keys=[term for term in terms if term]
                    else:
                        keys=values.split()

                    for val in keys:
                        invDict[field].setdefault(val,[]).append(fname)

        # remove duplicate entries (multiple test/file)
        if not wantNames:
            for val in invDict[field]:
                invDict[field][val]=list(set(invDict[field][val]))

    return invDict
194
def get_gmakegentest_data(srcdir,testdir,petsc_dir,petsc_arch):
    """
    Read the dataDict from the pickle file 'datatest.pkl' in testdir.

    If the pickle file does not exist, config/gmakegentest.py is run from
    petsc_dir (with --petsc-dir, --petsc-arch, --testdir and --srcdir) to
    generate it; its standard output is discarded.

    Returns the unpickled dataDict.  Raises an OSError (e.g.
    FileNotFoundError) if the pickle file is still missing afterwards or the
    generator cannot be started.
    """
    import subprocess

    # This needs to be consistent with gmakegentest.py of course
    pkl_file=os.path.join(testdir,'datatest.pkl')
    # If it doesn't exist, then we need to regenerate
    if not os.path.exists(pkl_file):
        # An argument list (no shell) keeps paths containing blanks or shell
        # characters intact; cwd= replaces the chdir/chdir-back dance so the
        # caller's working directory is never left changed on failure.
        cmd=[os.path.join(os.path.abspath(petsc_dir),'config','gmakegentest.py'),
             '--petsc-dir='+petsc_dir,
             '--petsc-arch='+petsc_arch,
             '--testdir='+testdir,
             '--srcdir='+srcdir]
        subprocess.call(cmd,cwd=petsc_dir,stdout=subprocess.DEVNULL)

    # NOTE: unpickling executes whatever the file describes; testdir must
    # only ever point at a trusted build directory.
    with open(pkl_file,'rb') as fd:
        return pickle.load(fd)
213
def walktree(top):
    """
    Walk a directory tree, starting from 'top', and parse every test source
    file found with testparse.parseTestFile().

    Directories named 'output' and anything inside a .dSYM bundle are
    skipped.  Only files with the extensions c, cxx, cpp, cu, F90 and F are
    parsed; emacs backup/lock files are ignored.

    Returns a dictionary keyed by directory path; each value is the merged
    result of parseTestFile() for the source files in that directory (an
    empty dictionary if the directory has none).
    """
    verbose = False
    dataDict = {}
    for root, dirs, files in os.walk(top, topdown=False):
        # root is a full path, so compare its last component
        if os.path.basename(root) == 'output': continue
        if '.dSYM' in root: continue
        if verbose: print(root)

        dataDict[root] = {}

        for exfile in files:
            # Ignore emacs files
            if exfile.startswith("#") or exfile.startswith(".#"): continue
            ext=os.path.splitext(exfile)[1]
            if ext[1:] not in ['c','cxx','cpp','cu','F90','F']: continue

            # Convenience
            fullex = os.path.join(root, exfile)
            if verbose: print('   --> '+fullex)
            dataDict[root].update(testparse.parseTestFile(fullex, 0))

    return dataDict
240
def do_query(use_source, startdir, srcdir, testdir, petsc_dir, petsc_arch,
             fields, labels, searchin):
    """
    Run one query from start to finish and print the matching test names.

    The work lives here rather than in main() so that the same steps can be
    reused from an ipython/jupyter notebook for more advanced queries.

    The data dictionary comes from walking startdir when use_source is true,
    otherwise from the gmakegentest pickle file.  The names found by query()
    are optionally filtered with the fnmatch pattern searchin (a leading '!'
    keeps only the names that do NOT match the rest of the pattern) and
    printed on one line, separated by blanks.
    """
    # Get dictionary
    dataDict = (walktree(startdir) if use_source
                else get_gmakegentest_data(srcdir,testdir, petsc_dir, petsc_arch))

    # Invert it for searching, then search
    invDict=get_inverse_dictionary(dataDict, fields, srcdir)
    resList=query(invDict, fields, labels)

    # Filter results using searchin
    if searchin.strip():
        exclude=searchin.startswith('!')
        pattern=searchin[1:] if exclude else searchin
        resList=[key for key in resList
                 if bool(fnmatch.filter([key],pattern)) != exclude]

    # Print in flat list suitable for use by gmakefile.test
    print(' '.join(resList))

    return
278
def expand_path_like(petscdir,petscarch,pathlike):
    """
    Normalize a label that may be a (glob) path so it is relative to petscdir.

    Environment variables and '~' are expanded.  If the parent directory of
    the expanded path exists, the parent is rewritten relative to petscdir
    and the final component (typically a file name or glob) is appended
    unchanged; for an empty petscarch the leading share/petsc/examples/
    component is removed as well.  In every case all occurrences of 'diff-'
    are removed.

    Returns the rewritten label string.
    """
    def remove_prefix(text,prefix):
        return text[len(prefix):] if text.startswith(prefix) else text

    # expand user second, as expandvars may insert a '~'
    string = os.path.expanduser(os.path.expandvars(pathlike))
    # if the dirname check succeeds then likely we have a glob expression
    pardir = os.path.dirname(string)
    if os.path.exists(pardir):
        # whatever follows the parent directory; slice instead of replace()
        # so a repeat of the directory name inside the last component
        # (e.g. 'sys/sys*') is left alone
        suffix   = string[len(pardir):]
        pathlike = remove_prefix(os.path.relpath(os.path.abspath(pardir),petscdir),'.'+os.path.sep)
        if petscarch == '':
            pathlike = pathlike.replace(os.path.join('share','petsc','examples',''),'')
        pathlike += suffix
    pathlike = pathlike.replace('diff-','')
    return pathlike
295
def main():
    """
    Command-line entry point: parse options, validate the directories and
    run the query.

    Expects exactly two positional arguments, the field(s) to search and the
    matching pattern(s); otherwise usage information is printed.  Problems
    with the directories are reported with a message on stdout and the
    function returns without querying.
    """
    parser = optparse.OptionParser(usage="%prog [options] field match_pattern")
    parser.add_option('-s', '--startdir', dest='startdir',
                      help='Where to start the recursion if not srcdir',
                      default='')
    parser.add_option('-p', '--petsc-dir', dest='petsc_dir',
                      help='Set PETSC_DIR different from environment',
                      default=os.environ.get('PETSC_DIR'))
    parser.add_option('-a', '--petsc-arch', dest='petsc_arch',
                      help='Set PETSC_ARCH different from environment',
                      default=os.environ.get('PETSC_ARCH'))
    parser.add_option('--srcdir', dest='srcdir',
                      help='Set location of sources different from PETSC_DIR/src.  Must be full path.',
                      default='src')
    parser.add_option('-t', '--testdir', dest='testdir',
                      help='Test directory if not PETSC_ARCH/tests.  Must be full path',
                      default='tests')
    # store_true: giving the flag must switch source mode ON
    parser.add_option('-u', '--use-source', action="store_true",
                      dest='use_source', default=False,
                      help='Query all sources rather than those configured in PETSC_ARCH')
    parser.add_option('-i', '--searchin', dest='searchin',
                      help='Filter results from the arguments',
                      default='')

    opts, args = parser.parse_args()

    # Argument Sanity checks
    if len(args) != 2:
        parser.print_usage()
        print('Arguments: ')
        print('  field:          Field to search for; e.g., requires')
        print('                  To just match names, use "name"')
        print('  match_pattern:  Matching pattern for field; e.g., cuda')
        return

    def shell_unquote(string):
      """
      Remove quotes from STRING. Useful in the case where you need to bury escaped quotes in a query
      string in order to escape shell characters. For example:

      $ make test query='foo,bar' queryval='requires|name'
      /usr/bin/bash: line 1: name: command not found

      While the original shell does not see the pipe character, the actual query is done via a second
      shell, which is (literally) passed '$(queryval)', i.e. 'queryval='requires|name'' when expanded.
      Note the fact that the expansion cancels out the quoting!!!

      You can fix this by doing:

      $ make test query='foo,bar' queryval='"requires|name"'

      However this then shows up here as labels = 'queryval="requires|name"'. So we need to remove the
      '"'. Applying shlex.split() on this returns:

      >>> shlex.split('queryval="requires|name"')
      ['queryval=requires|name']

      And voila. Note also that:

      >>> shlex.split('queryval=requires|name')
      ['queryval=requires|name']
      """
      import shlex

      if string:
        ret = shlex.split(string)
        assert len(ret) == 1, "Dont know what to do if shlex.split() produces more than 1 value?"
        string = ret[0]
      return string

    def alternate_command_preprocess(string):
      """
      Replace the alternate versions in STRING with the regular variants
      """
      return string.replace('%OR%', '|').replace('%AND%', ',').replace('%NEG%', '!')

    # Process arguments and options -- mostly just paths here
    field=alternate_command_preprocess(shell_unquote(args[0]))
    labels=alternate_command_preprocess(shell_unquote(args[1]))
    searchin=opts.searchin

    # PETSC_DIR has to be checked before any path is built from it: it is
    # None when neither the option nor the environment variable is set
    petsc_dir = opts.petsc_dir
    if not petsc_dir or not os.path.isdir(petsc_dir):
        print("PETSC_DIR must be a directory")
        return

    # An unset PETSC_ARCH is handled like an empty one
    petsc_arch = opts.petsc_arch or ''
    petsc_full_arch = os.path.join(petsc_dir, petsc_arch)

    if petsc_arch == '':
        petsc_full_src = os.path.join(petsc_dir, 'share', 'petsc', 'examples', 'src')
    elif opts.srcdir == 'src':
        petsc_full_src = os.path.join(petsc_dir, 'src')
    else:
        petsc_full_src = opts.srcdir

    if opts.testdir == 'tests':
        petsc_full_test = os.path.join(petsc_full_arch, 'tests')
    else:
        petsc_full_test = opts.testdir

    # Honor --startdir when given; fall back to the source directory
    startdir = opts.startdir if opts.startdir else petsc_full_src

    # Options Sanity checks
    if not opts.use_source:
        if not os.path.isdir(petsc_full_arch):
            print("PETSC_DIR/PETSC_ARCH must be a directory")
            return
        elif not os.path.isdir(petsc_full_test):
            print("Testdir must be a directory: "+petsc_full_test)
            return
    else:
        if not os.path.isdir(petsc_full_src):
            print("Source directory must be a directory: "+petsc_full_src)
            return

    labels = expand_path_like(petsc_dir,petsc_arch,labels)

    # Do the actual query
    do_query(opts.use_source, startdir, petsc_full_src, petsc_full_test,
             petsc_dir, petsc_arch, field, labels, searchin)

    return

if __name__ == "__main__":
    main()
424