petsc/config/query_tests.py

#!/usr/bin/env python3
import fnmatch
import glob
import inspect
import os
import optparse
import pickle
import re
import sys

thisfile = os.path.abspath(inspect.getfile(inspect.currentframe()))
pdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(thisfile)))))
sys.path.insert(0, os.path.join(pdir, 'config'))

import testparse
from gmakegentest import nameSpace


"""
  Tool for querying the tests.

  Which tests to query?  Two options:
      1. Query only the tests that are run for a given configuration.
      2. Query all of the test files in the source directory
  For #1:
     Use dataDict as written out by gmakegentest.py in $PETSC_ARCH/$TESTBASE
  For #2:
     Walk the entire tree parsing the files as we go along using testparse.
     The tree walker is simpler than what is in gmakegentest.py

  The dataDict follows that generated by testparse.  gmakegentest.py does
  further manipulations of the dataDict to handle things like for loops
  so if using #2, those modifications are not included.

  Querying:
      The dataDict dictionary is then "inverted" to create a dictionary with the
      range of field values as keys and list test names as the values.  This
      allows fast searching

"""

def isFile(maybeFile):
  ext=os.path.splitext(maybeFile)[1]
  if not ext: return False
  if ext not in ['.c','.cxx','.cpp','F90','F','cu']: return False
  return True

def pathToLabel(path):
  """
  Because the scripts have a non-unique naming, the pretty-printing
  needs to convey the srcdir and srcfile.  There are two ways of doing this.
  """
  # Strip off any top-level directories or spaces
  path=path.strip().replace(pdir,'')
  path=path.replace('src/','')
  if isFile(path):
    prefix=os.path.dirname(path).replace("/","_")
    suffix=os.path.splitext(os.path.basename(path))[0]
    label=prefix+"-"+suffix+'_*'
  else:
    path=path.rstrip('/')
    label=path.replace("/","_").replace('tests_','tests-').replace('tutorials_','tutorials-')
  return label

def get_value(varset):
  """
  Searching args is a bit funky:
  Consider
      args:  -ksp_monitor_short -pc_type ml -ksp_max_it 3
  Search terms are:
    ksp_monitor, 'pc_type ml', ksp_max_it
  Also ignore all loops
    -pc_fieldsplit_diag_use_amat {{0 1}}
  Gives: pc_fieldsplit_diag_use_amat as the search term
  Also ignore -f ...  (use matrices from file) because I'll assume
   that this kind of information isn't needed for testing.  If it's
   a separate search than just grep it
  """
  if varset.startswith('-f '): return None

  # First  remove loops
  value=re.sub('{{.*}}','',varset)
  # Next remove -
  value=varset.lstrip("-")
  # Get rid of numbers
  value=re.sub(r"[+-]? *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?",'',value)
  # return without spaces
  return value.strip()

def query(invDict,fields,labels):
    """
    Search the keys using fnmatch to find matching names and return list with
    the results
    """
    setlist=[]  # setlist is a list of lists that set operations will operate on
    llist=labels.replace('|',',').split(',')
    i=-1
    for field in fields.replace('|',',').split(','):
        i+=1
        label=llist[i]
        if field == 'name':
            if '/' in label:
              label=pathToLabel(label)
            elif label.startswith('src'):
                  label=label.lstrip('src').lstrip('*')
            setlist.append(fnmatch.filter(invDict['name'],label))
            continue

        foundLabel=False   # easy to do if you misspell argument search
        label=label.lower()
        for key in invDict[field]:
            if fnmatch.filter([key.lower()],label):
              foundLabel=True
              # Do not return values with not unless label itself has not
              if label.startswith('!') and not key.startswith('!'): continue
              if not label.startswith('!') and key.startswith('!'): continue
              setlist.append(invDict[field][key])
        if not foundLabel:
          setlist.append([])

    # Now process the union and intersection operators based on setlist
    allresults=[]
    # Union
    i=-1
    for ufield in fields.split(','):
       i+=1
       if '|' in ufield:
         # Intersection
         label=llist[i]
         results=set(setlist[i])
         for field in ufield.split('|')[1:]:
             i+=1
             label=llist[i]
             results=results.intersection(set(setlist[i]))
         allresults+=list(results)
       else:
         allresults+=setlist[i]

    # remove duplicate entries and sort to give consistent results
    uniqlist=list(set(allresults))
    uniqlist.sort()
    return  uniqlist

def get_inverse_dictionary(dataDict,fields,srcdir):
    """
    Create a dictionary with the values of field as the keys, and the name of
    the tests as the results.
    """
    invDict={}
    # Comma-delimited lists denote union
    for field in fields.replace('|',',').split(','):
        if field not in invDict:
            if field == 'name':
                 invDict[field]=[]   # List for ease
            else:
                 invDict[field]={}
        for root in dataDict:
          for exfile in dataDict[root]:
            for test in dataDict[root][exfile]:
              if test in testparse.buildkeys: continue
              defroot = testparse.getDefaultOutputFileRoot(test)
              fname=nameSpace(defroot,os.path.relpath(root,srcdir))
              if field == 'name':
                  invDict['name'].append(fname)
                  continue
              if field not in dataDict[root][exfile][test]: continue
              values=dataDict[root][exfile][test][field]

              if not field == 'args' and not field == 'diff_args':
                for val in values.split():
                    if val in invDict[field]:
                        invDict[field][val].append(fname)
                    else:
                        invDict[field][val] = [fname]
              else:
                # Args are funky.
                for varset in re.split('(^|\W)-(?=[a-zA-Z])',values):
                  val=get_value(varset)
                  if not val: continue
                  if val in invDict[field]:
                    invDict[field][val].append(fname)
                  else:
                    invDict[field][val] = [fname]
        # remove duplicate entries (multiple test/file)
        if not field == 'name':
          for val in invDict[field]:
            invDict[field][val]=list(set(invDict[field][val]))

    return invDict

def get_gmakegentest_data(testdir,petsc_dir,petsc_arch):
    """
     Write out the dataDict into a pickle file
    """
    # This needs to be consistent with gmakegentest.py of course
    pkl_file=os.path.join(testdir,'datatest.pkl')
    # If it doesn't exist, then we need to regenerate
    if not os.path.exists(pkl_file):
      startdir=os.path.abspath(os.curdir)
      os.chdir(petsc_dir)
      args='--petsc-dir='+petsc_dir+' --petsc-arch='+petsc_arch+' --testdir='+testdir
      buf = os.popen('config/gmakegentest.py '+args).read()
      os.chdir(startdir)

    fd = open(pkl_file, 'rb')
    dataDict=pickle.load(fd)
    fd.close()
    return dataDict

def walktree(top):
    """
    Walk a directory tree, starting from 'top'
    """
    verbose = False
    dataDict = {}
    alldatafiles = []
    for root, dirs, files in os.walk(top, topdown=False):
        if root == 'output': continue
        if '.dSYM' in root: continue
        if verbose: print(root)

        dataDict[root] = {}

        for exfile in files:
            # Ignore emacs files
            if exfile.startswith("#") or exfile.startswith(".#"): continue
            ext=os.path.splitext(exfile)[1]
            if ext[1:] not in ['c','cxx','cpp','cu','F90','F']: continue

            # Convenience
            fullex = os.path.join(root, exfile)
            if verbose: print('   --> '+fullex)
            dataDict[root].update(testparse.parseTestFile(fullex, 0))

    return dataDict

def do_query(use_source, startdir, srcdir, testdir, petsc_dir, petsc_arch,
             fields, labels, searchin):
    """
    Do the actual query
    This part of the code is placed here instead of main()
    to show how one could translate this into ipython/jupyer notebook
    commands for more advanced queries
    """
    # Get dictionary
    if use_source:
        dataDict=walktree(startdir)
    else:
        dataDict=get_gmakegentest_data(testdir, petsc_dir, petsc_arch)

    # Get inverse dictionary for searching
    invDict=get_inverse_dictionary(dataDict, fields, srcdir)

    # Now do query
    resList=query(invDict, fields, labels)

    # Filter results using searchin
    newresList=[]
    if searchin.strip():
        if not searchin.startswith('!'):
            for key in resList:
                if fnmatch.filter([key],searchin):
                  newresList.append(key)
        else:
            for key in resList:
                if not fnmatch.filter([key],searchin[1:]):
                  newresList.append(key)
        resList=newresList

    # Print in flat list suitable for use by gmakefile.test
    print(' '.join(resList))

    return

def expand_path_like(petscdir,petscarch,pathlike):
    def remove_prefix(text,prefix):
        return text[text.startswith(prefix) and len(prefix):]

    # expand user second, as expandvars may insert a '~'
    string = os.path.expanduser(os.path.expandvars(pathlike))
    # if the dirname check succeeds then likely we have a glob expression
    pardir = os.path.dirname(string)
    if os.path.exists(pardir):
        suffix   = string.replace(pardir,'') # get whatever is left over
        pathlike = remove_prefix(os.path.relpath(os.path.abspath(pardir),petscdir),'.'+os.path.sep)
        if petscarch == '':
            pathlike = pathlike.replace(os.path.sep.join(('share','petsc','examples'))+'/','')
        pathlike += suffix
    return pathlike

def main():
    parser = optparse.OptionParser(usage="%prog [options] field match_pattern")
    parser.add_option('-s', '--startdir', dest='startdir',
                      help='Where to start the recursion if not srcdir',
                      default='')
    parser.add_option('-p', '--petsc-dir', dest='petsc_dir',
                      help='Set PETSC_DIR different from environment',
                      default=os.environ.get('PETSC_DIR'))
    parser.add_option('-a', '--petsc-arch', dest='petsc_arch',
                      help='Set PETSC_ARCH different from environment',
                      default=os.environ.get('PETSC_ARCH'))
    parser.add_option('--srcdir', dest='srcdir',
                      help='Set location of sources different from PETSC_DIR/src.  Must be full path.',
                      default='src')
    parser.add_option('-t', '--testdir', dest='testdir',
                      help='Test directory if not PETSC_ARCH/tests.  Must be full path',
                      default='tests')
    parser.add_option('-u', '--use-source', action="store_false",
                      dest='use_source',
                      help='Query all sources rather than those configured in PETSC_ARCH')
    parser.add_option('-i', '--searchin', dest='searchin',
                      help='Filter results from the arguments',
                      default='')

    opts, args = parser.parse_args()

    # Argument Sanity checks
    if len(args) != 2:
        parser.print_usage()
        print('Arguments: ')
        print('  field:          Field to search for; e.g., requires')
        print('                  To just match names, use "name"')
        print('  match_pattern:  Matching pattern for field; e.g., cuda')
        return

    def shell_unquote(string):
      """
      Remove quotes from STRING. Useful in the case where you need to bury escaped quotes in a query
      string in order to escape shell characters. For example:

      $ make test query='foo,bar' queryval='requires|name'
      /usr/bin/bash: line 1: name: command not found

      While the original shell does not see the pipe character, the actual query is done via a second
      shell, which is (literally) passed '$(queryval)', i.e. 'queryval='requires|name'' when expanded.
      Note the fact that the expansion cancels out the quoting!!!

      You can fix this by doing:

      $ make test query='foo,bar' queryval='"requires|name"'

      However this then shows up here as labels = 'queryval="requires|name"'. So we need to remove the
      '"'. Applying shlex.split() on this returns:

      >>> shlex.split('queryval="requires|name"')
      ['queryval=requires|name']

      And voila. Note also that:

      >>> shlex.split('queryval=requires|name')
      ['queryval=requires|name']
      """
      import shlex

      if string:
        ret = shlex.split(string)
        assert len(ret) == 1, "Dont know what to do if shlex.split() produces more than 1 value?"
        string = ret[0]
      return string

    def alternate_command_preprocess(string):
      """
      Replace the alternate versions in STRING with the regular variants
      """
      return string.replace('%OR%', '|').replace('%AND%', ',').replace('%NEG%', '!')

    # Process arguments and options -- mostly just paths here
    field=alternate_command_preprocess(shell_unquote(args[0]))
    match=alternate_command_preprocess(shell_unquote(args[1]))
    searchin=opts.searchin

    petsc_dir = opts.petsc_dir
    petsc_arch = opts.petsc_arch
    petsc_full_arch = os.path.join(petsc_dir, petsc_arch)

    if petsc_arch == '':
        petsc_full_src = os.path.join(petsc_dir, 'share', 'petsc', 'examples', 'src')
    else:
      if opts.srcdir == 'src':
        petsc_full_src = os.path.join(petsc_dir, 'src')
      else:
        petsc_full_src = opts.srcdir
    if opts.testdir == 'tests':
      petsc_full_test = os.path.join(petsc_full_arch, 'tests')
    else:
      petsc_full_test = opts.testdir
    if opts.startdir:
      startdir=opts.startdir=petsc_full_src
    else:
      startdir=petsc_full_src

    # Options Sanity checks
    if not os.path.isdir(petsc_dir):
        print("PETSC_DIR must be a directory")
        return

    if not opts.use_source:
        if not os.path.isdir(petsc_full_arch):
            print("PETSC_DIR/PETSC_ARCH must be a directory")
            return
        elif not os.path.isdir(petsc_full_test):
            print("Testdir must be a directory"+petsc_full_test)
            return
    else:
        if not os.path.isdir(petsc_full_src):
            print("Source directory must be a directory"+petsc_full_src)
            return

    match = expand_path_like(petsc_dir,petsc_arch,match)

    # Do the actual query
    do_query(opts.use_source, startdir, petsc_full_src, petsc_full_test,
             petsc_dir, petsc_arch, field, match, searchin)

    return


if __name__ == "__main__":
        main()