xref: /petsc/config/BuildSystem/sourceDatabase.py (revision 5b6bfdb9644f185dbf5e5a09b808ec241507e1e7)
1179860b2SJed Brown#!/usr/bin/env python
2179860b2SJed Brown'''A source code database
3179860b2SJed Brown
4179860b2SJed Brown    SourceDB is a database of file information used to determine whether files
5179860b2SJed Brown    should be rebuilt by the build system. All files names are stored relative
6179860b2SJed Brown    to a given root, which is intended as the root of a Project.
7179860b2SJed Brown
8179860b2SJed Brown    Relative or absolute pathnames may be used as keys, but absolute pathnames
9179860b2SJed Brown    must fall under the database root. The value format is a tuple of the following:
10179860b2SJed Brown
11179860b2SJed Brown      Checksum:     The md5 checksum of the file
12179860b2SJed Brown      Mod Time:     The time the file was last modified
13179860b2SJed Brown      Timestamp:    The time theentry was last modified
14179860b2SJed Brown      Dependencies: A tuple of files upon which this entry depends
15179860b2SJed Brown
16179860b2SJed Brown    This script also provides some default actions:
17179860b2SJed Brown
18179860b2SJed Brown      - insert <database file> <filename>
19179860b2SJed Brown        Inserts this file from the database, or updates its entry if it
20179860b2SJed Brown        already exists.
21179860b2SJed Brown
22179860b2SJed Brown      - remove <database file> <filename>
23179860b2SJed Brown        Removes this file from the database. The filename may also be a
24179860b2SJed Brown        regular expression.
25179860b2SJed Brown
26179860b2SJed Brown'''
27*5b6bfdb9SJed Brownfrom __future__ import print_function
28*5b6bfdb9SJed Brownfrom __future__ import absolute_import
29179860b2SJed Brownimport logger
30179860b2SJed Brown
31179860b2SJed Brownimport errno
32179860b2SJed Brownimport os
33179860b2SJed Brownimport re
34179860b2SJed Brownimport time
35179860b2SJed Brown
36179860b2SJed Brownimport cPickle
37179860b2SJed Brown
38179860b2SJed Browntry:
39179860b2SJed Brown  from hashlib import md5 as new_md5
40179860b2SJed Brownexcept ImportError:
41179860b2SJed Brown  from md5 import new as new_md5
42179860b2SJed Brown
43179860b2SJed Brown
44179860b2SJed Brownclass SourceDB (dict, logger.Logger):
45179860b2SJed Brown  '''A SourceDB is a dictionary of file data used during the build process.'''
46179860b2SJed Brown  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
47179860b2SJed Brown  isLoading = 0
48179860b2SJed Brown
49179860b2SJed Brown  def __init__(self, root, filename = None):
50179860b2SJed Brown    dict.__init__(self)
51179860b2SJed Brown    logger.Logger.__init__(self)
52179860b2SJed Brown    self.root       = root
53179860b2SJed Brown    self.filename   = filename
54179860b2SJed Brown    if self.filename is None:
55179860b2SJed Brown      self.filename = os.path.join(str(root), 'bsSource.db')
56179860b2SJed Brown    self.isDirty    = 0
57179860b2SJed Brown    return
58179860b2SJed Brown
59179860b2SJed Brown  def __str__(self):
60179860b2SJed Brown    output = ''
61179860b2SJed Brown    for source in self:
62179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
63179860b2SJed Brown      output += source+'\n'
64179860b2SJed Brown      output += '  Checksum:  '+str(checksum)+'\n'
65179860b2SJed Brown      output += '  Mod Time:  '+str(mtime)+'\n'
66179860b2SJed Brown      output += '  Timestamp: '+str(timestamp)+'\n'
67179860b2SJed Brown      output += '  Deps:      '+str(dependencies)+'\n'
68179860b2SJed Brown    return output
69179860b2SJed Brown
70179860b2SJed Brown  def __setstate__(self, d):
71179860b2SJed Brown    logger.Logger.__setstate__(self, d)
72179860b2SJed Brown    # We have to prevent recursive calls to this when the pickled database is loaded in load()
73179860b2SJed Brown    #   This is to ensure that fresh copies of the database are obtained after unpickling
74179860b2SJed Brown    if not SourceDB.isLoading:
75179860b2SJed Brown      SourceDB.isLoading = 1
76179860b2SJed Brown      self.load()
77179860b2SJed Brown      SourceDB.isLoading = 0
78179860b2SJed Brown    return
79179860b2SJed Brown
80179860b2SJed Brown  def getRelativePath(self, path):
81179860b2SJed Brown    '''Returns a relative source file path using the root'''
82179860b2SJed Brown    if os.path.isabs(path):
83179860b2SJed Brown      root = str(self.root)
84179860b2SJed Brown      if not path.startswith(root+os.sep):
85179860b2SJed Brown        raise ValueError('Absolute path '+path+' conflicts with root '+root)
86179860b2SJed Brown      else:
87179860b2SJed Brown        path = path[len(root)+1:]
88179860b2SJed Brown    return path
89179860b2SJed Brown
90179860b2SJed Brown  def checkValue(self, value):
91179860b2SJed Brown    '''Validate the value, raising ValueError for problems'''
92179860b2SJed Brown    if not isinstance(value, tuple):
93179860b2SJed Brown      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
94179860b2SJed Brown    if not len(value) == 4:
95179860b2SJed Brown      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
96179860b2SJed Brown    (checksum, mtime, timestamp, dependencies) = value
97179860b2SJed Brown    if not isinstance(checksum, str):
98179860b2SJed Brown      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
99179860b2SJed Brown    if not isinstance(mtime, int):
100179860b2SJed Brown      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
101179860b2SJed Brown    elif mtime < 0:
102179860b2SJed Brown      raise ValueError('Negative modification time for source database, '+str(mtime))
103179860b2SJed Brown    if not isinstance(timestamp, float):
104179860b2SJed Brown      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
105179860b2SJed Brown    elif timestamp < 0:
106179860b2SJed Brown      raise ValueError('Negative timestamp for source database, '+str(timestamp))
107179860b2SJed Brown    if not isinstance(dependencies, tuple):
108179860b2SJed Brown      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
109179860b2SJed Brown    return value
110179860b2SJed Brown
111179860b2SJed Brown  def __getitem__(self, key):
112179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
113179860b2SJed Brown    return dict.__getitem__(self, self.getRelativePath(key))
114179860b2SJed Brown
115179860b2SJed Brown  def __setitem__(self, key, value):
116179860b2SJed Brown    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
117179860b2SJed Brown    self.isDirty = 1
118179860b2SJed Brown    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))
119179860b2SJed Brown
120179860b2SJed Brown  def __delitem__(self, key):
121179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
122179860b2SJed Brown    self.isDirty = 1
123179860b2SJed Brown    return dict.__delitem__(self, self.getRelativePath(key))
124179860b2SJed Brown
125179860b2SJed Brown  def __contains__(self, key):
126179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
127179860b2SJed Brown    return dict.__contains__(self, self.getRelativePath(key))
128179860b2SJed Brown
129179860b2SJed Brown  def has_key(self, key):
130179860b2SJed Brown    '''This method just calls self.__contains__(key)'''
131179860b2SJed Brown    return self.__contains__(key)
132179860b2SJed Brown
133179860b2SJed Brown  def items(self):
134179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
135179860b2SJed Brown    return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]
136179860b2SJed Brown
137179860b2SJed Brown  def keys(self):
138179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
139179860b2SJed Brown    return map(self.getRelativePath, dict.keys(self))
140179860b2SJed Brown
141179860b2SJed Brown  def update(self, d):
142179860b2SJed Brown    '''Update the dictionary with the contents of d'''
143179860b2SJed Brown    self.isDirty = 1
144179860b2SJed Brown    for k in d:
145179860b2SJed Brown      self[k] = d[k]
146179860b2SJed Brown    return
147179860b2SJed Brown
148179860b2SJed Brown  def getChecksum(source, chunkSize = 1024*1024):
149179860b2SJed Brown    '''Return the md5 checksum for a given file, which may also be specified by its filename
150179860b2SJed Brown       - The chunkSize argument specifies the size of blocks read from the file'''
151179860b2SJed Brown    if isinstance(source, file):
152179860b2SJed Brown      f = source
153179860b2SJed Brown    else:
154179860b2SJed Brown      f = file(source)
155179860b2SJed Brown    m = new_md5()
156179860b2SJed Brown    size = chunkSize
157179860b2SJed Brown    buf  = f.read(size)
158179860b2SJed Brown    while buf:
159179860b2SJed Brown      m.update(buf)
160179860b2SJed Brown      buf = f.read(size)
161179860b2SJed Brown    f.close()
162179860b2SJed Brown    return m.hexdigest()
163179860b2SJed Brown  getChecksum = staticmethod(getChecksum)
164179860b2SJed Brown
165179860b2SJed Brown  def getModificationTime(source):
166179860b2SJed Brown    t = os.path.getmtime(source)
167179860b2SJed Brown    if isinstance(t, float):
168179860b2SJed Brown      t = int(t)
169179860b2SJed Brown    return t
170179860b2SJed Brown  getModificationTime = staticmethod(getModificationTime)
171179860b2SJed Brown
172179860b2SJed Brown  def updateSource(self, source, noChecksum = 0):
173179860b2SJed Brown    self.isDirty = 1
174179860b2SJed Brown    dependencies = ()
175179860b2SJed Brown    try:
176179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
177179860b2SJed Brown    except KeyError:
178179860b2SJed Brown      pass
179179860b2SJed Brown    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
180179860b2SJed Brown    if noChecksum:
181179860b2SJed Brown      checksum   = ''
182179860b2SJed Brown    else:
183179860b2SJed Brown      checksum   = SourceDB.getChecksum(source)
184179860b2SJed Brown    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
185179860b2SJed Brown    return
186179860b2SJed Brown
187179860b2SJed Brown  def clearSource(self, source):
188179860b2SJed Brown    '''This removes source information, but preserved dependencies'''
189179860b2SJed Brown    if source in self:
190179860b2SJed Brown      self.isDirty = 1
191179860b2SJed Brown      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
192179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
193179860b2SJed Brown      self[source] = ('', 0, time.time(), dependencies)
194179860b2SJed Brown    return
195179860b2SJed Brown
196179860b2SJed Brown  def getDependencies(self, source):
197179860b2SJed Brown    try:
198179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
199179860b2SJed Brown    except KeyError:
200179860b2SJed Brown      dependencies = ()
201179860b2SJed Brown    return dependencies
202179860b2SJed Brown
203179860b2SJed Brown  def addDependency(self, source, dependency):
204179860b2SJed Brown    self.isDirty = 1
205179860b2SJed Brown    dependencies = ()
206179860b2SJed Brown    try:
207179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
208179860b2SJed Brown    except KeyError:
209179860b2SJed Brown      checksum = ''
210179860b2SJed Brown      mtime    = 0
211179860b2SJed Brown    if not dependency in dependencies:
212179860b2SJed Brown      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
213179860b2SJed Brown      dependencies = dependencies+(dependency,)
214179860b2SJed Brown    self[source] = (checksum, mtime, time.time(), dependencies)
215179860b2SJed Brown    return
216179860b2SJed Brown
217179860b2SJed Brown  def calculateDependencies(self):
218179860b2SJed Brown    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
219179860b2SJed Brown    for source in self:
220179860b2SJed Brown      self.logPrint('Calculating '+source, 3, 'sourceDB')
221179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
222179860b2SJed Brown      newDep = []
223179860b2SJed Brown      try:
224179860b2SJed Brown        file = file(source)
225*5b6bfdb9SJed Brown      except IOError as e:
226179860b2SJed Brown        if e.errno == errno.ENOENT:
227179860b2SJed Brown          del self[source]
228179860b2SJed Brown        else:
229179860b2SJed Brown          raise e
230179860b2SJed Brown      comps  = source.split('/')
231*5b6bfdb9SJed Brown      for line in file:
232179860b2SJed Brown        m = self.includeRE.match(line)
233179860b2SJed Brown        if m:
234179860b2SJed Brown          filename  = m.group('includeFile')
235179860b2SJed Brown          matchNum  = 0
236179860b2SJed Brown          matchName = filename
237179860b2SJed Brown          self.logPrint('  Includes '+filename, 3, 'sourceDB')
238179860b2SJed Brown          for s in self:
239179860b2SJed Brown            if s.find(filename) >= 0:
240179860b2SJed Brown              self.logPrint('    Checking '+s, 3, 'sourceDB')
241179860b2SJed Brown              c = s.split('/')
242179860b2SJed Brown              for i in range(len(c)):
243179860b2SJed Brown                if not comps[i] == c[i]: break
244179860b2SJed Brown              if i > matchNum:
245179860b2SJed Brown                self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
246179860b2SJed Brown                matchName = s
247179860b2SJed Brown                matchNum  = i
248179860b2SJed Brown          newDep.append(matchName)
249179860b2SJed Brown      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
250179860b2SJed Brown      self[source] = (checksum, mtime, timestamp, tuple(newDep))
251179860b2SJed Brown      file.close()
252179860b2SJed Brown
253179860b2SJed Brown  def load(self):
254179860b2SJed Brown    '''Load the source database from the saved filename'''
255179860b2SJed Brown    filename = str(self.filename)
256179860b2SJed Brown    if os.path.exists(filename):
257179860b2SJed Brown      self.clear()
258179860b2SJed Brown      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
259179860b2SJed Brown      dbFile = file(filename)
260179860b2SJed Brown      newDB  = cPickle.load(dbFile)
261179860b2SJed Brown      dbFile.close()
262179860b2SJed Brown      self.update(newDB)
263179860b2SJed Brown    else:
264179860b2SJed Brown      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
265179860b2SJed Brown    return
266179860b2SJed Brown
267179860b2SJed Brown  def save(self, force = 0):
268179860b2SJed Brown    '''Save the source database to a file. The saved database with have path names relative to the root.'''
269179860b2SJed Brown    if not self.isDirty and not force:
270179860b2SJed Brown      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
271179860b2SJed Brown      return
272179860b2SJed Brown    filename = str(self.filename)
273179860b2SJed Brown    if os.path.exists(os.path.dirname(filename)):
274179860b2SJed Brown      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
275179860b2SJed Brown      dbFile = file(filename, 'w')
276179860b2SJed Brown      cPickle.dump(self, dbFile)
277179860b2SJed Brown      dbFile.close()
278179860b2SJed Brown      self.isDirty = 0
279179860b2SJed Brown    else:
280179860b2SJed Brown      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
281179860b2SJed Brown    return
282179860b2SJed Brown
283179860b2SJed Brownclass DependencyAnalyzer (logger.Logger):
284179860b2SJed Brown  def __init__(self, sourceDB):
285179860b2SJed Brown    logger.Logger.__init__(self)
286179860b2SJed Brown    self.sourceDB  = sourceDB
287179860b2SJed Brown    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
288179860b2SJed Brown    return
289179860b2SJed Brown
290179860b2SJed Brown  def resolveDependency(self, source, dep):
291179860b2SJed Brown    if dep in self.sourceDB: return dep
292179860b2SJed Brown    # Choose the entry in sourceDB whose base matches dep,
293179860b2SJed Brown    #   and who has the most path components in common with source
294179860b2SJed Brown    # This should be replaced by an appeal to cpp
295179860b2SJed Brown    matchNum   = 0
296179860b2SJed Brown    matchName  = dep
297179860b2SJed Brown    components = source.split(os.sep)
298179860b2SJed Brown    self.logPrint('  Includes '+filename, 3, 'sourceDB')
299179860b2SJed Brown    for s in self.sourceDB:
300179860b2SJed Brown      if s.find(dep) >= 0:
301179860b2SJed Brown        self.logPrint('    Checking '+s, 3, 'sourceDB')
302179860b2SJed Brown        comp = s.split(os.sep)
303179860b2SJed Brown        for i in range(len(comp)):
304179860b2SJed Brown          if not components[i] == comp[i]: break
305179860b2SJed Brown        if i > matchNum:
306179860b2SJed Brown          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
307179860b2SJed Brown          matchName = s
308179860b2SJed Brown          matchNum  = i
309179860b2SJed Brown    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
310179860b2SJed Brown    return matchName
311179860b2SJed Brown
312179860b2SJed Brown  def getNeighbors(self, source):
313179860b2SJed Brown    file = file(source)
314179860b2SJed Brown    adj  = []
315*5b6bfdb9SJed Brown    for line in file:
316179860b2SJed Brown      match = self.includeRE.match(line)
317179860b2SJed Brown      if match:
318179860b2SJed Brown        adj.append(self.resolveDependency(source, m.group('includeFile')))
319179860b2SJed Brown    file.close()
320179860b2SJed Brown    return adj
321179860b2SJed Brown
322179860b2SJed Brown  def calculateDependencies(self):
323179860b2SJed Brown    '''Should this be a generator?
324179860b2SJed Brown    First assemble the DAG using #include relations
325179860b2SJed Brown    Then calculate the depdencies with all pairs shortest-path
326179860b2SJed Brown      - I think Floyd-Warshell and N-source Dijkstra are just as good
327179860b2SJed Brown    '''
328179860b2SJed Brown    # Assembling DAG
329179860b2SJed Brown    dag = {}
330179860b2SJed Brown    for source in self.sourceDB:
331179860b2SJed Brown      try:
332179860b2SJed Brown        dag[source] = self.getNeighbors(self, source)
333*5b6bfdb9SJed Brown      except IOError as e:
334179860b2SJed Brown        if e.errno == errno.ENOENT:
335179860b2SJed Brown          del self[source]
336179860b2SJed Brown        else:
337179860b2SJed Brown          raise e
338179860b2SJed Brown    # Finding all-pairs shortest path
339179860b2SJed Brown
340179860b2SJed Brownif __name__ == '__main__':
341179860b2SJed Brown  import sys
342179860b2SJed Brown  try:
343179860b2SJed Brown    if len(sys.argv) < 3:
344*5b6bfdb9SJed Brown      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
345179860b2SJed Brown    else:
346179860b2SJed Brown      if os.path.exists(sys.argv[1]):
347179860b2SJed Brown        dbFile   = file(sys.argv[1])
348179860b2SJed Brown        sourceDB = cPickle.load(dbFile)
349179860b2SJed Brown        dbFile.close()
350179860b2SJed Brown      else:
351179860b2SJed Brown        sys.exit('Could not load source database from '+sys.argv[1])
352179860b2SJed Brown      if sys.argv[2] == 'insert':
353179860b2SJed Brown        if sys.argv[3] in sourceDB:
354179860b2SJed Brown          self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
355179860b2SJed Brown        else:
356179860b2SJed Brown          self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
357179860b2SJed Brown        self.sourceDB.updateSource(sys.argv[3])
358179860b2SJed Brown      elif sys.argv[2] == 'remove':
359179860b2SJed Brown        if sys.argv[3] in sourceDB:
360179860b2SJed Brown          sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
361179860b2SJed Brown          del self.sourceDB[sys.argv[3]]
362179860b2SJed Brown        else:
363179860b2SJed Brown          sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
364179860b2SJed Brown          removeRE = re.compile(sys.argv[3])
365179860b2SJed Brown          removes  = filter(removeRE.match, sourceDB.keys())
366179860b2SJed Brown          for source in removes:
367179860b2SJed Brown            self.logPrint('Removing '+source, 3, 'sourceDB')
368179860b2SJed Brown            del self.sourceDB[source]
369179860b2SJed Brown      else:
370179860b2SJed Brown        sys.exit('Unknown source database action: '+sys.argv[2])
371179860b2SJed Brown      sourceDB.save()
372*5b6bfdb9SJed Brown  except Exception as e:
373179860b2SJed Brown    import traceback
374*5b6bfdb9SJed Brown    print(traceback.print_tb(sys.exc_info()[2]))
375179860b2SJed Brown    sys.exit(str(e))
376179860b2SJed Brown  sys.exit(0)
377