xref: /petsc/config/BuildSystem/sourceDatabase.py (revision 3e1910f1ab6113d8365e15c6b8c907ccce7ce4ea)
1#!/usr/bin/env python
'''A source code database

    SourceDB is a database of file information used to determine whether files
    should be rebuilt by the build system. All file names are stored relative
    to a given root, which is intended as the root of a Project.

    Relative or absolute pathnames may be used as keys, but absolute pathnames
    must fall under the database root. The value format is a tuple of the following:

      Checksum:     The md5 checksum of the file
      Mod Time:     The time the file was last modified
      Timestamp:    The time the entry was last modified
      Dependencies: A tuple of files upon which this entry depends

    This script also provides some default actions:

      - insert <database file> <filename>
        Inserts this file into the database, or updates its entry if it
        already exists.

      - remove <database file> <filename>
        Removes this file from the database. The filename may also be a
        regular expression.

'''
27import logger
28
29import errno
30import os
31import re
32import time
33
34import cPickle
35
36try:
37  from hashlib import md5 as new_md5
38except ImportError:
39  from md5 import new as new_md5
40
41
class SourceDB (dict, logger.Logger):
  '''A SourceDB is a dictionary of file data used during the build process.

     Keys are source file paths stored relative to self.root; absolute paths are
     converted with getRelativePath() on every access. Each value is the tuple
     (checksum, mtime, timestamp, dependencies) accepted by checkValue().'''
  # NOTE(review): the \1 backreference requires the closing delimiter to equal the
  #   opening one, so only the quoted form #include "..." can match this pattern
  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
  # Class-wide flag set while load() runs on behalf of __setstate__()
  isLoading = 0

  def __init__(self, root, filename = None):
    '''Create an empty database for the given root
       - If filename is None, the database file is bsSource.db under the root'''
    dict.__init__(self)
    logger.Logger.__init__(self)
    self.root       = root
    self.filename   = filename
    if self.filename is None:
      self.filename = os.path.join(str(root), 'bsSource.db')
    self.isDirty    = 0
    return

  def __str__(self):
    '''Return a listing of every entry followed by its four fields, one per line'''
    output = []
    for source in self:
      (checksum, mtime, timestamp, dependencies) = self[source]
      output.append(source+'\n')
      output.append('  Checksum:  '+str(checksum)+'\n')
      output.append('  Mod Time:  '+str(mtime)+'\n')
      output.append('  Timestamp: '+str(timestamp)+'\n')
      output.append('  Deps:      '+str(dependencies)+'\n')
    return ''.join(output)

  def __setstate__(self, d):
    '''Restore the logger state, then reload the entries from the database file'''
    logger.Logger.__setstate__(self, d)
    # We have to prevent recursive calls to this when the pickled database is loaded in load()
    #   This is to ensure that fresh copies of the database are obtained after unpickling
    if not SourceDB.isLoading:
      SourceDB.isLoading = 1
      try:
        self.load()
      finally:
        # Always reset the flag, otherwise one failed load would disable reloading for good
        SourceDB.isLoading = 0
    return

  def getRelativePath(self, path):
    '''Returns a relative source file path using the root
       - Relative paths are returned unchanged
       - Raises ValueError for an absolute path which does not lie under the root'''
    if os.path.isabs(path):
      root = str(self.root)
      if not path.startswith(root+os.sep):
        raise ValueError('Absolute path '+path+' conflicts with root '+root)
      # Drop the root and the separator which follows it
      path = path[len(root)+1:]
    return path

  def checkValue(self, value):
    '''Validate the value, raising ValueError for problems
       - The value must be a 4-tuple (str checksum, int mtime >= 0, float timestamp >= 0, tuple dependencies)
       - Returns the value unchanged when it is valid'''
    if not isinstance(value, tuple):
      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
    if not len(value) == 4:
      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
    (checksum, mtime, timestamp, dependencies) = value
    if not isinstance(checksum, str):
      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
    if not isinstance(mtime, int):
      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
    elif mtime < 0:
      raise ValueError('Negative modification time for source database, '+str(mtime))
    if not isinstance(timestamp, float):
      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
    elif timestamp < 0:
      raise ValueError('Negative timestamp for source database, '+str(timestamp))
    if not isinstance(dependencies, tuple):
      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
    return value

  def __getitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__getitem__(self, self.getRelativePath(key))

  def __setitem__(self, key, value):
    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
    self.isDirty = 1
    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))

  def __delitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    self.isDirty = 1
    return dict.__delitem__(self, self.getRelativePath(key))

  def __contains__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__contains__(self, self.getRelativePath(key))

  def has_key(self, key):
    '''This method just calls self.__contains__(key)'''
    return self.__contains__(key)

  def items(self):
    '''Converts each key to a relative source file path using the root'''
    return [(self.getRelativePath(key), value) for (key, value) in dict.items(self)]

  def keys(self):
    '''Converts each key to a relative source file path using the root'''
    return [self.getRelativePath(key) for key in dict.keys(self)]

  def update(self, d):
    '''Update the dictionary with the contents of d
       - Every entry goes through __setitem__, so keys are made relative and values are validated'''
    self.isDirty = 1
    for k in d:
      self[k] = d[k]
    return

  def getChecksum(source, chunkSize = 1024*1024):
    '''Return the md5 checksum for a given file, which may also be specified by its filename
       - Any object with a read() method is treated as an open file
       - The chunkSize argument specifies the size of blocks read from the file
       - The file is closed before returning, even when it was supplied by the caller'''
    if hasattr(source, 'read'):
      f = source
    else:
      # Binary mode so that the checksum covers the exact bytes stored on disk
      f = open(source, 'rb')
    try:
      m   = new_md5()
      buf = f.read(chunkSize)
      while buf:
        m.update(buf)
        buf = f.read(chunkSize)
    finally:
      f.close()
    return m.hexdigest()
  getChecksum = staticmethod(getChecksum)

  def getModificationTime(source):
    '''Return the modification time of the file as an integer'''
    t = os.path.getmtime(source)
    if isinstance(t, float):
      t = int(t)
    return t
  getModificationTime = staticmethod(getModificationTime)

  def updateSource(self, source, noChecksum = 0):
    '''Record the current checksum and modification time of source, keeping its dependencies
       - If noChecksum is true, an empty checksum is stored instead of reading the file'''
    self.isDirty = 1
    dependencies = self.getDependencies(source)
    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
    if noChecksum:
      checksum   = ''
    else:
      checksum   = SourceDB.getChecksum(source)
    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
    return

  def clearSource(self, source):
    '''This removes source information, but preserves dependencies
       - Sources which are not in the database are ignored'''
    if source in self:
      self.isDirty = 1
      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
      dependencies = self[source][3]
      self[source] = ('', 0, time.time(), dependencies)
    return

  def getDependencies(self, source):
    '''Return the dependency tuple for source, or an empty tuple if it has no entry'''
    try:
      (checksum, mtime, timestamp, dependencies) = self[source]
    except KeyError:
      dependencies = ()
    return dependencies

  def addDependency(self, source, dependency):
    '''Add dependency to the entry for source and refresh its timestamp
       - A missing entry is created with an empty checksum and a zero modification time'''
    self.isDirty = 1
    dependencies = ()
    try:
      (checksum, mtime, timestamp, dependencies) = self[source]
    except KeyError:
      checksum = ''
      mtime    = 0
    if not dependency in dependencies:
      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
      dependencies = dependencies+(dependency,)
    self[source] = (checksum, mtime, time.time(), dependencies)
    return

  def _commonPrefixLength(first, second):
    '''Return the number of leading items which two sequences have in common'''
    count = 0
    # zip() stops at the shorter sequence, so neither one is indexed past its end
    for (a, b) in zip(first, second):
      if not a == b: break
      count += 1
    return count
  _commonPrefixLength = staticmethod(_commonPrefixLength)

  def _matchInclude(self, comps, filename):
    '''Return the database entry which best matches an included filename
       - Candidates are the entries whose path contains filename
       - The candidate sharing the most leading path components with comps is chosen
       - If no candidate shares a leading component, filename itself is returned'''
    matchNum  = 0
    matchName = filename
    for s in self:
      if s.find(filename) >= 0:
        self.logPrint('    Checking '+s, 3, 'sourceDB')
        i = SourceDB._commonPrefixLength(comps, s.split('/'))
        if i > matchNum:
          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
          matchName = s
          matchNum  = i
    return matchName

  def calculateDependencies(self):
    '''Recalculate the dependencies of every entry from the #include lines in its file
       - Entries whose file no longer exists are removed from the database
       - Any other IOError from opening a file is propagated'''
    import sys
    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
    # Walk a snapshot of the keys, since entries for missing files are deleted along the way
    for source in list(dict.keys(self)):
      self.logPrint('Calculating '+source, 3, 'sourceDB')
      (checksum, mtime, timestamp, dependencies) = self[source]
      try:
        sourceFile = open(source)
      except IOError:
        # sys.exc_info() avoids the version-specific syntax for binding the exception
        if sys.exc_info()[1].errno == errno.ENOENT:
          del self[source]
          continue
        raise
      newDep = []
      comps  = source.split('/')
      try:
        for line in sourceFile:
          m = self.includeRE.match(line)
          if m:
            filename = m.group('includeFile')
            self.logPrint('  Includes '+filename, 3, 'sourceDB')
            newDep.append(self._matchInclude(comps, filename))
      finally:
        sourceFile.close()
      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
      self[source] = (checksum, mtime, timestamp, tuple(newDep))
    return

  def load(self):
    '''Load the source database from the saved filename
       - The current contents are replaced only after the file has been read successfully
       - If the file does not exist, a message is logged and the database is left alone'''
    filename = str(self.filename)
    if os.path.exists(filename):
      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
      # NOTE(review): unpickling can execute arbitrary code, so the database file must be trusted
      dbFile = open(filename)
      try:
        newDB = cPickle.load(dbFile)
      finally:
        dbFile.close()
      self.clear()
      self.update(newDB)
    else:
      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
    return

  def save(self, force = 0):
    '''Save the source database to a file. The saved database will have path names relative to the root.
       - Nothing is written unless the database is dirty or force is true
       - If the directory for the file does not exist, a message is logged and nothing is written'''
    if not self.isDirty and not force:
      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
      return
    filename  = str(self.filename)
    # A bare filename has an empty dirname, which means the current directory
    directory = os.path.dirname(filename) or os.curdir
    if os.path.exists(directory):
      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
      dbFile = open(filename, 'w')
      try:
        cPickle.dump(self, dbFile)
      finally:
        dbFile.close()
      self.isDirty = 0
    else:
      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
    return
280
class DependencyAnalyzer (logger.Logger):
  '''Assembles the #include graph for the files recorded in a source database.'''
  def __init__(self, sourceDB):
    '''Keep a reference to the source database whose entries will be analyzed'''
    logger.Logger.__init__(self)
    self.sourceDB  = sourceDB
    # NOTE(review): the \1 backreference requires the closing delimiter to equal the
    #   opening one, so only the quoted form #include "..." can match this pattern
    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
    return

  def _commonPrefixLength(first, second):
    '''Return the number of leading items which two sequences have in common'''
    count = 0
    # zip() stops at the shorter sequence, so neither one is indexed past its end
    for (a, b) in zip(first, second):
      if not a == b: break
      count += 1
    return count
  _commonPrefixLength = staticmethod(_commonPrefixLength)

  def resolveDependency(self, source, dep):
    '''Return the source database entry to which the included name dep refers
       - If dep is itself an entry, it is returned unchanged
       - Raises RuntimeError if no entry in the database can be matched'''
    if dep in self.sourceDB: return dep
    # Choose the entry in sourceDB whose base matches dep,
    #   and who has the most path components in common with source
    # This should be replaced by an appeal to cpp
    matchNum   = 0
    matchName  = dep
    components = source.split(os.sep)
    self.logPrint('  Includes '+dep, 3, 'sourceDB')
    for s in self.sourceDB:
      if s.find(dep) >= 0:
        self.logPrint('    Checking '+s, 3, 'sourceDB')
        i = DependencyAnalyzer._commonPrefixLength(components, s.split(os.sep))
        if i > matchNum:
          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
          matchName = s
          matchNum  = i
    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
    return matchName

  def getNeighbors(self, source):
    '''Return the list of database entries which source includes directly
       - An IOError from opening source is propagated to the caller'''
    sourceFile = open(source)
    adj        = []
    try:
      for line in sourceFile:
        match = self.includeRE.match(line)
        if match:
          adj.append(self.resolveDependency(source, match.group('includeFile')))
    finally:
      sourceFile.close()
    return adj

  def calculateDependencies(self):
    '''Should this be a generator?
    First assemble the DAG using #include relations
    Then calculate the dependencies with all pairs shortest-path
      - I think Floyd-Warshall and N-source Dijkstra are just as good

    Only the first step is implemented so far. The returned dictionary maps each
    source to the list of entries it includes directly. Entries whose file no
    longer exists are removed from the source database.
    '''
    import sys
    # Assembling DAG
    dag = {}
    # Walk a snapshot of the keys, since entries for missing files are deleted along the way
    for source in list(self.sourceDB.keys()):
      try:
        dag[source] = self.getNeighbors(source)
      except IOError:
        # sys.exc_info() avoids the version-specific syntax for binding the exception
        if sys.exc_info()[1].errno == errno.ENOENT:
          del self.sourceDB[source]
        else:
          raise
    # Finding all-pairs shortest path
    return dag
337
# Command line interface: sourceDatabase.py <database filename> [insert | remove] <filename>
if __name__ == '__main__':
  import sys
  try:
    # The program name, database file, action and filename are all required
    if len(sys.argv) < 4:
      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
    else:
      (dbFilename, action, target) = sys.argv[1:4]
      if not os.path.exists(dbFilename):
        sys.exit('Could not load source database from '+dbFilename)
      # NOTE(review): unpickling can execute arbitrary code, so the database file must be trusted
      dbFile = open(dbFilename)
      try:
        sourceDB = cPickle.load(dbFile)
      finally:
        dbFile.close()
      if action == 'insert':
        if target in sourceDB:
          sourceDB.logPrint('Updating '+target, 3, 'sourceDB')
        else:
          sourceDB.logPrint('Inserting '+target, 3, 'sourceDB')
        sourceDB.updateSource(target)
      elif action == 'remove':
        if target in sourceDB:
          sourceDB.logPrint('Removing '+target, 3, 'sourceDB')
          del sourceDB[target]
        else:
          # Not an exact entry, so treat the argument as a regular expression over all entries
          sourceDB.logPrint('Matching regular expression '+target+' over source database', 1, 'sourceDB')
          removeRE = re.compile(target)
          removes  = [source for source in sourceDB.keys() if removeRE.match(source)]
          for source in removes:
            sourceDB.logPrint('Removing '+source, 3, 'sourceDB')
            del sourceDB[source]
      else:
        sys.exit('Unknown source database action: '+action)
      sourceDB.save()
  except Exception:
    import traceback
    # print_tb() writes the traceback itself and returns None, so its result is not printed
    traceback.print_tb(sys.exc_info()[2])
    sys.exit(str(sys.exc_info()[1]))
  sys.exit(0)
375