xref: /petsc/config/BuildSystem/sourceDatabase.py (revision faa0657440740b3f1b365a5983704e3cdad1e0a4)
1'''A source code database
2
3    SourceDB is a database of file information used to determine whether files
4    should be rebuilt by the build system. All file names are stored relative
5    to a given root, which is intended as the root of a Project.
6
7    Relative or absolute pathnames may be used as keys, but absolute pathnames
8    must fall under the database root. The value format is a tuple of the following:
9
10      Checksum:     The md5 checksum of the file
11      Mod Time:     The time the file was last modified
12      Timestamp:    The time the entry was last modified
13      Dependencies: A tuple of files upon which this entry depends
14
15    This script also provides some default actions:
16
17      - insert <database file> <filename>
18        Inserts this file into the database, or updates its entry if it
19        already exists.
20
21      - remove <database file> <filename>
22        Removes this file from the database. The filename may also be a
23        regular expression.
24
25'''
26from __future__ import print_function
27from __future__ import absolute_import
28import logger
29
30import errno
31import os
32import re
33import time
34
35import pickle
36
37try:
38  from hashlib import md5 as new_md5
39except ImportError:
40  from md5 import new as new_md5 # novermin
41
42
class SourceDB (dict, logger.Logger):
  '''A SourceDB is a dictionary of file data used during the build process.

  Keys are source file paths stored relative to self.root. Each value is a tuple
  (checksum, mtime, timestamp, dependencies) which is validated by checkValue().
  The mutators overridden or defined in this class set self.isDirty, which save()
  consults to decide whether the database has to be written out.'''
  # NOTE: the \1 backreference requires the closing delimiter to be the same character as
  #   the opening one, so only '#include "file"' lines match; '#include <file>' lines,
  #   which close with '>', do not.
  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
  # Class-wide flag used by __setstate__() to keep load() from recursing while unpickling
  isLoading = 0

  def __init__(self, root, filename = None):
    '''Create an empty database
       - root is the directory against which all keys are made relative
       - filename is the file used by load() and save(), by default "bsSource.db" under root'''
    dict.__init__(self)
    logger.Logger.__init__(self)
    self.root       = root
    self.filename   = filename
    if self.filename is None:
      self.filename = os.path.join(str(root), 'bsSource.db')
    self.isDirty    = 0
    return

  def __str__(self):
    '''Return a listing of every entry, one labelled field per line'''
    output = []
    for source in self:
      (checksum, mtime, timestamp, dependencies) = self[source]
      output.append(source+'\n')
      output.append('  Checksum:  '+str(checksum)+'\n')
      output.append('  Mod Time:  '+str(mtime)+'\n')
      output.append('  Timestamp: '+str(timestamp)+'\n')
      output.append('  Deps:      '+str(dependencies)+'\n')
    return ''.join(output)

  def __setstate__(self, d):
    '''Restore the logger state from d, then reload the entries from self.filename'''
    logger.Logger.__setstate__(self, d)
    # We have to prevent recursive calls to this when the pickled database is loaded in load()
    #   This is to ensure that fresh copies of the database are obtained after unpickling
    if not SourceDB.isLoading:
      SourceDB.isLoading = 1
      try:
        self.load()
      finally:
        # Always release the guard, otherwise one failed load would disable every later reload
        SourceDB.isLoading = 0
    return

  def getRelativePath(self, path):
    '''Returns a relative source file path using the root
       - Relative paths are returned unchanged
       - Raises ValueError for an absolute path which does not lie under the root'''
    if os.path.isabs(path):
      root = str(self.root)
      if not path.startswith(root+os.sep):
        raise ValueError('Absolute path '+path+' conflicts with root '+root)
      # Strip the root and the separator which follows it
      path = path[len(root)+1:]
    return path

  def checkValue(self, value):
    '''Validate the value, raising ValueError for problems
       - The value must be a tuple (checksum, mtime, timestamp, dependencies) of types
         (str, non-negative int, non-negative float, tuple), and is returned unchanged'''
    if not isinstance(value, tuple):
      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
    if not len(value) == 4:
      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
    (checksum, mtime, timestamp, dependencies) = value
    if not isinstance(checksum, str):
      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
    if not isinstance(mtime, int):
      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
    elif mtime < 0:
      raise ValueError('Negative modification time for source database, '+str(mtime))
    if not isinstance(timestamp, float):
      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
    elif timestamp < 0:
      raise ValueError('Negative timestamp for source database, '+str(timestamp))
    if not isinstance(dependencies, tuple):
      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
    return value

  def __getitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__getitem__(self, self.getRelativePath(key))

  def __setitem__(self, key, value):
    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
    self.isDirty = 1
    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))

  def __delitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    self.isDirty = 1
    return dict.__delitem__(self, self.getRelativePath(key))

  def __contains__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__contains__(self, self.getRelativePath(key))

  def has_key(self, key):
    '''This method just calls self.__contains__(key)'''
    return self.__contains__(key)

  def items(self):
    '''Converts each key to a relative source file path using the root, returning a list of (key, value) pairs'''
    return [(self.getRelativePath(key), value) for key, value in dict.items(self)]

  def keys(self):
    '''Converts each key to a relative source file path using the root, returning a list
       - A real list is returned, like items(), rather than a lazy map object which can only be traversed once'''
    return [self.getRelativePath(key) for key in dict.keys(self)]

  def update(self, d):
    '''Update the dictionary with the contents of d
       - Every entry goes through __setitem__, so keys are normalized and values validated'''
    self.isDirty = 1
    for k in d:
      self[k] = d[k]
    return

  @staticmethod
  def getChecksum(source, chunkSize = 1024*1024):
    '''Return the md5 checksum for a given file, which may also be specified by its filename
       - The chunkSize argument specifies the size of blocks read from the file
       - The file is closed before returning, even when it was supplied by the caller'''
    if hasattr(source, 'close'):
      f = source
    else:
      # The digest consumes bytes, so the file must be read in binary mode
      f = open(source, 'rb')
    m = new_md5()
    try:
      buf = f.read(chunkSize)
      while buf:
        if not isinstance(buf, bytes):
          # A caller-supplied file object may have been opened in text mode
          buf = buf.encode('utf-8')
        m.update(buf)
        buf = f.read(chunkSize)
    finally:
      f.close()
    return m.hexdigest()

  @staticmethod
  def getModificationTime(source):
    '''Return the modification time of the file truncated to an integer, as required by checkValue()'''
    return int(os.path.getmtime(source))

  @staticmethod
  def _commonComponents(first, second):
    '''Return the number of leading path components shared by the two component lists'''
    count = 0
    for a, b in zip(first, second):
      if not a == b: break
      count += 1
    return count

  def updateSource(self, source, noChecksum = 0):
    '''Record the current checksum and modification time of source, preserving its dependencies
       - If noChecksum is true, an empty checksum is stored instead of reading the file'''
    self.isDirty = 1
    dependencies = self.getDependencies(source)
    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
    if noChecksum:
      checksum   = ''
    else:
      checksum   = SourceDB.getChecksum(source)
    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
    return

  def clearSource(self, source):
    '''This removes source information, but preserves dependencies'''
    if source in self:
      self.isDirty = 1
      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
      dependencies = self[source][3]
      self[source] = ('', 0, time.time(), dependencies)
    return

  def getDependencies(self, source):
    '''Return the dependency tuple stored for source, or an empty tuple if source has no entry'''
    try:
      dependencies = self[source][3]
    except KeyError:
      dependencies = ()
    return dependencies

  def addDependency(self, source, dependency):
    '''Add dependency to the entry for source, creating an entry with empty file data if necessary
       - The timestamp of the entry is refreshed even when the dependency was already present'''
    self.isDirty = 1
    dependencies = ()
    try:
      (checksum, mtime, timestamp, dependencies) = self[source]
    except KeyError:
      checksum = ''
      mtime    = 0
    if not dependency in dependencies:
      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
      dependencies = dependencies+(dependency,)
    self[source] = (checksum, mtime, time.time(), dependencies)
    return

  def calculateDependencies(self):
    '''Recompute the dependencies of every entry by scanning its file for includeRE matches
       - An included name is mapped to the entry whose path contains it and which shares the most
         leading path components with the including file, or kept verbatim if no entry is better
       - Entries whose file no longer exists are removed from the database
       - NOTE(review): files are opened using the stored relative key, so this presumably expects
         the current directory to be the database root - confirm against callers'''
    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
    # Walk a snapshot of the keys, since entries are deleted and reassigned along the way
    for source in list(dict.keys(self)):
      self.logPrint('Calculating '+source, 3, 'sourceDB')
      (checksum, mtime, timestamp, dependencies) = self[source]
      try:
        f = open(source)
      except IOError as e:
        if e.errno == errno.ENOENT:
          # The file is gone, so drop its entry and move on to the next source
          del self[source]
          continue
        raise
      newDep = []
      comps  = source.split('/')
      try:
        for line in f:
          m = self.includeRE.match(line)
          if not m: continue
          filename  = m.group('includeFile')
          matchNum  = 0
          matchName = filename
          self.logPrint('  Includes '+filename, 3, 'sourceDB')
          for s in self:
            if filename in s:
              self.logPrint('    Checking '+s, 3, 'sourceDB')
              # Bounded by the shorter path, so differing depths cannot index out of range
              i = self._commonComponents(comps, s.split('/'))
              if i > matchNum:
                self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
                matchName = s
                matchNum  = i
          newDep.append(matchName)
      finally:
        f.close()
      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
      self[source] = (checksum, mtime, timestamp, tuple(newDep))
    return

  def load(self):
    '''Load the source database from the saved filename
       - The current contents are replaced only after the file has been read successfully
       - NOTE: this unpickles the file, so it must only be used on trusted database files'''
    filename = str(self.filename)
    if os.path.exists(filename):
      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
      # Pickle data is binary
      with open(filename, 'rb') as dbFile:
        newDB = pickle.load(dbFile)
      self.clear()
      self.update(newDB)
    else:
      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
    return

  def save(self, force = 0):
    '''Save the source database to a file. The saved database will have path names relative to the root.
       - Nothing is written unless the database is dirty or force is true
       - Nothing is written if the directory which should hold the file does not exist'''
    if not self.isDirty and not force:
      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
      return
    filename = str(self.filename)
    if os.path.exists(os.path.dirname(filename)):
      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
      # Pickle data is binary
      with open(filename, 'wb') as dbFile:
        pickle.dump(self, dbFile)
      self.isDirty = 0
    else:
      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
    return
281
class DependencyAnalyzer (logger.Logger):
  '''Builds the #include graph for the files recorded in a source database'''
  def __init__(self, sourceDB):
    '''Create an analyzer
       - sourceDB is the database whose keys are the files to analyze'''
    logger.Logger.__init__(self)
    self.sourceDB  = sourceDB
    # NOTE: the \1 backreference requires the closing delimiter to be the same character as
    #   the opening one, so only '#include "file"' lines match, not '#include <file>'
    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
    return

  @staticmethod
  def _commonComponents(first, second):
    '''Return the number of leading path components shared by the two component lists'''
    count = 0
    for a, b in zip(first, second):
      if not a == b: break
      count += 1
    return count

  def resolveDependency(self, source, dep):
    '''Return the database key which the include dep, found in the file source, refers to
       - Raises RuntimeError if no entry of the database corresponds to dep'''
    if dep in self.sourceDB: return dep
    # Choose the entry in sourceDB whose base matches dep,
    #   and who has the most path components in common with source
    # This should be replaced by an appeal to cpp
    matchNum   = 0
    matchName  = dep
    components = source.split(os.sep)
    self.logPrint('  Includes '+dep, 3, 'sourceDB')
    for s in self.sourceDB:
      if dep in s:
        self.logPrint('    Checking '+s, 3, 'sourceDB')
        # Bounded by the shorter path, so differing depths cannot index out of range
        i = self._commonComponents(components, s.split(os.sep))
        if i > matchNum:
          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
          matchName = s
          matchNum  = i
    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
    return matchName

  def getNeighbors(self, source):
    '''Return the list of database keys for the files directly included by source
       - Raises IOError if source cannot be opened, and RuntimeError for an unresolvable include'''
    adj = []
    with open(source) as f:
      for line in f:
        match = self.includeRE.match(line)
        if match:
          adj.append(self.resolveDependency(source, match.group('includeFile')))
    return adj

  def calculateDependencies(self):
    '''Should this be a generator?
    First assemble the DAG using #include relations
    Then calculate the dependencies with all pairs shortest-path
      - I think Floyd-Warshall and N-source Dijkstra are just as good

    Only the first step is implemented. The DAG is returned as a dictionary mapping each
    source to the list produced by getNeighbors(). Entries whose file no longer exists are
    removed from the source database.
    '''
    # Assembling DAG
    dag = {}
    # Walk a snapshot of the keys, since missing files are deleted from the database along the way
    for source in list(self.sourceDB):
      try:
        dag[source] = self.getNeighbors(source)
      except IOError as e:
        if e.errno == errno.ENOENT:
          del self.sourceDB[source]
        else:
          raise
    # Finding all-pairs shortest path
    return dag
338
# Command line interface: sourceDatabase.py <database filename> [insert | remove] <filename>
if __name__ == '__main__':
  import sys
  try:
    # The database file, the action and the filename are all required
    if len(sys.argv) < 4:
      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
    else:
      if os.path.exists(sys.argv[1]):
        # Pickle data is binary. NOTE: unpickling executes code, so only open trusted database files
        with open(sys.argv[1], 'rb') as dbFile:
          sourceDB = pickle.load(dbFile)
      else:
        sys.exit('Could not load source database from '+sys.argv[1])
      if sys.argv[2] == 'insert':
        if sys.argv[3] in sourceDB:
          sourceDB.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
        else:
          sourceDB.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
        sourceDB.updateSource(sys.argv[3])
      elif sys.argv[2] == 'remove':
        if sys.argv[3] in sourceDB:
          sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
          del sourceDB[sys.argv[3]]
        else:
          # Not an exact key, so treat the argument as a regular expression over all keys
          sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
          removeRE = re.compile(sys.argv[3])
          # Collect the matches first, so that the database is not modified while it is traversed
          removes  = [source for source in sourceDB.keys() if removeRE.match(source)]
          for source in removes:
            sourceDB.logPrint('Removing '+source, 3, 'sourceDB')
            del sourceDB[source]
      else:
        sys.exit('Unknown source database action: '+sys.argv[2])
      sourceDB.save()
  except Exception as e:
    import traceback
    # print_tb() writes the traceback itself and returns None, so its result must not be printed
    traceback.print_tb(sys.exc_info()[2])
    sys.exit(str(e))
  sys.exit(0)
376