xref: /petsc/config/BuildSystem/sourceDatabase.py (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
'''A source code database

    SourceDB is a database of file information used to determine whether files
    should be rebuilt by the build system. All file names are stored relative
    to a given root, which is intended as the root of a Project.

    Relative or absolute pathnames may be used as keys, but absolute pathnames
    must fall under the database root. The value format is a tuple of the following:

      Checksum:     The md5 checksum of the file
      Mod Time:     The time the file was last modified
      Timestamp:    The time the entry was last modified
      Dependencies: A tuple of files upon which this entry depends

    This script also provides some default actions:

      - insert <database file> <filename>
        Inserts this file into the database, or updates its entry if it
        already exists.

      - remove <database file> <filename>
        Removes this file from the database. The filename may also be a
        regular expression.

'''
26from __future__ import print_function
27from __future__ import absolute_import
28import logger
29
30import errno
31import os
32import re
33import time
34
35import pickle
36from hashlib import md5 as new_md5
37
class SourceDB (dict, logger.Logger):
  '''A SourceDB is a dictionary of file data used during the build process.

  Keys are source file paths, converted to paths relative to self.root on
  every access. Values are (checksum, mtime, timestamp, dependencies) tuples,
  validated by checkValue() on every assignment.
  '''
  # Matches an '#include "file"' line and captures the included file name.
  # NOTE: the \1 backreference requires the closing delimiter to be identical
  #   to the opening one, so a plain '#include <file>' line does not match.
  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
  # Class-wide guard used by __setstate__() to avoid recursive loads
  isLoading = 0

  def __init__(self, root, filename = None):
    '''Create an empty database
       - root is the directory against which absolute keys are made relative
       - filename is the file used by load() and save(), defaulting to bsSource.db under root'''
    dict.__init__(self)
    logger.Logger.__init__(self)
    self.root     = root
    if filename is None:
      filename = os.path.join(str(root), 'bsSource.db')
    self.filename = filename
    self.isDirty  = 0
    return

  def __str__(self):
    '''Return a listing of every entry, one field per line'''
    lines = []
    for source in self:
      (checksum, mtime, timestamp, dependencies) = self[source]
      lines.append(source)
      lines.append('  Checksum:  '+str(checksum))
      lines.append('  Mod Time:  '+str(mtime))
      lines.append('  Timestamp: '+str(timestamp))
      lines.append('  Deps:      '+str(dependencies))
    return ''.join([line+'\n' for line in lines])

  def __setstate__(self, d):
    '''Restore the logger state, then reload the entries from self.filename'''
    logger.Logger.__setstate__(self, d)
    # We have to prevent recursive calls to this when the pickled database is loaded in load()
    #   This is to ensure that fresh copies of the database are obtained after unpickling
    if not SourceDB.isLoading:
      SourceDB.isLoading = 1
      try:
        self.load()
      finally:
        # Always release the guard, otherwise a failed load would disable all later loads
        SourceDB.isLoading = 0
    return

  def getRelativePath(self, path):
    '''Returns a relative source file path using the root
       - Relative paths are returned unchanged
       - Raises ValueError for an absolute path which does not lie under the root'''
    if not os.path.isabs(path):
      return path
    root = str(self.root)
    if not path.startswith(root+os.sep):
      raise ValueError('Absolute path '+path+' conflicts with root '+root)
    return path[len(root)+1:]

  def checkValue(self, value):
    '''Validate the value, raising ValueError for problems
       - The value must be a 4-tuple (str checksum, int mtime >= 0, float timestamp >= 0, tuple dependencies)
       - Returns the value unchanged when it is valid'''
    if not isinstance(value, tuple):
      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
    if not len(value) == 4:
      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
    (checksum, mtime, timestamp, dependencies) = value
    if not isinstance(checksum, str):
      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
    if not isinstance(mtime, int):
      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
    elif mtime < 0:
      raise ValueError('Negative modification time for source database, '+str(mtime))
    if not isinstance(timestamp, float):
      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
    elif timestamp < 0:
      raise ValueError('Negative timestamp for source database, '+str(timestamp))
    if not isinstance(dependencies, tuple):
      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
    return value

  def __getitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__getitem__(self, self.getRelativePath(key))

  def __setitem__(self, key, value):
    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
    self.isDirty = 1
    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))

  def __delitem__(self, key):
    '''Converts the key to a relative source file path using the root'''
    self.isDirty = 1
    return dict.__delitem__(self, self.getRelativePath(key))

  def __contains__(self, key):
    '''Converts the key to a relative source file path using the root'''
    return dict.__contains__(self, self.getRelativePath(key))

  def has_key(self, key):
    '''This method just calls self.__contains__(key)'''
    return self.__contains__(key)

  def items(self):
    '''Returns a list of (key, value) pairs, converting each key to a relative source file path using the root'''
    return [(self.getRelativePath(key), value) for (key, value) in dict.items(self)]

  def keys(self):
    '''Returns a list of keys, converting each key to a relative source file path using the root'''
    # A real list is returned (not a lazy map object) so the result agrees with items()
    return [self.getRelativePath(key) for key in dict.keys(self)]

  def update(self, d):
    '''Update the dictionary with the contents of d
       - Every entry goes through __setitem__, so keys are made relative and values are validated'''
    self.isDirty = 1
    for k in d:
      self[k] = d[k]
    return

  @staticmethod
  def getChecksum(source, chunkSize = 1024*1024):
    '''Return the md5 checksum for a given file, which may also be specified by its filename
       - The chunkSize argument specifies the size of blocks read from the file
       - The file is closed before returning, even when it was passed in already open'''
    if hasattr(source, 'close'):
      f = source
    else:
      # The hash consumes bytes, so the file must be read in binary mode
      f = open(source, 'rb')
    m = new_md5()
    try:
      buf = f.read(chunkSize)
      while buf:
        if not isinstance(buf, bytes):
          # A caller may have handed us a file object opened in text mode
          buf = buf.encode('utf-8')
        m.update(buf)
        buf = f.read(chunkSize)
    finally:
      f.close()
    return m.hexdigest()

  @staticmethod
  def getModificationTime(source):
    '''Return the modification time of the file as an integer, the type required by checkValue()'''
    return int(os.path.getmtime(source))

  def updateSource(self, source, noChecksum = 0):
    '''Recompute the checksum, modification time and timestamp of source, preserving its dependencies
       - If noChecksum is true, an empty checksum is stored instead of reading the file'''
    self.isDirty = 1
    dependencies = self.getDependencies(source)
    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
    if noChecksum:
      checksum = ''
    else:
      checksum = SourceDB.getChecksum(source)
    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
    return

  def clearSource(self, source):
    '''This removes source information, but preserves dependencies
       - Nothing happens if source is not in the database'''
    if source in self:
      self.isDirty = 1
      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
      dependencies = self[source][3]
      self[source] = ('', 0, time.time(), dependencies)
    return

  def getDependencies(self, source):
    '''Return the dependency tuple for source, or an empty tuple if source is not in the database'''
    try:
      return self[source][3]
    except KeyError:
      return ()

  def addDependency(self, source, dependency):
    '''Add dependency to the dependencies of source if it is not already present
       - An entry with empty checksum and zero modification time is created for an unknown source
       - The timestamp of the entry is always refreshed'''
    self.isDirty = 1
    dependencies = ()
    try:
      (checksum, mtime, timestamp, dependencies) = self[source]
    except KeyError:
      checksum = ''
      mtime    = 0
    if not dependency in dependencies:
      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
      dependencies = dependencies+(dependency,)
    self[source] = (checksum, mtime, time.time(), dependencies)
    return

  @staticmethod
  def _firstMismatch(components, candidate):
    '''Return the index of the first component at which candidate differs from components
       - If no compared component differs, the last index of candidate is returned
       - Running out of components counts as a difference'''
    i = 0
    for i in range(len(candidate)):
      if i >= len(components) or not components[i] == candidate[i]: break
    return i

  def calculateDependencies(self):
    '''Recompute the dependencies of every entry from the #include lines of its file
       - Each include is replaced by the database entry containing its name which shares
         the longest leading path with the including source, or kept verbatim if none does
       - Entries whose file no longer exists are removed from the database'''
    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
    # Iterate over a snapshot of the keys, since missing sources are deleted along the way
    for source in list(self):
      self.logPrint('Calculating '+source, 3, 'sourceDB')
      (checksum, mtime, timestamp, dependencies) = self[source]
      try:
        # NOTE(review): keys are root-relative, but this opens relative to the current directory -- confirm
        srcFile = open(source)
      except IOError as e:
        if e.errno == errno.ENOENT:
          del self[source]
          # There is nothing to scan, and the entry must not be recreated below
          continue
        raise
      newDep = []
      comps  = source.split('/')
      try:
        for line in srcFile:
          m = self.includeRE.match(line)
          if not m: continue
          filename  = m.group('includeFile')
          matchNum  = 0
          matchName = filename
          self.logPrint('  Includes '+filename, 3, 'sourceDB')
          for s in self:
            if not filename in s: continue
            self.logPrint('    Checking '+s, 3, 'sourceDB')
            i = SourceDB._firstMismatch(comps, s.split('/'))
            if i > matchNum:
              self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
              matchName = s
              matchNum  = i
          newDep.append(matchName)
      finally:
        srcFile.close()
      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
      self[source] = (checksum, mtime, timestamp, tuple(newDep))
    return

  def load(self):
    '''Load the source database from the saved filename
       - The current contents are discarded if the file exists
       - If the file does not exist, this is logged and the contents are left alone'''
    filename = str(self.filename)
    if os.path.exists(filename):
      self.clear()
      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
      # Pickles are binary data. NOTE: unpickling can execute arbitrary code, so the file must be trusted
      with open(filename, 'rb') as dbFile:
        newDB = pickle.load(dbFile)
      # update() validates every entry and marks the database dirty
      self.update(newDB)
    else:
      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
    return

  def save(self, force = 0):
    '''Save the source database to a file. The saved database will have path names relative to the root.
       - Nothing is written unless the database is dirty or force is true
       - If the directory of the file does not exist, this is logged and nothing is written'''
    if not self.isDirty and not force:
      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
      return
    filename = str(self.filename)
    # A bare filename has an empty dirname, which refers to the current directory
    dirname  = os.path.dirname(filename) or os.curdir
    if os.path.exists(dirname):
      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
      # Pickles are binary data
      with open(filename, 'wb') as dbFile:
        pickle.dump(self, dbFile)
      self.isDirty = 0
    else:
      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
    return
276
class DependencyAnalyzer (logger.Logger):
  '''Builds the #include graph of the files held in a source database'''
  def __init__(self, sourceDB):
    '''Store the source database whose entries will be analyzed'''
    logger.Logger.__init__(self)
    self.sourceDB  = sourceDB
    # Matches an '#include "file"' line and captures the included file name.
    # NOTE: the \1 backreference requires the closing delimiter to be identical
    #   to the opening one, so a plain '#include <file>' line does not match.
    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
    return

  @staticmethod
  def _firstMismatch(components, candidate):
    '''Return the index of the first component at which candidate differs from components
       - If no compared component differs, the last index of candidate is returned
       - Running out of components counts as a difference'''
    i = 0
    for i in range(len(candidate)):
      if i >= len(components) or not components[i] == candidate[i]: break
    return i

  def resolveDependency(self, source, dep):
    '''Return the database entry to which the include dep of the file source refers
       - dep itself is returned if it is an entry of the database
       - Raises RuntimeError if no matching entry can be found'''
    if dep in self.sourceDB: return dep
    # Choose the entry in sourceDB whose base matches dep,
    #   and who has the most path components in common with source
    # This should be replaced by an appeal to cpp
    matchNum   = 0
    matchName  = dep
    components = source.split(os.sep)
    self.logPrint('  Includes '+dep, 3, 'sourceDB')
    for s in self.sourceDB:
      if not dep in s: continue
      self.logPrint('    Checking '+s, 3, 'sourceDB')
      i = DependencyAnalyzer._firstMismatch(components, s.split(os.sep))
      if i > matchNum:
        self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
        matchName = s
        matchNum  = i
    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
    return matchName

  def getNeighbors(self, source):
    '''Return the list of database entries included by the file source
       - Raises IOError if the file cannot be opened, and RuntimeError for an unresolvable include'''
    adj = []
    with open(source) as srcFile:
      for line in srcFile:
        match = self.includeRE.match(line)
        if match:
          adj.append(self.resolveDependency(source, match.group('includeFile')))
    return adj

  def calculateDependencies(self):
    '''Should this be a generator?
    First assemble the DAG using #include relations
    Then calculate the dependencies with all pairs shortest-path
      - I think Floyd-Warshall and N-source Dijkstra are just as good

    Only the first step is implemented. Returns the DAG as a dictionary mapping
    each source to the list returned by getNeighbors(). Sources whose file does
    not exist are removed from the source database and left out of the DAG.
    '''
    # Assembling DAG
    dag = {}
    # Iterate over a snapshot of the keys, since missing sources are deleted along the way
    for source in list(self.sourceDB):
      try:
        dag[source] = self.getNeighbors(source)
      except IOError as e:
        if e.errno == errno.ENOENT:
          del self.sourceDB[source]
        else:
          raise
    # TODO: Finding all-pairs shortest path
    return dag
333
# Command line interface:
#   sourceDatabase.py <database filename> [insert | remove] <filename>
if __name__ == '__main__':
  import sys
  import traceback
  try:
    # The program name, database, action and filename are all required
    if len(sys.argv) < 4:
      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
    else:
      (dbFilename, action, target) = sys.argv[1:4]
      if not os.path.exists(dbFilename):
        sys.exit('Could not load source database from '+dbFilename)
      # Pickles are binary data. NOTE: unpickling can execute arbitrary code, so the file must be trusted
      with open(dbFilename, 'rb') as dbFile:
        sourceDB = pickle.load(dbFile)
      if action == 'insert':
        if target in sourceDB:
          sourceDB.logPrint('Updating '+target, 3, 'sourceDB')
        else:
          sourceDB.logPrint('Inserting '+target, 3, 'sourceDB')
        sourceDB.updateSource(target)
      elif action == 'remove':
        if target in sourceDB:
          sourceDB.logPrint('Removing '+target, 3, 'sourceDB')
          del sourceDB[target]
        else:
          # Not a literal entry, so treat the argument as a pattern matched against the start of each key
          sourceDB.logPrint('Matching regular expression '+target+' over source database', 1, 'sourceDB')
          removeRE = re.compile(target)
          # Collect the matches first so that the database is not modified while it is traversed
          removes  = [source for source in sourceDB.keys() if removeRE.match(source)]
          for source in removes:
            sourceDB.logPrint('Removing '+source, 3, 'sourceDB')
            del sourceDB[source]
      else:
        sys.exit('Unknown source database action: '+action)
      sourceDB.save()
  except Exception as e:
    # print_tb() writes the traceback itself and returns None, so its result must not be printed
    traceback.print_tb(sys.exc_info()[2])
    sys.exit(str(e))
  sys.exit(0)
371