xref: /petsc/config/BuildSystem/sourceDatabase.py (revision 179860b23afbef20daed3359c1645679d1efa988)
1*179860b2SJed Brown#!/usr/bin/env python
2*179860b2SJed Brown'''A source code database
3*179860b2SJed Brown
4*179860b2SJed Brown    SourceDB is a database of file information used to determine whether files
5*179860b2SJed Brown    should be rebuilt by the build system. All files names are stored relative
6*179860b2SJed Brown    to a given root, which is intended as the root of a Project.
7*179860b2SJed Brown
8*179860b2SJed Brown    Relative or absolute pathnames may be used as keys, but absolute pathnames
9*179860b2SJed Brown    must fall under the database root. The value format is a tuple of the following:
10*179860b2SJed Brown
11*179860b2SJed Brown      Checksum:     The md5 checksum of the file
12*179860b2SJed Brown      Mod Time:     The time the file was last modified
13*179860b2SJed Brown      Timestamp:    The time theentry was last modified
14*179860b2SJed Brown      Dependencies: A tuple of files upon which this entry depends
15*179860b2SJed Brown
16*179860b2SJed Brown    This script also provides some default actions:
17*179860b2SJed Brown
18*179860b2SJed Brown      - insert <database file> <filename>
19*179860b2SJed Brown        Inserts this file from the database, or updates its entry if it
20*179860b2SJed Brown        already exists.
21*179860b2SJed Brown
22*179860b2SJed Brown      - remove <database file> <filename>
23*179860b2SJed Brown        Removes this file from the database. The filename may also be a
24*179860b2SJed Brown        regular expression.
25*179860b2SJed Brown
26*179860b2SJed Brown'''
27*179860b2SJed Brownimport logger
28*179860b2SJed Brown
29*179860b2SJed Brownimport errno
30*179860b2SJed Brownimport os
31*179860b2SJed Brownimport re
32*179860b2SJed Brownimport time
33*179860b2SJed Brown
34*179860b2SJed Brownimport cPickle
35*179860b2SJed Brown
36*179860b2SJed Browntry:
37*179860b2SJed Brown  from hashlib import md5 as new_md5
38*179860b2SJed Brownexcept ImportError:
39*179860b2SJed Brown  from md5 import new as new_md5
40*179860b2SJed Brown
41*179860b2SJed Brown
42*179860b2SJed Brownclass SourceDB (dict, logger.Logger):
43*179860b2SJed Brown  '''A SourceDB is a dictionary of file data used during the build process.'''
44*179860b2SJed Brown  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
45*179860b2SJed Brown  isLoading = 0
46*179860b2SJed Brown
47*179860b2SJed Brown  def __init__(self, root, filename = None):
48*179860b2SJed Brown    dict.__init__(self)
49*179860b2SJed Brown    logger.Logger.__init__(self)
50*179860b2SJed Brown    self.root       = root
51*179860b2SJed Brown    self.filename   = filename
52*179860b2SJed Brown    if self.filename is None:
53*179860b2SJed Brown      self.filename = os.path.join(str(root), 'bsSource.db')
54*179860b2SJed Brown    self.isDirty    = 0
55*179860b2SJed Brown    return
56*179860b2SJed Brown
57*179860b2SJed Brown  def __str__(self):
58*179860b2SJed Brown    output = ''
59*179860b2SJed Brown    for source in self:
60*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
61*179860b2SJed Brown      output += source+'\n'
62*179860b2SJed Brown      output += '  Checksum:  '+str(checksum)+'\n'
63*179860b2SJed Brown      output += '  Mod Time:  '+str(mtime)+'\n'
64*179860b2SJed Brown      output += '  Timestamp: '+str(timestamp)+'\n'
65*179860b2SJed Brown      output += '  Deps:      '+str(dependencies)+'\n'
66*179860b2SJed Brown    return output
67*179860b2SJed Brown
68*179860b2SJed Brown  def __setstate__(self, d):
69*179860b2SJed Brown    logger.Logger.__setstate__(self, d)
70*179860b2SJed Brown    # We have to prevent recursive calls to this when the pickled database is loaded in load()
71*179860b2SJed Brown    #   This is to ensure that fresh copies of the database are obtained after unpickling
72*179860b2SJed Brown    if not SourceDB.isLoading:
73*179860b2SJed Brown      SourceDB.isLoading = 1
74*179860b2SJed Brown      self.load()
75*179860b2SJed Brown      SourceDB.isLoading = 0
76*179860b2SJed Brown    return
77*179860b2SJed Brown
78*179860b2SJed Brown  def getRelativePath(self, path):
79*179860b2SJed Brown    '''Returns a relative source file path using the root'''
80*179860b2SJed Brown    if os.path.isabs(path):
81*179860b2SJed Brown      root = str(self.root)
82*179860b2SJed Brown      if not path.startswith(root+os.sep):
83*179860b2SJed Brown        raise ValueError('Absolute path '+path+' conflicts with root '+root)
84*179860b2SJed Brown      else:
85*179860b2SJed Brown        path = path[len(root)+1:]
86*179860b2SJed Brown    return path
87*179860b2SJed Brown
88*179860b2SJed Brown  def checkValue(self, value):
89*179860b2SJed Brown    '''Validate the value, raising ValueError for problems'''
90*179860b2SJed Brown    if not isinstance(value, tuple):
91*179860b2SJed Brown      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
92*179860b2SJed Brown    if not len(value) == 4:
93*179860b2SJed Brown      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
94*179860b2SJed Brown    (checksum, mtime, timestamp, dependencies) = value
95*179860b2SJed Brown    if not isinstance(checksum, str):
96*179860b2SJed Brown      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
97*179860b2SJed Brown    if not isinstance(mtime, int):
98*179860b2SJed Brown      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
99*179860b2SJed Brown    elif mtime < 0:
100*179860b2SJed Brown      raise ValueError('Negative modification time for source database, '+str(mtime))
101*179860b2SJed Brown    if not isinstance(timestamp, float):
102*179860b2SJed Brown      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
103*179860b2SJed Brown    elif timestamp < 0:
104*179860b2SJed Brown      raise ValueError('Negative timestamp for source database, '+str(timestamp))
105*179860b2SJed Brown    if not isinstance(dependencies, tuple):
106*179860b2SJed Brown      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
107*179860b2SJed Brown    return value
108*179860b2SJed Brown
109*179860b2SJed Brown  def __getitem__(self, key):
110*179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
111*179860b2SJed Brown    return dict.__getitem__(self, self.getRelativePath(key))
112*179860b2SJed Brown
113*179860b2SJed Brown  def __setitem__(self, key, value):
114*179860b2SJed Brown    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
115*179860b2SJed Brown    self.isDirty = 1
116*179860b2SJed Brown    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))
117*179860b2SJed Brown
118*179860b2SJed Brown  def __delitem__(self, key):
119*179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
120*179860b2SJed Brown    self.isDirty = 1
121*179860b2SJed Brown    return dict.__delitem__(self, self.getRelativePath(key))
122*179860b2SJed Brown
123*179860b2SJed Brown  def __contains__(self, key):
124*179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
125*179860b2SJed Brown    return dict.__contains__(self, self.getRelativePath(key))
126*179860b2SJed Brown
127*179860b2SJed Brown  def has_key(self, key):
128*179860b2SJed Brown    '''This method just calls self.__contains__(key)'''
129*179860b2SJed Brown    return self.__contains__(key)
130*179860b2SJed Brown
131*179860b2SJed Brown  def items(self):
132*179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
133*179860b2SJed Brown    return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]
134*179860b2SJed Brown
135*179860b2SJed Brown  def keys(self):
136*179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
137*179860b2SJed Brown    return map(self.getRelativePath, dict.keys(self))
138*179860b2SJed Brown
139*179860b2SJed Brown  def update(self, d):
140*179860b2SJed Brown    '''Update the dictionary with the contents of d'''
141*179860b2SJed Brown    self.isDirty = 1
142*179860b2SJed Brown    for k in d:
143*179860b2SJed Brown      self[k] = d[k]
144*179860b2SJed Brown    return
145*179860b2SJed Brown
146*179860b2SJed Brown  def getChecksum(source, chunkSize = 1024*1024):
147*179860b2SJed Brown    '''Return the md5 checksum for a given file, which may also be specified by its filename
148*179860b2SJed Brown       - The chunkSize argument specifies the size of blocks read from the file'''
149*179860b2SJed Brown    if isinstance(source, file):
150*179860b2SJed Brown      f = source
151*179860b2SJed Brown    else:
152*179860b2SJed Brown      f = file(source)
153*179860b2SJed Brown    m = new_md5()
154*179860b2SJed Brown    size = chunkSize
155*179860b2SJed Brown    buf  = f.read(size)
156*179860b2SJed Brown    while buf:
157*179860b2SJed Brown      m.update(buf)
158*179860b2SJed Brown      buf = f.read(size)
159*179860b2SJed Brown    f.close()
160*179860b2SJed Brown    return m.hexdigest()
161*179860b2SJed Brown  getChecksum = staticmethod(getChecksum)
162*179860b2SJed Brown
163*179860b2SJed Brown  def getModificationTime(source):
164*179860b2SJed Brown    t = os.path.getmtime(source)
165*179860b2SJed Brown    if isinstance(t, float):
166*179860b2SJed Brown      t = int(t)
167*179860b2SJed Brown    return t
168*179860b2SJed Brown  getModificationTime = staticmethod(getModificationTime)
169*179860b2SJed Brown
170*179860b2SJed Brown  def updateSource(self, source, noChecksum = 0):
171*179860b2SJed Brown    self.isDirty = 1
172*179860b2SJed Brown    dependencies = ()
173*179860b2SJed Brown    try:
174*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
175*179860b2SJed Brown    except KeyError:
176*179860b2SJed Brown      pass
177*179860b2SJed Brown    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
178*179860b2SJed Brown    if noChecksum:
179*179860b2SJed Brown      checksum   = ''
180*179860b2SJed Brown    else:
181*179860b2SJed Brown      checksum   = SourceDB.getChecksum(source)
182*179860b2SJed Brown    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
183*179860b2SJed Brown    return
184*179860b2SJed Brown
185*179860b2SJed Brown  def clearSource(self, source):
186*179860b2SJed Brown    '''This removes source information, but preserved dependencies'''
187*179860b2SJed Brown    if source in self:
188*179860b2SJed Brown      self.isDirty = 1
189*179860b2SJed Brown      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
190*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
191*179860b2SJed Brown      self[source] = ('', 0, time.time(), dependencies)
192*179860b2SJed Brown    return
193*179860b2SJed Brown
194*179860b2SJed Brown  def getDependencies(self, source):
195*179860b2SJed Brown    try:
196*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
197*179860b2SJed Brown    except KeyError:
198*179860b2SJed Brown      dependencies = ()
199*179860b2SJed Brown    return dependencies
200*179860b2SJed Brown
201*179860b2SJed Brown  def addDependency(self, source, dependency):
202*179860b2SJed Brown    self.isDirty = 1
203*179860b2SJed Brown    dependencies = ()
204*179860b2SJed Brown    try:
205*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
206*179860b2SJed Brown    except KeyError:
207*179860b2SJed Brown      checksum = ''
208*179860b2SJed Brown      mtime    = 0
209*179860b2SJed Brown    if not dependency in dependencies:
210*179860b2SJed Brown      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
211*179860b2SJed Brown      dependencies = dependencies+(dependency,)
212*179860b2SJed Brown    self[source] = (checksum, mtime, time.time(), dependencies)
213*179860b2SJed Brown    return
214*179860b2SJed Brown
215*179860b2SJed Brown  def calculateDependencies(self):
216*179860b2SJed Brown    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
217*179860b2SJed Brown    for source in self:
218*179860b2SJed Brown      self.logPrint('Calculating '+source, 3, 'sourceDB')
219*179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
220*179860b2SJed Brown      newDep = []
221*179860b2SJed Brown      try:
222*179860b2SJed Brown        file = file(source)
223*179860b2SJed Brown      except IOError, e:
224*179860b2SJed Brown        if e.errno == errno.ENOENT:
225*179860b2SJed Brown          del self[source]
226*179860b2SJed Brown        else:
227*179860b2SJed Brown          raise e
228*179860b2SJed Brown      comps  = source.split('/')
229*179860b2SJed Brown      for line in file.xreadlines():
230*179860b2SJed Brown        m = self.includeRE.match(line)
231*179860b2SJed Brown        if m:
232*179860b2SJed Brown          filename  = m.group('includeFile')
233*179860b2SJed Brown          matchNum  = 0
234*179860b2SJed Brown          matchName = filename
235*179860b2SJed Brown          self.logPrint('  Includes '+filename, 3, 'sourceDB')
236*179860b2SJed Brown          for s in self:
237*179860b2SJed Brown            if s.find(filename) >= 0:
238*179860b2SJed Brown              self.logPrint('    Checking '+s, 3, 'sourceDB')
239*179860b2SJed Brown              c = s.split('/')
240*179860b2SJed Brown              for i in range(len(c)):
241*179860b2SJed Brown                if not comps[i] == c[i]: break
242*179860b2SJed Brown              if i > matchNum:
243*179860b2SJed Brown                self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
244*179860b2SJed Brown                matchName = s
245*179860b2SJed Brown                matchNum  = i
246*179860b2SJed Brown          newDep.append(matchName)
247*179860b2SJed Brown      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
248*179860b2SJed Brown      self[source] = (checksum, mtime, timestamp, tuple(newDep))
249*179860b2SJed Brown      file.close()
250*179860b2SJed Brown
251*179860b2SJed Brown  def load(self):
252*179860b2SJed Brown    '''Load the source database from the saved filename'''
253*179860b2SJed Brown    filename = str(self.filename)
254*179860b2SJed Brown    if os.path.exists(filename):
255*179860b2SJed Brown      self.clear()
256*179860b2SJed Brown      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
257*179860b2SJed Brown      dbFile = file(filename)
258*179860b2SJed Brown      newDB  = cPickle.load(dbFile)
259*179860b2SJed Brown      dbFile.close()
260*179860b2SJed Brown      self.update(newDB)
261*179860b2SJed Brown    else:
262*179860b2SJed Brown      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
263*179860b2SJed Brown    return
264*179860b2SJed Brown
265*179860b2SJed Brown  def save(self, force = 0):
266*179860b2SJed Brown    '''Save the source database to a file. The saved database with have path names relative to the root.'''
267*179860b2SJed Brown    if not self.isDirty and not force:
268*179860b2SJed Brown      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
269*179860b2SJed Brown      return
270*179860b2SJed Brown    filename = str(self.filename)
271*179860b2SJed Brown    if os.path.exists(os.path.dirname(filename)):
272*179860b2SJed Brown      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
273*179860b2SJed Brown      dbFile = file(filename, 'w')
274*179860b2SJed Brown      cPickle.dump(self, dbFile)
275*179860b2SJed Brown      dbFile.close()
276*179860b2SJed Brown      self.isDirty = 0
277*179860b2SJed Brown    else:
278*179860b2SJed Brown      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
279*179860b2SJed Brown    return
280*179860b2SJed Brown
281*179860b2SJed Brownclass DependencyAnalyzer (logger.Logger):
282*179860b2SJed Brown  def __init__(self, sourceDB):
283*179860b2SJed Brown    logger.Logger.__init__(self)
284*179860b2SJed Brown    self.sourceDB  = sourceDB
285*179860b2SJed Brown    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
286*179860b2SJed Brown    return
287*179860b2SJed Brown
288*179860b2SJed Brown  def resolveDependency(self, source, dep):
289*179860b2SJed Brown    if dep in self.sourceDB: return dep
290*179860b2SJed Brown    # Choose the entry in sourceDB whose base matches dep,
291*179860b2SJed Brown    #   and who has the most path components in common with source
292*179860b2SJed Brown    # This should be replaced by an appeal to cpp
293*179860b2SJed Brown    matchNum   = 0
294*179860b2SJed Brown    matchName  = dep
295*179860b2SJed Brown    components = source.split(os.sep)
296*179860b2SJed Brown    self.logPrint('  Includes '+filename, 3, 'sourceDB')
297*179860b2SJed Brown    for s in self.sourceDB:
298*179860b2SJed Brown      if s.find(dep) >= 0:
299*179860b2SJed Brown        self.logPrint('    Checking '+s, 3, 'sourceDB')
300*179860b2SJed Brown        comp = s.split(os.sep)
301*179860b2SJed Brown        for i in range(len(comp)):
302*179860b2SJed Brown          if not components[i] == comp[i]: break
303*179860b2SJed Brown        if i > matchNum:
304*179860b2SJed Brown          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
305*179860b2SJed Brown          matchName = s
306*179860b2SJed Brown          matchNum  = i
307*179860b2SJed Brown    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
308*179860b2SJed Brown    return matchName
309*179860b2SJed Brown
310*179860b2SJed Brown  def getNeighbors(self, source):
311*179860b2SJed Brown    file = file(source)
312*179860b2SJed Brown    adj  = []
313*179860b2SJed Brown    for line in file.xreadlines():
314*179860b2SJed Brown      match = self.includeRE.match(line)
315*179860b2SJed Brown      if match:
316*179860b2SJed Brown        adj.append(self.resolveDependency(source, m.group('includeFile')))
317*179860b2SJed Brown    file.close()
318*179860b2SJed Brown    return adj
319*179860b2SJed Brown
320*179860b2SJed Brown  def calculateDependencies(self):
321*179860b2SJed Brown    '''Should this be a generator?
322*179860b2SJed Brown    First assemble the DAG using #include relations
323*179860b2SJed Brown    Then calculate the depdencies with all pairs shortest-path
324*179860b2SJed Brown      - I think Floyd-Warshell and N-source Dijkstra are just as good
325*179860b2SJed Brown    '''
326*179860b2SJed Brown    # Assembling DAG
327*179860b2SJed Brown    dag = {}
328*179860b2SJed Brown    for source in self.sourceDB:
329*179860b2SJed Brown      try:
330*179860b2SJed Brown        dag[source] = self.getNeighbors(self, source)
331*179860b2SJed Brown      except IOError, e:
332*179860b2SJed Brown        if e.errno == errno.ENOENT:
333*179860b2SJed Brown          del self[source]
334*179860b2SJed Brown        else:
335*179860b2SJed Brown          raise e
336*179860b2SJed Brown    # Finding all-pairs shortest path
337*179860b2SJed Brown
338*179860b2SJed Brownif __name__ == '__main__':
339*179860b2SJed Brown  import sys
340*179860b2SJed Brown  try:
341*179860b2SJed Brown    if len(sys.argv) < 3:
342*179860b2SJed Brown      print 'sourceDatabase.py <database filename> [insert | remove] <filename>'
343*179860b2SJed Brown    else:
344*179860b2SJed Brown      if os.path.exists(sys.argv[1]):
345*179860b2SJed Brown        dbFile   = file(sys.argv[1])
346*179860b2SJed Brown        sourceDB = cPickle.load(dbFile)
347*179860b2SJed Brown        dbFile.close()
348*179860b2SJed Brown      else:
349*179860b2SJed Brown        sys.exit('Could not load source database from '+sys.argv[1])
350*179860b2SJed Brown      if sys.argv[2] == 'insert':
351*179860b2SJed Brown        if sys.argv[3] in sourceDB:
352*179860b2SJed Brown          self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
353*179860b2SJed Brown        else:
354*179860b2SJed Brown          self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
355*179860b2SJed Brown        self.sourceDB.updateSource(sys.argv[3])
356*179860b2SJed Brown      elif sys.argv[2] == 'remove':
357*179860b2SJed Brown        if sys.argv[3] in sourceDB:
358*179860b2SJed Brown          sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
359*179860b2SJed Brown          del self.sourceDB[sys.argv[3]]
360*179860b2SJed Brown        else:
361*179860b2SJed Brown          sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
362*179860b2SJed Brown          removeRE = re.compile(sys.argv[3])
363*179860b2SJed Brown          removes  = filter(removeRE.match, sourceDB.keys())
364*179860b2SJed Brown          for source in removes:
365*179860b2SJed Brown            self.logPrint('Removing '+source, 3, 'sourceDB')
366*179860b2SJed Brown            del self.sourceDB[source]
367*179860b2SJed Brown      else:
368*179860b2SJed Brown        sys.exit('Unknown source database action: '+sys.argv[2])
369*179860b2SJed Brown      sourceDB.save()
370*179860b2SJed Brown  except Exception, e:
371*179860b2SJed Brown    import traceback
372*179860b2SJed Brown    print traceback.print_tb(sys.exc_info()[2])
373*179860b2SJed Brown    sys.exit(str(e))
374*179860b2SJed Brown  sys.exit(0)
375