xref: /petsc/config/BuildSystem/sourceDatabase.py (revision 9fa15c4dcfb14eef1e694c8c4e3db8b9dee7e944)
1179860b2SJed Brown'''A source code database
2179860b2SJed Brown
3179860b2SJed Brown    SourceDB is a database of file information used to determine whether files
4179860b2SJed Brown    should be rebuilt by the build system. All files names are stored relative
5179860b2SJed Brown    to a given root, which is intended as the root of a Project.
6179860b2SJed Brown
7179860b2SJed Brown    Relative or absolute pathnames may be used as keys, but absolute pathnames
8179860b2SJed Brown    must fall under the database root. The value format is a tuple of the following:
9179860b2SJed Brown
10*fe324abeSRonald Gould      Checksum:     The checksum of the file
11179860b2SJed Brown      Mod Time:     The time the file was last modified
12179860b2SJed Brown      Timestamp:    The time theentry was last modified
13179860b2SJed Brown      Dependencies: A tuple of files upon which this entry depends
14179860b2SJed Brown
15179860b2SJed Brown    This script also provides some default actions:
16179860b2SJed Brown
17179860b2SJed Brown      - insert <database file> <filename>
18179860b2SJed Brown        Inserts this file from the database, or updates its entry if it
19179860b2SJed Brown        already exists.
20179860b2SJed Brown
21179860b2SJed Brown      - remove <database file> <filename>
22179860b2SJed Brown        Removes this file from the database. The filename may also be a
23179860b2SJed Brown        regular expression.
24179860b2SJed Brown
25179860b2SJed Brown'''
265b6bfdb9SJed Brownfrom __future__ import print_function
275b6bfdb9SJed Brownfrom __future__ import absolute_import
28179860b2SJed Brownimport logger
29179860b2SJed Brown
30179860b2SJed Brownimport errno
31179860b2SJed Brownimport os
32179860b2SJed Brownimport re
33179860b2SJed Brownimport time
34179860b2SJed Brown
35492432c8SJed Brownimport pickle
36*fe324abeSRonald Gouldfrom hashlib import sha256 as checksum_algo
37179860b2SJed Brown
38179860b2SJed Brownclass SourceDB (dict, logger.Logger):
39179860b2SJed Brown  '''A SourceDB is a dictionary of file data used during the build process.'''
40179860b2SJed Brown  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
41179860b2SJed Brown  isLoading = 0
42179860b2SJed Brown
43179860b2SJed Brown  def __init__(self, root, filename = None):
44179860b2SJed Brown    dict.__init__(self)
45179860b2SJed Brown    logger.Logger.__init__(self)
46179860b2SJed Brown    self.root       = root
47179860b2SJed Brown    self.filename   = filename
48179860b2SJed Brown    if self.filename is None:
49179860b2SJed Brown      self.filename = os.path.join(str(root), 'bsSource.db')
50179860b2SJed Brown    self.isDirty    = 0
51179860b2SJed Brown    return
52179860b2SJed Brown
53179860b2SJed Brown  def __str__(self):
54179860b2SJed Brown    output = ''
55179860b2SJed Brown    for source in self:
56179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
57179860b2SJed Brown      output += source+'\n'
58179860b2SJed Brown      output += '  Checksum:  '+str(checksum)+'\n'
59179860b2SJed Brown      output += '  Mod Time:  '+str(mtime)+'\n'
60179860b2SJed Brown      output += '  Timestamp: '+str(timestamp)+'\n'
61179860b2SJed Brown      output += '  Deps:      '+str(dependencies)+'\n'
62179860b2SJed Brown    return output
63179860b2SJed Brown
64179860b2SJed Brown  def __setstate__(self, d):
65179860b2SJed Brown    logger.Logger.__setstate__(self, d)
66179860b2SJed Brown    # We have to prevent recursive calls to this when the pickled database is loaded in load()
67179860b2SJed Brown    #   This is to ensure that fresh copies of the database are obtained after unpickling
68179860b2SJed Brown    if not SourceDB.isLoading:
69179860b2SJed Brown      SourceDB.isLoading = 1
70179860b2SJed Brown      self.load()
71179860b2SJed Brown      SourceDB.isLoading = 0
72179860b2SJed Brown    return
73179860b2SJed Brown
74179860b2SJed Brown  def getRelativePath(self, path):
75179860b2SJed Brown    '''Returns a relative source file path using the root'''
76179860b2SJed Brown    if os.path.isabs(path):
77179860b2SJed Brown      root = str(self.root)
78179860b2SJed Brown      if not path.startswith(root+os.sep):
79179860b2SJed Brown        raise ValueError('Absolute path '+path+' conflicts with root '+root)
80179860b2SJed Brown      else:
81179860b2SJed Brown        path = path[len(root)+1:]
82179860b2SJed Brown    return path
83179860b2SJed Brown
84179860b2SJed Brown  def checkValue(self, value):
85179860b2SJed Brown    '''Validate the value, raising ValueError for problems'''
86179860b2SJed Brown    if not isinstance(value, tuple):
87179860b2SJed Brown      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
88179860b2SJed Brown    if not len(value) == 4:
89179860b2SJed Brown      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
90179860b2SJed Brown    (checksum, mtime, timestamp, dependencies) = value
91179860b2SJed Brown    if not isinstance(checksum, str):
92179860b2SJed Brown      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
93179860b2SJed Brown    if not isinstance(mtime, int):
94179860b2SJed Brown      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
95179860b2SJed Brown    elif mtime < 0:
96179860b2SJed Brown      raise ValueError('Negative modification time for source database, '+str(mtime))
97179860b2SJed Brown    if not isinstance(timestamp, float):
98179860b2SJed Brown      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
99179860b2SJed Brown    elif timestamp < 0:
100179860b2SJed Brown      raise ValueError('Negative timestamp for source database, '+str(timestamp))
101179860b2SJed Brown    if not isinstance(dependencies, tuple):
102179860b2SJed Brown      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
103179860b2SJed Brown    return value
104179860b2SJed Brown
105179860b2SJed Brown  def __getitem__(self, key):
106179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
107179860b2SJed Brown    return dict.__getitem__(self, self.getRelativePath(key))
108179860b2SJed Brown
109179860b2SJed Brown  def __setitem__(self, key, value):
110179860b2SJed Brown    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
111179860b2SJed Brown    self.isDirty = 1
112179860b2SJed Brown    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))
113179860b2SJed Brown
114179860b2SJed Brown  def __delitem__(self, key):
115179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
116179860b2SJed Brown    self.isDirty = 1
117179860b2SJed Brown    return dict.__delitem__(self, self.getRelativePath(key))
118179860b2SJed Brown
119179860b2SJed Brown  def __contains__(self, key):
120179860b2SJed Brown    '''Converts the key to a relative source file path using the root'''
121179860b2SJed Brown    return dict.__contains__(self, self.getRelativePath(key))
122179860b2SJed Brown
123179860b2SJed Brown  def has_key(self, key):
124179860b2SJed Brown    '''This method just calls self.__contains__(key)'''
125179860b2SJed Brown    return self.__contains__(key)
126179860b2SJed Brown
127179860b2SJed Brown  def items(self):
128179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
129179860b2SJed Brown    return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]
130179860b2SJed Brown
131179860b2SJed Brown  def keys(self):
132179860b2SJed Brown    '''Converts each key to a relative source file path using the root'''
133179860b2SJed Brown    return map(self.getRelativePath, dict.keys(self))
134179860b2SJed Brown
135179860b2SJed Brown  def update(self, d):
136179860b2SJed Brown    '''Update the dictionary with the contents of d'''
137179860b2SJed Brown    self.isDirty = 1
138179860b2SJed Brown    for k in d:
139179860b2SJed Brown      self[k] = d[k]
140179860b2SJed Brown    return
141179860b2SJed Brown
142179860b2SJed Brown  def getChecksum(source, chunkSize = 1024*1024):
143*fe324abeSRonald Gould    '''Return the checksum for a given file, which may also be specified by its filename
144179860b2SJed Brown       - The chunkSize argument specifies the size of blocks read from the file'''
145bbfe0604SJed Brown    if hasattr(source, 'close'):
146179860b2SJed Brown      f = source
147179860b2SJed Brown    else:
148c6ef1b5bSJed Brown      f = open(source)
149*fe324abeSRonald Gould    m = checksum_algo()
150179860b2SJed Brown    size = chunkSize
151179860b2SJed Brown    buf  = f.read(size)
152179860b2SJed Brown    while buf:
153179860b2SJed Brown      m.update(buf)
154179860b2SJed Brown      buf = f.read(size)
155179860b2SJed Brown    f.close()
156179860b2SJed Brown    return m.hexdigest()
157179860b2SJed Brown  getChecksum = staticmethod(getChecksum)
158179860b2SJed Brown
159179860b2SJed Brown  def getModificationTime(source):
160179860b2SJed Brown    t = os.path.getmtime(source)
161179860b2SJed Brown    if isinstance(t, float):
162179860b2SJed Brown      t = int(t)
163179860b2SJed Brown    return t
164179860b2SJed Brown  getModificationTime = staticmethod(getModificationTime)
165179860b2SJed Brown
166179860b2SJed Brown  def updateSource(self, source, noChecksum = 0):
167179860b2SJed Brown    self.isDirty = 1
168179860b2SJed Brown    dependencies = ()
169179860b2SJed Brown    try:
170179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
171179860b2SJed Brown    except KeyError:
172179860b2SJed Brown      pass
173179860b2SJed Brown    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
174179860b2SJed Brown    if noChecksum:
175179860b2SJed Brown      checksum   = ''
176179860b2SJed Brown    else:
177179860b2SJed Brown      checksum   = SourceDB.getChecksum(source)
178179860b2SJed Brown    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
179179860b2SJed Brown    return
180179860b2SJed Brown
181179860b2SJed Brown  def clearSource(self, source):
182179860b2SJed Brown    '''This removes source information, but preserved dependencies'''
183179860b2SJed Brown    if source in self:
184179860b2SJed Brown      self.isDirty = 1
185179860b2SJed Brown      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
186179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
187179860b2SJed Brown      self[source] = ('', 0, time.time(), dependencies)
188179860b2SJed Brown    return
189179860b2SJed Brown
190179860b2SJed Brown  def getDependencies(self, source):
191179860b2SJed Brown    try:
192179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
193179860b2SJed Brown    except KeyError:
194179860b2SJed Brown      dependencies = ()
195179860b2SJed Brown    return dependencies
196179860b2SJed Brown
197179860b2SJed Brown  def addDependency(self, source, dependency):
198179860b2SJed Brown    self.isDirty = 1
199179860b2SJed Brown    dependencies = ()
200179860b2SJed Brown    try:
201179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
202179860b2SJed Brown    except KeyError:
203179860b2SJed Brown      checksum = ''
204179860b2SJed Brown      mtime    = 0
205179860b2SJed Brown    if not dependency in dependencies:
206179860b2SJed Brown      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
207179860b2SJed Brown      dependencies = dependencies+(dependency,)
208179860b2SJed Brown    self[source] = (checksum, mtime, time.time(), dependencies)
209179860b2SJed Brown    return
210179860b2SJed Brown
211179860b2SJed Brown  def calculateDependencies(self):
212179860b2SJed Brown    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
213179860b2SJed Brown    for source in self:
214179860b2SJed Brown      self.logPrint('Calculating '+source, 3, 'sourceDB')
215179860b2SJed Brown      (checksum, mtime, timestamp, dependencies) = self[source]
216179860b2SJed Brown      newDep = []
217179860b2SJed Brown      try:
218c6ef1b5bSJed Brown        file = open(source)
2195b6bfdb9SJed Brown      except IOError as e:
220179860b2SJed Brown        if e.errno == errno.ENOENT:
221179860b2SJed Brown          del self[source]
222179860b2SJed Brown        else:
223179860b2SJed Brown          raise e
224179860b2SJed Brown      comps  = source.split('/')
2255b6bfdb9SJed Brown      for line in file:
226179860b2SJed Brown        m = self.includeRE.match(line)
227179860b2SJed Brown        if m:
228179860b2SJed Brown          filename  = m.group('includeFile')
229179860b2SJed Brown          matchNum  = 0
230179860b2SJed Brown          matchName = filename
231179860b2SJed Brown          self.logPrint('  Includes '+filename, 3, 'sourceDB')
232179860b2SJed Brown          for s in self:
233179860b2SJed Brown            if s.find(filename) >= 0:
234179860b2SJed Brown              self.logPrint('    Checking '+s, 3, 'sourceDB')
235179860b2SJed Brown              c = s.split('/')
236179860b2SJed Brown              for i in range(len(c)):
237179860b2SJed Brown                if not comps[i] == c[i]: break
238179860b2SJed Brown              if i > matchNum:
239179860b2SJed Brown                self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
240179860b2SJed Brown                matchName = s
241179860b2SJed Brown                matchNum  = i
242179860b2SJed Brown          newDep.append(matchName)
243179860b2SJed Brown      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
244179860b2SJed Brown      self[source] = (checksum, mtime, timestamp, tuple(newDep))
245179860b2SJed Brown      file.close()
246179860b2SJed Brown
247179860b2SJed Brown  def load(self):
248179860b2SJed Brown    '''Load the source database from the saved filename'''
249179860b2SJed Brown    filename = str(self.filename)
250179860b2SJed Brown    if os.path.exists(filename):
251179860b2SJed Brown      self.clear()
252179860b2SJed Brown      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
253c6ef1b5bSJed Brown      dbFile = open(filename)
254492432c8SJed Brown      newDB  = pickle.load(dbFile)
255179860b2SJed Brown      dbFile.close()
256179860b2SJed Brown      self.update(newDB)
257179860b2SJed Brown    else:
258179860b2SJed Brown      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
259179860b2SJed Brown    return
260179860b2SJed Brown
261179860b2SJed Brown  def save(self, force = 0):
262179860b2SJed Brown    '''Save the source database to a file. The saved database with have path names relative to the root.'''
263179860b2SJed Brown    if not self.isDirty and not force:
264179860b2SJed Brown      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
265179860b2SJed Brown      return
266179860b2SJed Brown    filename = str(self.filename)
267179860b2SJed Brown    if os.path.exists(os.path.dirname(filename)):
268179860b2SJed Brown      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
269c6ef1b5bSJed Brown      dbFile = open(filename, 'w')
270492432c8SJed Brown      pickle.dump(self, dbFile)
271179860b2SJed Brown      dbFile.close()
272179860b2SJed Brown      self.isDirty = 0
273179860b2SJed Brown    else:
274179860b2SJed Brown      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
275179860b2SJed Brown    return
276179860b2SJed Brown
277179860b2SJed Brownclass DependencyAnalyzer (logger.Logger):
278179860b2SJed Brown  def __init__(self, sourceDB):
279179860b2SJed Brown    logger.Logger.__init__(self)
280179860b2SJed Brown    self.sourceDB  = sourceDB
281179860b2SJed Brown    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
282179860b2SJed Brown    return
283179860b2SJed Brown
284179860b2SJed Brown  def resolveDependency(self, source, dep):
285179860b2SJed Brown    if dep in self.sourceDB: return dep
286179860b2SJed Brown    # Choose the entry in sourceDB whose base matches dep,
287179860b2SJed Brown    #   and who has the most path components in common with source
288179860b2SJed Brown    # This should be replaced by an appeal to cpp
289179860b2SJed Brown    matchNum   = 0
290179860b2SJed Brown    matchName  = dep
291179860b2SJed Brown    components = source.split(os.sep)
292179860b2SJed Brown    self.logPrint('  Includes '+filename, 3, 'sourceDB')
293179860b2SJed Brown    for s in self.sourceDB:
294179860b2SJed Brown      if s.find(dep) >= 0:
295179860b2SJed Brown        self.logPrint('    Checking '+s, 3, 'sourceDB')
296179860b2SJed Brown        comp = s.split(os.sep)
297179860b2SJed Brown        for i in range(len(comp)):
298179860b2SJed Brown          if not components[i] == comp[i]: break
299179860b2SJed Brown        if i > matchNum:
300179860b2SJed Brown          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
301179860b2SJed Brown          matchName = s
302179860b2SJed Brown          matchNum  = i
303179860b2SJed Brown    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
304179860b2SJed Brown    return matchName
305179860b2SJed Brown
306179860b2SJed Brown  def getNeighbors(self, source):
307c6ef1b5bSJed Brown    file = open(source)
308179860b2SJed Brown    adj  = []
3095b6bfdb9SJed Brown    for line in file:
310179860b2SJed Brown      match = self.includeRE.match(line)
311179860b2SJed Brown      if match:
312179860b2SJed Brown        adj.append(self.resolveDependency(source, m.group('includeFile')))
313179860b2SJed Brown    file.close()
314179860b2SJed Brown    return adj
315179860b2SJed Brown
316179860b2SJed Brown  def calculateDependencies(self):
317179860b2SJed Brown    '''Should this be a generator?
318179860b2SJed Brown    First assemble the DAG using #include relations
319d5b43468SJose E. Roman    Then calculate the dependencies with all pairs shortest-path
320179860b2SJed Brown      - I think Floyd-Warshell and N-source Dijkstra are just as good
321179860b2SJed Brown    '''
322179860b2SJed Brown    # Assembling DAG
323179860b2SJed Brown    dag = {}
324179860b2SJed Brown    for source in self.sourceDB:
325179860b2SJed Brown      try:
326179860b2SJed Brown        dag[source] = self.getNeighbors(self, source)
3275b6bfdb9SJed Brown      except IOError as e:
328179860b2SJed Brown        if e.errno == errno.ENOENT:
329179860b2SJed Brown          del self[source]
330179860b2SJed Brown        else:
331179860b2SJed Brown          raise e
332179860b2SJed Brown    # Finding all-pairs shortest path
333179860b2SJed Brown
334179860b2SJed Brownif __name__ == '__main__':
335179860b2SJed Brown  import sys
336179860b2SJed Brown  try:
337179860b2SJed Brown    if len(sys.argv) < 3:
3385b6bfdb9SJed Brown      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
339179860b2SJed Brown    else:
340179860b2SJed Brown      if os.path.exists(sys.argv[1]):
341c6ef1b5bSJed Brown        dbFile   = open(sys.argv[1])
342492432c8SJed Brown        sourceDB = pickle.load(dbFile)
343179860b2SJed Brown        dbFile.close()
344179860b2SJed Brown      else:
345179860b2SJed Brown        sys.exit('Could not load source database from '+sys.argv[1])
346179860b2SJed Brown      if sys.argv[2] == 'insert':
347179860b2SJed Brown        if sys.argv[3] in sourceDB:
348179860b2SJed Brown          self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
349179860b2SJed Brown        else:
350179860b2SJed Brown          self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
351179860b2SJed Brown        self.sourceDB.updateSource(sys.argv[3])
352179860b2SJed Brown      elif sys.argv[2] == 'remove':
353179860b2SJed Brown        if sys.argv[3] in sourceDB:
354179860b2SJed Brown          sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
355179860b2SJed Brown          del self.sourceDB[sys.argv[3]]
356179860b2SJed Brown        else:
357179860b2SJed Brown          sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
358179860b2SJed Brown          removeRE = re.compile(sys.argv[3])
359bb3dd2f6SJed Brown          removes  = list(filter(removeRE.match, sourceDB.keys()))
360179860b2SJed Brown          for source in removes:
361179860b2SJed Brown            self.logPrint('Removing '+source, 3, 'sourceDB')
362179860b2SJed Brown            del self.sourceDB[source]
363179860b2SJed Brown      else:
364179860b2SJed Brown        sys.exit('Unknown source database action: '+sys.argv[2])
365179860b2SJed Brown      sourceDB.save()
3665b6bfdb9SJed Brown  except Exception as e:
367179860b2SJed Brown    import traceback
3685b6bfdb9SJed Brown    print(traceback.print_tb(sys.exc_info()[2]))
369179860b2SJed Brown    sys.exit(str(e))
370179860b2SJed Brown  sys.exit(0)
371