1179860b2SJed Brown#!/usr/bin/env python 2179860b2SJed Brown'''A source code database 3179860b2SJed Brown 4179860b2SJed Brown SourceDB is a database of file information used to determine whether files 5179860b2SJed Brown should be rebuilt by the build system. All files names are stored relative 6179860b2SJed Brown to a given root, which is intended as the root of a Project. 7179860b2SJed Brown 8179860b2SJed Brown Relative or absolute pathnames may be used as keys, but absolute pathnames 9179860b2SJed Brown must fall under the database root. The value format is a tuple of the following: 10179860b2SJed Brown 11179860b2SJed Brown Checksum: The md5 checksum of the file 12179860b2SJed Brown Mod Time: The time the file was last modified 13179860b2SJed Brown Timestamp: The time theentry was last modified 14179860b2SJed Brown Dependencies: A tuple of files upon which this entry depends 15179860b2SJed Brown 16179860b2SJed Brown This script also provides some default actions: 17179860b2SJed Brown 18179860b2SJed Brown - insert <database file> <filename> 19179860b2SJed Brown Inserts this file from the database, or updates its entry if it 20179860b2SJed Brown already exists. 21179860b2SJed Brown 22179860b2SJed Brown - remove <database file> <filename> 23179860b2SJed Brown Removes this file from the database. The filename may also be a 24179860b2SJed Brown regular expression. 25179860b2SJed Brown 26179860b2SJed Brown''' 275b6bfdb9SJed Brownfrom __future__ import print_function 285b6bfdb9SJed Brownfrom __future__ import absolute_import 29179860b2SJed Brownimport logger 30179860b2SJed Brown 31179860b2SJed Brownimport errno 32179860b2SJed Brownimport os 33179860b2SJed Brownimport re 34179860b2SJed Brownimport time 35179860b2SJed Brown 36492432c8SJed Brownimport pickle 37179860b2SJed Brown 38179860b2SJed Browntry: 39179860b2SJed Brown from hashlib import md5 as new_md5 40179860b2SJed Brownexcept ImportError: 41179860b2SJed Brown from md5 import new as new_md5 42179860b2SJed Brown 43179860b2SJed Brown 44179860b2SJed Brownclass SourceDB (dict, logger.Logger): 45179860b2SJed Brown '''A SourceDB is a dictionary of file data used during the build process.''' 46179860b2SJed Brown includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 47179860b2SJed Brown isLoading = 0 48179860b2SJed Brown 49179860b2SJed Brown def __init__(self, root, filename = None): 50179860b2SJed Brown dict.__init__(self) 51179860b2SJed Brown logger.Logger.__init__(self) 52179860b2SJed Brown self.root = root 53179860b2SJed Brown self.filename = filename 54179860b2SJed Brown if self.filename is None: 55179860b2SJed Brown self.filename = os.path.join(str(root), 'bsSource.db') 56179860b2SJed Brown self.isDirty = 0 57179860b2SJed Brown return 58179860b2SJed Brown 59179860b2SJed Brown def __str__(self): 60179860b2SJed Brown output = '' 61179860b2SJed Brown for source in self: 62179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 63179860b2SJed Brown output += source+'\n' 64179860b2SJed Brown output += ' Checksum: '+str(checksum)+'\n' 65179860b2SJed Brown output += ' Mod Time: '+str(mtime)+'\n' 66179860b2SJed Brown output += ' Timestamp: '+str(timestamp)+'\n' 67179860b2SJed Brown output += ' Deps: '+str(dependencies)+'\n' 68179860b2SJed Brown return output 69179860b2SJed Brown 70179860b2SJed Brown def __setstate__(self, d): 71179860b2SJed Brown logger.Logger.__setstate__(self, d) 72179860b2SJed Brown # We have to prevent recursive calls to this when the pickled database is loaded in load() 73179860b2SJed Brown # This is to ensure that fresh copies of the database are obtained after unpickling 74179860b2SJed Brown if not SourceDB.isLoading: 75179860b2SJed Brown SourceDB.isLoading = 1 76179860b2SJed Brown self.load() 77179860b2SJed Brown SourceDB.isLoading = 0 78179860b2SJed Brown return 79179860b2SJed Brown 80179860b2SJed Brown def getRelativePath(self, path): 81179860b2SJed Brown '''Returns a relative source file path using the root''' 82179860b2SJed Brown if os.path.isabs(path): 83179860b2SJed Brown root = str(self.root) 84179860b2SJed Brown if not path.startswith(root+os.sep): 85179860b2SJed Brown raise ValueError('Absolute path '+path+' conflicts with root '+root) 86179860b2SJed Brown else: 87179860b2SJed Brown path = path[len(root)+1:] 88179860b2SJed Brown return path 89179860b2SJed Brown 90179860b2SJed Brown def checkValue(self, value): 91179860b2SJed Brown '''Validate the value, raising ValueError for problems''' 92179860b2SJed Brown if not isinstance(value, tuple): 93179860b2SJed Brown raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 94179860b2SJed Brown if not len(value) == 4: 95179860b2SJed Brown raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 96179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = value 97179860b2SJed Brown if not isinstance(checksum, str): 98179860b2SJed Brown raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 99179860b2SJed Brown if not isinstance(mtime, int): 100179860b2SJed Brown raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 101179860b2SJed Brown elif mtime < 0: 102179860b2SJed Brown raise ValueError('Negative modification time for source database, '+str(mtime)) 103179860b2SJed Brown if not isinstance(timestamp, float): 104179860b2SJed Brown raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 105179860b2SJed Brown elif timestamp < 0: 106179860b2SJed Brown raise ValueError('Negative timestamp for source database, '+str(timestamp)) 107179860b2SJed Brown if not isinstance(dependencies, tuple): 108179860b2SJed Brown raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 109179860b2SJed Brown return value 110179860b2SJed Brown 111179860b2SJed Brown def __getitem__(self, key): 112179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 113179860b2SJed Brown return dict.__getitem__(self, self.getRelativePath(key)) 114179860b2SJed Brown 115179860b2SJed Brown def __setitem__(self, key, value): 116179860b2SJed Brown '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 117179860b2SJed Brown self.isDirty = 1 118179860b2SJed Brown return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 119179860b2SJed Brown 120179860b2SJed Brown def __delitem__(self, key): 121179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 122179860b2SJed Brown self.isDirty = 1 123179860b2SJed Brown return dict.__delitem__(self, self.getRelativePath(key)) 124179860b2SJed Brown 125179860b2SJed Brown def __contains__(self, key): 126179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 127179860b2SJed Brown return dict.__contains__(self, self.getRelativePath(key)) 128179860b2SJed Brown 129179860b2SJed Brown def has_key(self, key): 130179860b2SJed Brown '''This method just calls self.__contains__(key)''' 131179860b2SJed Brown return self.__contains__(key) 132179860b2SJed Brown 133179860b2SJed Brown def items(self): 134179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 135179860b2SJed Brown return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 136179860b2SJed Brown 137179860b2SJed Brown def keys(self): 138179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 139179860b2SJed Brown return map(self.getRelativePath, dict.keys(self)) 140179860b2SJed Brown 141179860b2SJed Brown def update(self, d): 142179860b2SJed Brown '''Update the dictionary with the contents of d''' 143179860b2SJed Brown self.isDirty = 1 144179860b2SJed Brown for k in d: 145179860b2SJed Brown self[k] = d[k] 146179860b2SJed Brown return 147179860b2SJed Brown 148179860b2SJed Brown def getChecksum(source, chunkSize = 1024*1024): 149179860b2SJed Brown '''Return the md5 checksum for a given file, which may also be specified by its filename 150179860b2SJed Brown - The chunkSize argument specifies the size of blocks read from the file''' 151*bbfe0604SJed Brown if hasattr(source, 'close'): 152179860b2SJed Brown f = source 153179860b2SJed Brown else: 154c6ef1b5bSJed Brown f = open(source) 155179860b2SJed Brown m = new_md5() 156179860b2SJed Brown size = chunkSize 157179860b2SJed Brown buf = f.read(size) 158179860b2SJed Brown while buf: 159179860b2SJed Brown m.update(buf) 160179860b2SJed Brown buf = f.read(size) 161179860b2SJed Brown f.close() 162179860b2SJed Brown return m.hexdigest() 163179860b2SJed Brown getChecksum = staticmethod(getChecksum) 164179860b2SJed Brown 165179860b2SJed Brown def getModificationTime(source): 166179860b2SJed Brown t = os.path.getmtime(source) 167179860b2SJed Brown if isinstance(t, float): 168179860b2SJed Brown t = int(t) 169179860b2SJed Brown return t 170179860b2SJed Brown getModificationTime = staticmethod(getModificationTime) 171179860b2SJed Brown 172179860b2SJed Brown def updateSource(self, source, noChecksum = 0): 173179860b2SJed Brown self.isDirty = 1 174179860b2SJed Brown dependencies = () 175179860b2SJed Brown try: 176179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 177179860b2SJed Brown except KeyError: 178179860b2SJed Brown pass 179179860b2SJed Brown self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 180179860b2SJed Brown if noChecksum: 181179860b2SJed Brown checksum = '' 182179860b2SJed Brown else: 183179860b2SJed Brown checksum = SourceDB.getChecksum(source) 184179860b2SJed Brown self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 185179860b2SJed Brown return 186179860b2SJed Brown 187179860b2SJed Brown def clearSource(self, source): 188179860b2SJed Brown '''This removes source information, but preserved dependencies''' 189179860b2SJed Brown if source in self: 190179860b2SJed Brown self.isDirty = 1 191179860b2SJed Brown self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 192179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 193179860b2SJed Brown self[source] = ('', 0, time.time(), dependencies) 194179860b2SJed Brown return 195179860b2SJed Brown 196179860b2SJed Brown def getDependencies(self, source): 197179860b2SJed Brown try: 198179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 199179860b2SJed Brown except KeyError: 200179860b2SJed Brown dependencies = () 201179860b2SJed Brown return dependencies 202179860b2SJed Brown 203179860b2SJed Brown def addDependency(self, source, dependency): 204179860b2SJed Brown self.isDirty = 1 205179860b2SJed Brown dependencies = () 206179860b2SJed Brown try: 207179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 208179860b2SJed Brown except KeyError: 209179860b2SJed Brown checksum = '' 210179860b2SJed Brown mtime = 0 211179860b2SJed Brown if not dependency in dependencies: 212179860b2SJed Brown self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 213179860b2SJed Brown dependencies = dependencies+(dependency,) 214179860b2SJed Brown self[source] = (checksum, mtime, time.time(), dependencies) 215179860b2SJed Brown return 216179860b2SJed Brown 217179860b2SJed Brown def calculateDependencies(self): 218179860b2SJed Brown self.logPrint('Recalculating dependencies', 1, 'sourceDB') 219179860b2SJed Brown for source in self: 220179860b2SJed Brown self.logPrint('Calculating '+source, 3, 'sourceDB') 221179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 222179860b2SJed Brown newDep = [] 223179860b2SJed Brown try: 224c6ef1b5bSJed Brown file = open(source) 2255b6bfdb9SJed Brown except IOError as e: 226179860b2SJed Brown if e.errno == errno.ENOENT: 227179860b2SJed Brown del self[source] 228179860b2SJed Brown else: 229179860b2SJed Brown raise e 230179860b2SJed Brown comps = source.split('/') 2315b6bfdb9SJed Brown for line in file: 232179860b2SJed Brown m = self.includeRE.match(line) 233179860b2SJed Brown if m: 234179860b2SJed Brown filename = m.group('includeFile') 235179860b2SJed Brown matchNum = 0 236179860b2SJed Brown matchName = filename 237179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 238179860b2SJed Brown for s in self: 239179860b2SJed Brown if s.find(filename) >= 0: 240179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 241179860b2SJed Brown c = s.split('/') 242179860b2SJed Brown for i in range(len(c)): 243179860b2SJed Brown if not comps[i] == c[i]: break 244179860b2SJed Brown if i > matchNum: 245179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 246179860b2SJed Brown matchName = s 247179860b2SJed Brown matchNum = i 248179860b2SJed Brown newDep.append(matchName) 249179860b2SJed Brown # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 250179860b2SJed Brown self[source] = (checksum, mtime, timestamp, tuple(newDep)) 251179860b2SJed Brown file.close() 252179860b2SJed Brown 253179860b2SJed Brown def load(self): 254179860b2SJed Brown '''Load the source database from the saved filename''' 255179860b2SJed Brown filename = str(self.filename) 256179860b2SJed Brown if os.path.exists(filename): 257179860b2SJed Brown self.clear() 258179860b2SJed Brown self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 259c6ef1b5bSJed Brown dbFile = open(filename) 260492432c8SJed Brown newDB = pickle.load(dbFile) 261179860b2SJed Brown dbFile.close() 262179860b2SJed Brown self.update(newDB) 263179860b2SJed Brown else: 264179860b2SJed Brown self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 265179860b2SJed Brown return 266179860b2SJed Brown 267179860b2SJed Brown def save(self, force = 0): 268179860b2SJed Brown '''Save the source database to a file. The saved database with have path names relative to the root.''' 269179860b2SJed Brown if not self.isDirty and not force: 270179860b2SJed Brown self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 271179860b2SJed Brown return 272179860b2SJed Brown filename = str(self.filename) 273179860b2SJed Brown if os.path.exists(os.path.dirname(filename)): 274179860b2SJed Brown self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 275c6ef1b5bSJed Brown dbFile = open(filename, 'w') 276492432c8SJed Brown pickle.dump(self, dbFile) 277179860b2SJed Brown dbFile.close() 278179860b2SJed Brown self.isDirty = 0 279179860b2SJed Brown else: 280179860b2SJed Brown self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 281179860b2SJed Brown return 282179860b2SJed Brown 283179860b2SJed Brownclass DependencyAnalyzer (logger.Logger): 284179860b2SJed Brown def __init__(self, sourceDB): 285179860b2SJed Brown logger.Logger.__init__(self) 286179860b2SJed Brown self.sourceDB = sourceDB 287179860b2SJed Brown self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 288179860b2SJed Brown return 289179860b2SJed Brown 290179860b2SJed Brown def resolveDependency(self, source, dep): 291179860b2SJed Brown if dep in self.sourceDB: return dep 292179860b2SJed Brown # Choose the entry in sourceDB whose base matches dep, 293179860b2SJed Brown # and who has the most path components in common with source 294179860b2SJed Brown # This should be replaced by an appeal to cpp 295179860b2SJed Brown matchNum = 0 296179860b2SJed Brown matchName = dep 297179860b2SJed Brown components = source.split(os.sep) 298179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 299179860b2SJed Brown for s in self.sourceDB: 300179860b2SJed Brown if s.find(dep) >= 0: 301179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 302179860b2SJed Brown comp = s.split(os.sep) 303179860b2SJed Brown for i in range(len(comp)): 304179860b2SJed Brown if not components[i] == comp[i]: break 305179860b2SJed Brown if i > matchNum: 306179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 307179860b2SJed Brown matchName = s 308179860b2SJed Brown matchNum = i 309179860b2SJed Brown if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 310179860b2SJed Brown return matchName 311179860b2SJed Brown 312179860b2SJed Brown def getNeighbors(self, source): 313c6ef1b5bSJed Brown file = open(source) 314179860b2SJed Brown adj = [] 3155b6bfdb9SJed Brown for line in file: 316179860b2SJed Brown match = self.includeRE.match(line) 317179860b2SJed Brown if match: 318179860b2SJed Brown adj.append(self.resolveDependency(source, m.group('includeFile'))) 319179860b2SJed Brown file.close() 320179860b2SJed Brown return adj 321179860b2SJed Brown 322179860b2SJed Brown def calculateDependencies(self): 323179860b2SJed Brown '''Should this be a generator? 324179860b2SJed Brown First assemble the DAG using #include relations 325179860b2SJed Brown Then calculate the depdencies with all pairs shortest-path 326179860b2SJed Brown - I think Floyd-Warshell and N-source Dijkstra are just as good 327179860b2SJed Brown ''' 328179860b2SJed Brown # Assembling DAG 329179860b2SJed Brown dag = {} 330179860b2SJed Brown for source in self.sourceDB: 331179860b2SJed Brown try: 332179860b2SJed Brown dag[source] = self.getNeighbors(self, source) 3335b6bfdb9SJed Brown except IOError as e: 334179860b2SJed Brown if e.errno == errno.ENOENT: 335179860b2SJed Brown del self[source] 336179860b2SJed Brown else: 337179860b2SJed Brown raise e 338179860b2SJed Brown # Finding all-pairs shortest path 339179860b2SJed Brown 340179860b2SJed Brownif __name__ == '__main__': 341179860b2SJed Brown import sys 342179860b2SJed Brown try: 343179860b2SJed Brown if len(sys.argv) < 3: 3445b6bfdb9SJed Brown print('sourceDatabase.py <database filename> [insert | remove] <filename>') 345179860b2SJed Brown else: 346179860b2SJed Brown if os.path.exists(sys.argv[1]): 347c6ef1b5bSJed Brown dbFile = open(sys.argv[1]) 348492432c8SJed Brown sourceDB = pickle.load(dbFile) 349179860b2SJed Brown dbFile.close() 350179860b2SJed Brown else: 351179860b2SJed Brown sys.exit('Could not load source database from '+sys.argv[1]) 352179860b2SJed Brown if sys.argv[2] == 'insert': 353179860b2SJed Brown if sys.argv[3] in sourceDB: 354179860b2SJed Brown self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 355179860b2SJed Brown else: 356179860b2SJed Brown self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 357179860b2SJed Brown self.sourceDB.updateSource(sys.argv[3]) 358179860b2SJed Brown elif sys.argv[2] == 'remove': 359179860b2SJed Brown if sys.argv[3] in sourceDB: 360179860b2SJed Brown sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 361179860b2SJed Brown del self.sourceDB[sys.argv[3]] 362179860b2SJed Brown else: 363179860b2SJed Brown sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 364179860b2SJed Brown removeRE = re.compile(sys.argv[3]) 365179860b2SJed Brown removes = filter(removeRE.match, sourceDB.keys()) 366179860b2SJed Brown for source in removes: 367179860b2SJed Brown self.logPrint('Removing '+source, 3, 'sourceDB') 368179860b2SJed Brown del self.sourceDB[source] 369179860b2SJed Brown else: 370179860b2SJed Brown sys.exit('Unknown source database action: '+sys.argv[2]) 371179860b2SJed Brown sourceDB.save() 3725b6bfdb9SJed Brown except Exception as e: 373179860b2SJed Brown import traceback 3745b6bfdb9SJed Brown print(traceback.print_tb(sys.exc_info()[2])) 375179860b2SJed Brown sys.exit(str(e)) 376179860b2SJed Brown sys.exit(0) 377