1179860b2SJed Brown'''A source code database 2179860b2SJed Brown 3179860b2SJed Brown SourceDB is a database of file information used to determine whether files 4179860b2SJed Brown should be rebuilt by the build system. All files names are stored relative 5179860b2SJed Brown to a given root, which is intended as the root of a Project. 6179860b2SJed Brown 7179860b2SJed Brown Relative or absolute pathnames may be used as keys, but absolute pathnames 8179860b2SJed Brown must fall under the database root. The value format is a tuple of the following: 9179860b2SJed Brown 10*fe324abeSRonald Gould Checksum: The checksum of the file 11179860b2SJed Brown Mod Time: The time the file was last modified 12179860b2SJed Brown Timestamp: The time theentry was last modified 13179860b2SJed Brown Dependencies: A tuple of files upon which this entry depends 14179860b2SJed Brown 15179860b2SJed Brown This script also provides some default actions: 16179860b2SJed Brown 17179860b2SJed Brown - insert <database file> <filename> 18179860b2SJed Brown Inserts this file from the database, or updates its entry if it 19179860b2SJed Brown already exists. 20179860b2SJed Brown 21179860b2SJed Brown - remove <database file> <filename> 22179860b2SJed Brown Removes this file from the database. The filename may also be a 23179860b2SJed Brown regular expression. 24179860b2SJed Brown 25179860b2SJed Brown''' 265b6bfdb9SJed Brownfrom __future__ import print_function 275b6bfdb9SJed Brownfrom __future__ import absolute_import 28179860b2SJed Brownimport logger 29179860b2SJed Brown 30179860b2SJed Brownimport errno 31179860b2SJed Brownimport os 32179860b2SJed Brownimport re 33179860b2SJed Brownimport time 34179860b2SJed Brown 35492432c8SJed Brownimport pickle 36*fe324abeSRonald Gouldfrom hashlib import sha256 as checksum_algo 37179860b2SJed Brown 38179860b2SJed Brownclass SourceDB (dict, logger.Logger): 39179860b2SJed Brown '''A SourceDB is a dictionary of file data used during the build process.''' 40179860b2SJed Brown includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 41179860b2SJed Brown isLoading = 0 42179860b2SJed Brown 43179860b2SJed Brown def __init__(self, root, filename = None): 44179860b2SJed Brown dict.__init__(self) 45179860b2SJed Brown logger.Logger.__init__(self) 46179860b2SJed Brown self.root = root 47179860b2SJed Brown self.filename = filename 48179860b2SJed Brown if self.filename is None: 49179860b2SJed Brown self.filename = os.path.join(str(root), 'bsSource.db') 50179860b2SJed Brown self.isDirty = 0 51179860b2SJed Brown return 52179860b2SJed Brown 53179860b2SJed Brown def __str__(self): 54179860b2SJed Brown output = '' 55179860b2SJed Brown for source in self: 56179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 57179860b2SJed Brown output += source+'\n' 58179860b2SJed Brown output += ' Checksum: '+str(checksum)+'\n' 59179860b2SJed Brown output += ' Mod Time: '+str(mtime)+'\n' 60179860b2SJed Brown output += ' Timestamp: '+str(timestamp)+'\n' 61179860b2SJed Brown output += ' Deps: '+str(dependencies)+'\n' 62179860b2SJed Brown return output 63179860b2SJed Brown 64179860b2SJed Brown def __setstate__(self, d): 65179860b2SJed Brown logger.Logger.__setstate__(self, d) 66179860b2SJed Brown # We have to prevent recursive calls to this when the pickled database is loaded in load() 67179860b2SJed Brown # This is to ensure that fresh copies of the database are obtained after unpickling 68179860b2SJed Brown if not SourceDB.isLoading: 69179860b2SJed Brown SourceDB.isLoading = 1 70179860b2SJed Brown self.load() 71179860b2SJed Brown SourceDB.isLoading = 0 72179860b2SJed Brown return 73179860b2SJed Brown 74179860b2SJed Brown def getRelativePath(self, path): 75179860b2SJed Brown '''Returns a relative source file path using the root''' 76179860b2SJed Brown if os.path.isabs(path): 77179860b2SJed Brown root = str(self.root) 78179860b2SJed Brown if not path.startswith(root+os.sep): 79179860b2SJed Brown raise ValueError('Absolute path '+path+' conflicts with root '+root) 80179860b2SJed Brown else: 81179860b2SJed Brown path = path[len(root)+1:] 82179860b2SJed Brown return path 83179860b2SJed Brown 84179860b2SJed Brown def checkValue(self, value): 85179860b2SJed Brown '''Validate the value, raising ValueError for problems''' 86179860b2SJed Brown if not isinstance(value, tuple): 87179860b2SJed Brown raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 88179860b2SJed Brown if not len(value) == 4: 89179860b2SJed Brown raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 90179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = value 91179860b2SJed Brown if not isinstance(checksum, str): 92179860b2SJed Brown raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 93179860b2SJed Brown if not isinstance(mtime, int): 94179860b2SJed Brown raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 95179860b2SJed Brown elif mtime < 0: 96179860b2SJed Brown raise ValueError('Negative modification time for source database, '+str(mtime)) 97179860b2SJed Brown if not isinstance(timestamp, float): 98179860b2SJed Brown raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 99179860b2SJed Brown elif timestamp < 0: 100179860b2SJed Brown raise ValueError('Negative timestamp for source database, '+str(timestamp)) 101179860b2SJed Brown if not isinstance(dependencies, tuple): 102179860b2SJed Brown raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 103179860b2SJed Brown return value 104179860b2SJed Brown 105179860b2SJed Brown def __getitem__(self, key): 106179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 107179860b2SJed Brown return dict.__getitem__(self, self.getRelativePath(key)) 108179860b2SJed Brown 109179860b2SJed Brown def __setitem__(self, key, value): 110179860b2SJed Brown '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 111179860b2SJed Brown self.isDirty = 1 112179860b2SJed Brown return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 113179860b2SJed Brown 114179860b2SJed Brown def __delitem__(self, key): 115179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 116179860b2SJed Brown self.isDirty = 1 117179860b2SJed Brown return dict.__delitem__(self, self.getRelativePath(key)) 118179860b2SJed Brown 119179860b2SJed Brown def __contains__(self, key): 120179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 121179860b2SJed Brown return dict.__contains__(self, self.getRelativePath(key)) 122179860b2SJed Brown 123179860b2SJed Brown def has_key(self, key): 124179860b2SJed Brown '''This method just calls self.__contains__(key)''' 125179860b2SJed Brown return self.__contains__(key) 126179860b2SJed Brown 127179860b2SJed Brown def items(self): 128179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 129179860b2SJed Brown return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 130179860b2SJed Brown 131179860b2SJed Brown def keys(self): 132179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 133179860b2SJed Brown return map(self.getRelativePath, dict.keys(self)) 134179860b2SJed Brown 135179860b2SJed Brown def update(self, d): 136179860b2SJed Brown '''Update the dictionary with the contents of d''' 137179860b2SJed Brown self.isDirty = 1 138179860b2SJed Brown for k in d: 139179860b2SJed Brown self[k] = d[k] 140179860b2SJed Brown return 141179860b2SJed Brown 142179860b2SJed Brown def getChecksum(source, chunkSize = 1024*1024): 143*fe324abeSRonald Gould '''Return the checksum for a given file, which may also be specified by its filename 144179860b2SJed Brown - The chunkSize argument specifies the size of blocks read from the file''' 145bbfe0604SJed Brown if hasattr(source, 'close'): 146179860b2SJed Brown f = source 147179860b2SJed Brown else: 148c6ef1b5bSJed Brown f = open(source) 149*fe324abeSRonald Gould m = checksum_algo() 150179860b2SJed Brown size = chunkSize 151179860b2SJed Brown buf = f.read(size) 152179860b2SJed Brown while buf: 153179860b2SJed Brown m.update(buf) 154179860b2SJed Brown buf = f.read(size) 155179860b2SJed Brown f.close() 156179860b2SJed Brown return m.hexdigest() 157179860b2SJed Brown getChecksum = staticmethod(getChecksum) 158179860b2SJed Brown 159179860b2SJed Brown def getModificationTime(source): 160179860b2SJed Brown t = os.path.getmtime(source) 161179860b2SJed Brown if isinstance(t, float): 162179860b2SJed Brown t = int(t) 163179860b2SJed Brown return t 164179860b2SJed Brown getModificationTime = staticmethod(getModificationTime) 165179860b2SJed Brown 166179860b2SJed Brown def updateSource(self, source, noChecksum = 0): 167179860b2SJed Brown self.isDirty = 1 168179860b2SJed Brown dependencies = () 169179860b2SJed Brown try: 170179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 171179860b2SJed Brown except KeyError: 172179860b2SJed Brown pass 173179860b2SJed Brown self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 174179860b2SJed Brown if noChecksum: 175179860b2SJed Brown checksum = '' 176179860b2SJed Brown else: 177179860b2SJed Brown checksum = SourceDB.getChecksum(source) 178179860b2SJed Brown self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 179179860b2SJed Brown return 180179860b2SJed Brown 181179860b2SJed Brown def clearSource(self, source): 182179860b2SJed Brown '''This removes source information, but preserved dependencies''' 183179860b2SJed Brown if source in self: 184179860b2SJed Brown self.isDirty = 1 185179860b2SJed Brown self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 186179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 187179860b2SJed Brown self[source] = ('', 0, time.time(), dependencies) 188179860b2SJed Brown return 189179860b2SJed Brown 190179860b2SJed Brown def getDependencies(self, source): 191179860b2SJed Brown try: 192179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 193179860b2SJed Brown except KeyError: 194179860b2SJed Brown dependencies = () 195179860b2SJed Brown return dependencies 196179860b2SJed Brown 197179860b2SJed Brown def addDependency(self, source, dependency): 198179860b2SJed Brown self.isDirty = 1 199179860b2SJed Brown dependencies = () 200179860b2SJed Brown try: 201179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 202179860b2SJed Brown except KeyError: 203179860b2SJed Brown checksum = '' 204179860b2SJed Brown mtime = 0 205179860b2SJed Brown if not dependency in dependencies: 206179860b2SJed Brown self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 207179860b2SJed Brown dependencies = dependencies+(dependency,) 208179860b2SJed Brown self[source] = (checksum, mtime, time.time(), dependencies) 209179860b2SJed Brown return 210179860b2SJed Brown 211179860b2SJed Brown def calculateDependencies(self): 212179860b2SJed Brown self.logPrint('Recalculating dependencies', 1, 'sourceDB') 213179860b2SJed Brown for source in self: 214179860b2SJed Brown self.logPrint('Calculating '+source, 3, 'sourceDB') 215179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 216179860b2SJed Brown newDep = [] 217179860b2SJed Brown try: 218c6ef1b5bSJed Brown file = open(source) 2195b6bfdb9SJed Brown except IOError as e: 220179860b2SJed Brown if e.errno == errno.ENOENT: 221179860b2SJed Brown del self[source] 222179860b2SJed Brown else: 223179860b2SJed Brown raise e 224179860b2SJed Brown comps = source.split('/') 2255b6bfdb9SJed Brown for line in file: 226179860b2SJed Brown m = self.includeRE.match(line) 227179860b2SJed Brown if m: 228179860b2SJed Brown filename = m.group('includeFile') 229179860b2SJed Brown matchNum = 0 230179860b2SJed Brown matchName = filename 231179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 232179860b2SJed Brown for s in self: 233179860b2SJed Brown if s.find(filename) >= 0: 234179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 235179860b2SJed Brown c = s.split('/') 236179860b2SJed Brown for i in range(len(c)): 237179860b2SJed Brown if not comps[i] == c[i]: break 238179860b2SJed Brown if i > matchNum: 239179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 240179860b2SJed Brown matchName = s 241179860b2SJed Brown matchNum = i 242179860b2SJed Brown newDep.append(matchName) 243179860b2SJed Brown # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 244179860b2SJed Brown self[source] = (checksum, mtime, timestamp, tuple(newDep)) 245179860b2SJed Brown file.close() 246179860b2SJed Brown 247179860b2SJed Brown def load(self): 248179860b2SJed Brown '''Load the source database from the saved filename''' 249179860b2SJed Brown filename = str(self.filename) 250179860b2SJed Brown if os.path.exists(filename): 251179860b2SJed Brown self.clear() 252179860b2SJed Brown self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 253c6ef1b5bSJed Brown dbFile = open(filename) 254492432c8SJed Brown newDB = pickle.load(dbFile) 255179860b2SJed Brown dbFile.close() 256179860b2SJed Brown self.update(newDB) 257179860b2SJed Brown else: 258179860b2SJed Brown self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 259179860b2SJed Brown return 260179860b2SJed Brown 261179860b2SJed Brown def save(self, force = 0): 262179860b2SJed Brown '''Save the source database to a file. The saved database with have path names relative to the root.''' 263179860b2SJed Brown if not self.isDirty and not force: 264179860b2SJed Brown self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 265179860b2SJed Brown return 266179860b2SJed Brown filename = str(self.filename) 267179860b2SJed Brown if os.path.exists(os.path.dirname(filename)): 268179860b2SJed Brown self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 269c6ef1b5bSJed Brown dbFile = open(filename, 'w') 270492432c8SJed Brown pickle.dump(self, dbFile) 271179860b2SJed Brown dbFile.close() 272179860b2SJed Brown self.isDirty = 0 273179860b2SJed Brown else: 274179860b2SJed Brown self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 275179860b2SJed Brown return 276179860b2SJed Brown 277179860b2SJed Brownclass DependencyAnalyzer (logger.Logger): 278179860b2SJed Brown def __init__(self, sourceDB): 279179860b2SJed Brown logger.Logger.__init__(self) 280179860b2SJed Brown self.sourceDB = sourceDB 281179860b2SJed Brown self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 282179860b2SJed Brown return 283179860b2SJed Brown 284179860b2SJed Brown def resolveDependency(self, source, dep): 285179860b2SJed Brown if dep in self.sourceDB: return dep 286179860b2SJed Brown # Choose the entry in sourceDB whose base matches dep, 287179860b2SJed Brown # and who has the most path components in common with source 288179860b2SJed Brown # This should be replaced by an appeal to cpp 289179860b2SJed Brown matchNum = 0 290179860b2SJed Brown matchName = dep 291179860b2SJed Brown components = source.split(os.sep) 292179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 293179860b2SJed Brown for s in self.sourceDB: 294179860b2SJed Brown if s.find(dep) >= 0: 295179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 296179860b2SJed Brown comp = s.split(os.sep) 297179860b2SJed Brown for i in range(len(comp)): 298179860b2SJed Brown if not components[i] == comp[i]: break 299179860b2SJed Brown if i > matchNum: 300179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 301179860b2SJed Brown matchName = s 302179860b2SJed Brown matchNum = i 303179860b2SJed Brown if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 304179860b2SJed Brown return matchName 305179860b2SJed Brown 306179860b2SJed Brown def getNeighbors(self, source): 307c6ef1b5bSJed Brown file = open(source) 308179860b2SJed Brown adj = [] 3095b6bfdb9SJed Brown for line in file: 310179860b2SJed Brown match = self.includeRE.match(line) 311179860b2SJed Brown if match: 312179860b2SJed Brown adj.append(self.resolveDependency(source, m.group('includeFile'))) 313179860b2SJed Brown file.close() 314179860b2SJed Brown return adj 315179860b2SJed Brown 316179860b2SJed Brown def calculateDependencies(self): 317179860b2SJed Brown '''Should this be a generator? 318179860b2SJed Brown First assemble the DAG using #include relations 319d5b43468SJose E. Roman Then calculate the dependencies with all pairs shortest-path 320179860b2SJed Brown - I think Floyd-Warshell and N-source Dijkstra are just as good 321179860b2SJed Brown ''' 322179860b2SJed Brown # Assembling DAG 323179860b2SJed Brown dag = {} 324179860b2SJed Brown for source in self.sourceDB: 325179860b2SJed Brown try: 326179860b2SJed Brown dag[source] = self.getNeighbors(self, source) 3275b6bfdb9SJed Brown except IOError as e: 328179860b2SJed Brown if e.errno == errno.ENOENT: 329179860b2SJed Brown del self[source] 330179860b2SJed Brown else: 331179860b2SJed Brown raise e 332179860b2SJed Brown # Finding all-pairs shortest path 333179860b2SJed Brown 334179860b2SJed Brownif __name__ == '__main__': 335179860b2SJed Brown import sys 336179860b2SJed Brown try: 337179860b2SJed Brown if len(sys.argv) < 3: 3385b6bfdb9SJed Brown print('sourceDatabase.py <database filename> [insert | remove] <filename>') 339179860b2SJed Brown else: 340179860b2SJed Brown if os.path.exists(sys.argv[1]): 341c6ef1b5bSJed Brown dbFile = open(sys.argv[1]) 342492432c8SJed Brown sourceDB = pickle.load(dbFile) 343179860b2SJed Brown dbFile.close() 344179860b2SJed Brown else: 345179860b2SJed Brown sys.exit('Could not load source database from '+sys.argv[1]) 346179860b2SJed Brown if sys.argv[2] == 'insert': 347179860b2SJed Brown if sys.argv[3] in sourceDB: 348179860b2SJed Brown self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 349179860b2SJed Brown else: 350179860b2SJed Brown self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 351179860b2SJed Brown self.sourceDB.updateSource(sys.argv[3]) 352179860b2SJed Brown elif sys.argv[2] == 'remove': 353179860b2SJed Brown if sys.argv[3] in sourceDB: 354179860b2SJed Brown sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 355179860b2SJed Brown del self.sourceDB[sys.argv[3]] 356179860b2SJed Brown else: 357179860b2SJed Brown sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 358179860b2SJed Brown removeRE = re.compile(sys.argv[3]) 359bb3dd2f6SJed Brown removes = list(filter(removeRE.match, sourceDB.keys())) 360179860b2SJed Brown for source in removes: 361179860b2SJed Brown self.logPrint('Removing '+source, 3, 'sourceDB') 362179860b2SJed Brown del self.sourceDB[source] 363179860b2SJed Brown else: 364179860b2SJed Brown sys.exit('Unknown source database action: '+sys.argv[2]) 365179860b2SJed Brown sourceDB.save() 3665b6bfdb9SJed Brown except Exception as e: 367179860b2SJed Brown import traceback 3685b6bfdb9SJed Brown print(traceback.print_tb(sys.exc_info()[2])) 369179860b2SJed Brown sys.exit(str(e)) 370179860b2SJed Brown sys.exit(0) 371