1*179860b2SJed Brown#!/usr/bin/env python 2*179860b2SJed Brown'''A source code database 3*179860b2SJed Brown 4*179860b2SJed Brown SourceDB is a database of file information used to determine whether files 5*179860b2SJed Brown should be rebuilt by the build system. All files names are stored relative 6*179860b2SJed Brown to a given root, which is intended as the root of a Project. 7*179860b2SJed Brown 8*179860b2SJed Brown Relative or absolute pathnames may be used as keys, but absolute pathnames 9*179860b2SJed Brown must fall under the database root. The value format is a tuple of the following: 10*179860b2SJed Brown 11*179860b2SJed Brown Checksum: The md5 checksum of the file 12*179860b2SJed Brown Mod Time: The time the file was last modified 13*179860b2SJed Brown Timestamp: The time theentry was last modified 14*179860b2SJed Brown Dependencies: A tuple of files upon which this entry depends 15*179860b2SJed Brown 16*179860b2SJed Brown This script also provides some default actions: 17*179860b2SJed Brown 18*179860b2SJed Brown - insert <database file> <filename> 19*179860b2SJed Brown Inserts this file from the database, or updates its entry if it 20*179860b2SJed Brown already exists. 21*179860b2SJed Brown 22*179860b2SJed Brown - remove <database file> <filename> 23*179860b2SJed Brown Removes this file from the database. The filename may also be a 24*179860b2SJed Brown regular expression. 25*179860b2SJed Brown 26*179860b2SJed Brown''' 27*179860b2SJed Brownimport logger 28*179860b2SJed Brown 29*179860b2SJed Brownimport errno 30*179860b2SJed Brownimport os 31*179860b2SJed Brownimport re 32*179860b2SJed Brownimport time 33*179860b2SJed Brown 34*179860b2SJed Brownimport cPickle 35*179860b2SJed Brown 36*179860b2SJed Browntry: 37*179860b2SJed Brown from hashlib import md5 as new_md5 38*179860b2SJed Brownexcept ImportError: 39*179860b2SJed Brown from md5 import new as new_md5 40*179860b2SJed Brown 41*179860b2SJed Brown 42*179860b2SJed Brownclass SourceDB (dict, logger.Logger): 43*179860b2SJed Brown '''A SourceDB is a dictionary of file data used during the build process.''' 44*179860b2SJed Brown includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 45*179860b2SJed Brown isLoading = 0 46*179860b2SJed Brown 47*179860b2SJed Brown def __init__(self, root, filename = None): 48*179860b2SJed Brown dict.__init__(self) 49*179860b2SJed Brown logger.Logger.__init__(self) 50*179860b2SJed Brown self.root = root 51*179860b2SJed Brown self.filename = filename 52*179860b2SJed Brown if self.filename is None: 53*179860b2SJed Brown self.filename = os.path.join(str(root), 'bsSource.db') 54*179860b2SJed Brown self.isDirty = 0 55*179860b2SJed Brown return 56*179860b2SJed Brown 57*179860b2SJed Brown def __str__(self): 58*179860b2SJed Brown output = '' 59*179860b2SJed Brown for source in self: 60*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 61*179860b2SJed Brown output += source+'\n' 62*179860b2SJed Brown output += ' Checksum: '+str(checksum)+'\n' 63*179860b2SJed Brown output += ' Mod Time: '+str(mtime)+'\n' 64*179860b2SJed Brown output += ' Timestamp: '+str(timestamp)+'\n' 65*179860b2SJed Brown output += ' Deps: '+str(dependencies)+'\n' 66*179860b2SJed Brown return output 67*179860b2SJed Brown 68*179860b2SJed Brown def __setstate__(self, d): 69*179860b2SJed Brown logger.Logger.__setstate__(self, d) 70*179860b2SJed Brown # We have to prevent recursive calls to this when the pickled database is loaded in load() 71*179860b2SJed Brown # This is to ensure that fresh copies of the database are obtained after unpickling 72*179860b2SJed Brown if not SourceDB.isLoading: 73*179860b2SJed Brown SourceDB.isLoading = 1 74*179860b2SJed Brown self.load() 75*179860b2SJed Brown SourceDB.isLoading = 0 76*179860b2SJed Brown return 77*179860b2SJed Brown 78*179860b2SJed Brown def getRelativePath(self, path): 79*179860b2SJed Brown '''Returns a relative source file path using the root''' 80*179860b2SJed Brown if os.path.isabs(path): 81*179860b2SJed Brown root = str(self.root) 82*179860b2SJed Brown if not path.startswith(root+os.sep): 83*179860b2SJed Brown raise ValueError('Absolute path '+path+' conflicts with root '+root) 84*179860b2SJed Brown else: 85*179860b2SJed Brown path = path[len(root)+1:] 86*179860b2SJed Brown return path 87*179860b2SJed Brown 88*179860b2SJed Brown def checkValue(self, value): 89*179860b2SJed Brown '''Validate the value, raising ValueError for problems''' 90*179860b2SJed Brown if not isinstance(value, tuple): 91*179860b2SJed Brown raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 92*179860b2SJed Brown if not len(value) == 4: 93*179860b2SJed Brown raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 94*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = value 95*179860b2SJed Brown if not isinstance(checksum, str): 96*179860b2SJed Brown raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 97*179860b2SJed Brown if not isinstance(mtime, int): 98*179860b2SJed Brown raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 99*179860b2SJed Brown elif mtime < 0: 100*179860b2SJed Brown raise ValueError('Negative modification time for source database, '+str(mtime)) 101*179860b2SJed Brown if not isinstance(timestamp, float): 102*179860b2SJed Brown raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 103*179860b2SJed Brown elif timestamp < 0: 104*179860b2SJed Brown raise ValueError('Negative timestamp for source database, '+str(timestamp)) 105*179860b2SJed Brown if not isinstance(dependencies, tuple): 106*179860b2SJed Brown raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 107*179860b2SJed Brown return value 108*179860b2SJed Brown 109*179860b2SJed Brown def __getitem__(self, key): 110*179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 111*179860b2SJed Brown return dict.__getitem__(self, self.getRelativePath(key)) 112*179860b2SJed Brown 113*179860b2SJed Brown def __setitem__(self, key, value): 114*179860b2SJed Brown '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 115*179860b2SJed Brown self.isDirty = 1 116*179860b2SJed Brown return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 117*179860b2SJed Brown 118*179860b2SJed Brown def __delitem__(self, key): 119*179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 120*179860b2SJed Brown self.isDirty = 1 121*179860b2SJed Brown return dict.__delitem__(self, self.getRelativePath(key)) 122*179860b2SJed Brown 123*179860b2SJed Brown def __contains__(self, key): 124*179860b2SJed Brown '''Converts the key to a relative source file path using the root''' 125*179860b2SJed Brown return dict.__contains__(self, self.getRelativePath(key)) 126*179860b2SJed Brown 127*179860b2SJed Brown def has_key(self, key): 128*179860b2SJed Brown '''This method just calls self.__contains__(key)''' 129*179860b2SJed Brown return self.__contains__(key) 130*179860b2SJed Brown 131*179860b2SJed Brown def items(self): 132*179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 133*179860b2SJed Brown return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 134*179860b2SJed Brown 135*179860b2SJed Brown def keys(self): 136*179860b2SJed Brown '''Converts each key to a relative source file path using the root''' 137*179860b2SJed Brown return map(self.getRelativePath, dict.keys(self)) 138*179860b2SJed Brown 139*179860b2SJed Brown def update(self, d): 140*179860b2SJed Brown '''Update the dictionary with the contents of d''' 141*179860b2SJed Brown self.isDirty = 1 142*179860b2SJed Brown for k in d: 143*179860b2SJed Brown self[k] = d[k] 144*179860b2SJed Brown return 145*179860b2SJed Brown 146*179860b2SJed Brown def getChecksum(source, chunkSize = 1024*1024): 147*179860b2SJed Brown '''Return the md5 checksum for a given file, which may also be specified by its filename 148*179860b2SJed Brown - The chunkSize argument specifies the size of blocks read from the file''' 149*179860b2SJed Brown if isinstance(source, file): 150*179860b2SJed Brown f = source 151*179860b2SJed Brown else: 152*179860b2SJed Brown f = file(source) 153*179860b2SJed Brown m = new_md5() 154*179860b2SJed Brown size = chunkSize 155*179860b2SJed Brown buf = f.read(size) 156*179860b2SJed Brown while buf: 157*179860b2SJed Brown m.update(buf) 158*179860b2SJed Brown buf = f.read(size) 159*179860b2SJed Brown f.close() 160*179860b2SJed Brown return m.hexdigest() 161*179860b2SJed Brown getChecksum = staticmethod(getChecksum) 162*179860b2SJed Brown 163*179860b2SJed Brown def getModificationTime(source): 164*179860b2SJed Brown t = os.path.getmtime(source) 165*179860b2SJed Brown if isinstance(t, float): 166*179860b2SJed Brown t = int(t) 167*179860b2SJed Brown return t 168*179860b2SJed Brown getModificationTime = staticmethod(getModificationTime) 169*179860b2SJed Brown 170*179860b2SJed Brown def updateSource(self, source, noChecksum = 0): 171*179860b2SJed Brown self.isDirty = 1 172*179860b2SJed Brown dependencies = () 173*179860b2SJed Brown try: 174*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 175*179860b2SJed Brown except KeyError: 176*179860b2SJed Brown pass 177*179860b2SJed Brown self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 178*179860b2SJed Brown if noChecksum: 179*179860b2SJed Brown checksum = '' 180*179860b2SJed Brown else: 181*179860b2SJed Brown checksum = SourceDB.getChecksum(source) 182*179860b2SJed Brown self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 183*179860b2SJed Brown return 184*179860b2SJed Brown 185*179860b2SJed Brown def clearSource(self, source): 186*179860b2SJed Brown '''This removes source information, but preserved dependencies''' 187*179860b2SJed Brown if source in self: 188*179860b2SJed Brown self.isDirty = 1 189*179860b2SJed Brown self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 190*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 191*179860b2SJed Brown self[source] = ('', 0, time.time(), dependencies) 192*179860b2SJed Brown return 193*179860b2SJed Brown 194*179860b2SJed Brown def getDependencies(self, source): 195*179860b2SJed Brown try: 196*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 197*179860b2SJed Brown except KeyError: 198*179860b2SJed Brown dependencies = () 199*179860b2SJed Brown return dependencies 200*179860b2SJed Brown 201*179860b2SJed Brown def addDependency(self, source, dependency): 202*179860b2SJed Brown self.isDirty = 1 203*179860b2SJed Brown dependencies = () 204*179860b2SJed Brown try: 205*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 206*179860b2SJed Brown except KeyError: 207*179860b2SJed Brown checksum = '' 208*179860b2SJed Brown mtime = 0 209*179860b2SJed Brown if not dependency in dependencies: 210*179860b2SJed Brown self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 211*179860b2SJed Brown dependencies = dependencies+(dependency,) 212*179860b2SJed Brown self[source] = (checksum, mtime, time.time(), dependencies) 213*179860b2SJed Brown return 214*179860b2SJed Brown 215*179860b2SJed Brown def calculateDependencies(self): 216*179860b2SJed Brown self.logPrint('Recalculating dependencies', 1, 'sourceDB') 217*179860b2SJed Brown for source in self: 218*179860b2SJed Brown self.logPrint('Calculating '+source, 3, 'sourceDB') 219*179860b2SJed Brown (checksum, mtime, timestamp, dependencies) = self[source] 220*179860b2SJed Brown newDep = [] 221*179860b2SJed Brown try: 222*179860b2SJed Brown file = file(source) 223*179860b2SJed Brown except IOError, e: 224*179860b2SJed Brown if e.errno == errno.ENOENT: 225*179860b2SJed Brown del self[source] 226*179860b2SJed Brown else: 227*179860b2SJed Brown raise e 228*179860b2SJed Brown comps = source.split('/') 229*179860b2SJed Brown for line in file.xreadlines(): 230*179860b2SJed Brown m = self.includeRE.match(line) 231*179860b2SJed Brown if m: 232*179860b2SJed Brown filename = m.group('includeFile') 233*179860b2SJed Brown matchNum = 0 234*179860b2SJed Brown matchName = filename 235*179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 236*179860b2SJed Brown for s in self: 237*179860b2SJed Brown if s.find(filename) >= 0: 238*179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 239*179860b2SJed Brown c = s.split('/') 240*179860b2SJed Brown for i in range(len(c)): 241*179860b2SJed Brown if not comps[i] == c[i]: break 242*179860b2SJed Brown if i > matchNum: 243*179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 244*179860b2SJed Brown matchName = s 245*179860b2SJed Brown matchNum = i 246*179860b2SJed Brown newDep.append(matchName) 247*179860b2SJed Brown # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 248*179860b2SJed Brown self[source] = (checksum, mtime, timestamp, tuple(newDep)) 249*179860b2SJed Brown file.close() 250*179860b2SJed Brown 251*179860b2SJed Brown def load(self): 252*179860b2SJed Brown '''Load the source database from the saved filename''' 253*179860b2SJed Brown filename = str(self.filename) 254*179860b2SJed Brown if os.path.exists(filename): 255*179860b2SJed Brown self.clear() 256*179860b2SJed Brown self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 257*179860b2SJed Brown dbFile = file(filename) 258*179860b2SJed Brown newDB = cPickle.load(dbFile) 259*179860b2SJed Brown dbFile.close() 260*179860b2SJed Brown self.update(newDB) 261*179860b2SJed Brown else: 262*179860b2SJed Brown self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 263*179860b2SJed Brown return 264*179860b2SJed Brown 265*179860b2SJed Brown def save(self, force = 0): 266*179860b2SJed Brown '''Save the source database to a file. The saved database with have path names relative to the root.''' 267*179860b2SJed Brown if not self.isDirty and not force: 268*179860b2SJed Brown self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 269*179860b2SJed Brown return 270*179860b2SJed Brown filename = str(self.filename) 271*179860b2SJed Brown if os.path.exists(os.path.dirname(filename)): 272*179860b2SJed Brown self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 273*179860b2SJed Brown dbFile = file(filename, 'w') 274*179860b2SJed Brown cPickle.dump(self, dbFile) 275*179860b2SJed Brown dbFile.close() 276*179860b2SJed Brown self.isDirty = 0 277*179860b2SJed Brown else: 278*179860b2SJed Brown self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 279*179860b2SJed Brown return 280*179860b2SJed Brown 281*179860b2SJed Brownclass DependencyAnalyzer (logger.Logger): 282*179860b2SJed Brown def __init__(self, sourceDB): 283*179860b2SJed Brown logger.Logger.__init__(self) 284*179860b2SJed Brown self.sourceDB = sourceDB 285*179860b2SJed Brown self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 286*179860b2SJed Brown return 287*179860b2SJed Brown 288*179860b2SJed Brown def resolveDependency(self, source, dep): 289*179860b2SJed Brown if dep in self.sourceDB: return dep 290*179860b2SJed Brown # Choose the entry in sourceDB whose base matches dep, 291*179860b2SJed Brown # and who has the most path components in common with source 292*179860b2SJed Brown # This should be replaced by an appeal to cpp 293*179860b2SJed Brown matchNum = 0 294*179860b2SJed Brown matchName = dep 295*179860b2SJed Brown components = source.split(os.sep) 296*179860b2SJed Brown self.logPrint(' Includes '+filename, 3, 'sourceDB') 297*179860b2SJed Brown for s in self.sourceDB: 298*179860b2SJed Brown if s.find(dep) >= 0: 299*179860b2SJed Brown self.logPrint(' Checking '+s, 3, 'sourceDB') 300*179860b2SJed Brown comp = s.split(os.sep) 301*179860b2SJed Brown for i in range(len(comp)): 302*179860b2SJed Brown if not components[i] == comp[i]: break 303*179860b2SJed Brown if i > matchNum: 304*179860b2SJed Brown self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 305*179860b2SJed Brown matchName = s 306*179860b2SJed Brown matchNum = i 307*179860b2SJed Brown if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 308*179860b2SJed Brown return matchName 309*179860b2SJed Brown 310*179860b2SJed Brown def getNeighbors(self, source): 311*179860b2SJed Brown file = file(source) 312*179860b2SJed Brown adj = [] 313*179860b2SJed Brown for line in file.xreadlines(): 314*179860b2SJed Brown match = self.includeRE.match(line) 315*179860b2SJed Brown if match: 316*179860b2SJed Brown adj.append(self.resolveDependency(source, m.group('includeFile'))) 317*179860b2SJed Brown file.close() 318*179860b2SJed Brown return adj 319*179860b2SJed Brown 320*179860b2SJed Brown def calculateDependencies(self): 321*179860b2SJed Brown '''Should this be a generator? 322*179860b2SJed Brown First assemble the DAG using #include relations 323*179860b2SJed Brown Then calculate the depdencies with all pairs shortest-path 324*179860b2SJed Brown - I think Floyd-Warshell and N-source Dijkstra are just as good 325*179860b2SJed Brown ''' 326*179860b2SJed Brown # Assembling DAG 327*179860b2SJed Brown dag = {} 328*179860b2SJed Brown for source in self.sourceDB: 329*179860b2SJed Brown try: 330*179860b2SJed Brown dag[source] = self.getNeighbors(self, source) 331*179860b2SJed Brown except IOError, e: 332*179860b2SJed Brown if e.errno == errno.ENOENT: 333*179860b2SJed Brown del self[source] 334*179860b2SJed Brown else: 335*179860b2SJed Brown raise e 336*179860b2SJed Brown # Finding all-pairs shortest path 337*179860b2SJed Brown 338*179860b2SJed Brownif __name__ == '__main__': 339*179860b2SJed Brown import sys 340*179860b2SJed Brown try: 341*179860b2SJed Brown if len(sys.argv) < 3: 342*179860b2SJed Brown print 'sourceDatabase.py <database filename> [insert | remove] <filename>' 343*179860b2SJed Brown else: 344*179860b2SJed Brown if os.path.exists(sys.argv[1]): 345*179860b2SJed Brown dbFile = file(sys.argv[1]) 346*179860b2SJed Brown sourceDB = cPickle.load(dbFile) 347*179860b2SJed Brown dbFile.close() 348*179860b2SJed Brown else: 349*179860b2SJed Brown sys.exit('Could not load source database from '+sys.argv[1]) 350*179860b2SJed Brown if sys.argv[2] == 'insert': 351*179860b2SJed Brown if sys.argv[3] in sourceDB: 352*179860b2SJed Brown self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 353*179860b2SJed Brown else: 354*179860b2SJed Brown self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 355*179860b2SJed Brown self.sourceDB.updateSource(sys.argv[3]) 356*179860b2SJed Brown elif sys.argv[2] == 'remove': 357*179860b2SJed Brown if sys.argv[3] in sourceDB: 358*179860b2SJed Brown sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 359*179860b2SJed Brown del self.sourceDB[sys.argv[3]] 360*179860b2SJed Brown else: 361*179860b2SJed Brown sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 362*179860b2SJed Brown removeRE = re.compile(sys.argv[3]) 363*179860b2SJed Brown removes = filter(removeRE.match, sourceDB.keys()) 364*179860b2SJed Brown for source in removes: 365*179860b2SJed Brown self.logPrint('Removing '+source, 3, 'sourceDB') 366*179860b2SJed Brown del self.sourceDB[source] 367*179860b2SJed Brown else: 368*179860b2SJed Brown sys.exit('Unknown source database action: '+sys.argv[2]) 369*179860b2SJed Brown sourceDB.save() 370*179860b2SJed Brown except Exception, e: 371*179860b2SJed Brown import traceback 372*179860b2SJed Brown print traceback.print_tb(sys.exc_info()[2]) 373*179860b2SJed Brown sys.exit(str(e)) 374*179860b2SJed Brown sys.exit(0) 375