1#!/usr/bin/env python 2'''A source code database 3 4 SourceDB is a database of file information used to determine whether files 5 should be rebuilt by the build system. All files names are stored relative 6 to a given root, which is intended as the root of a Project. 7 8 Relative or absolute pathnames may be used as keys, but absolute pathnames 9 must fall under the database root. The value format is a tuple of the following: 10 11 Checksum: The md5 checksum of the file 12 Mod Time: The time the file was last modified 13 Timestamp: The time theentry was last modified 14 Dependencies: A tuple of files upon which this entry depends 15 16 This script also provides some default actions: 17 18 - insert <database file> <filename> 19 Inserts this file from the database, or updates its entry if it 20 already exists. 21 22 - remove <database file> <filename> 23 Removes this file from the database. The filename may also be a 24 regular expression. 25 26''' 27import logger 28 29import errno 30import os 31import re 32import time 33 34import cPickle 35 36try: 37 from hashlib import md5 as new_md5 38except ImportError: 39 from md5 import new as new_md5 40 41 42class SourceDB (dict, logger.Logger): 43 '''A SourceDB is a dictionary of file data used during the build process.''' 44 includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 45 isLoading = 0 46 47 def __init__(self, root, filename = None): 48 dict.__init__(self) 49 logger.Logger.__init__(self) 50 self.root = root 51 self.filename = filename 52 if self.filename is None: 53 self.filename = os.path.join(str(root), 'bsSource.db') 54 self.isDirty = 0 55 return 56 57 def __str__(self): 58 output = '' 59 for source in self: 60 (checksum, mtime, timestamp, dependencies) = self[source] 61 output += source+'\n' 62 output += ' Checksum: '+str(checksum)+'\n' 63 output += ' Mod Time: '+str(mtime)+'\n' 64 output += ' Timestamp: '+str(timestamp)+'\n' 65 output += ' Deps: '+str(dependencies)+'\n' 66 return output 67 68 def __setstate__(self, d): 69 logger.Logger.__setstate__(self, d) 70 # We have to prevent recursive calls to this when the pickled database is loaded in load() 71 # This is to ensure that fresh copies of the database are obtained after unpickling 72 if not SourceDB.isLoading: 73 SourceDB.isLoading = 1 74 self.load() 75 SourceDB.isLoading = 0 76 return 77 78 def getRelativePath(self, path): 79 '''Returns a relative source file path using the root''' 80 if os.path.isabs(path): 81 root = str(self.root) 82 if not path.startswith(root+os.sep): 83 raise ValueError('Absolute path '+path+' conflicts with root '+root) 84 else: 85 path = path[len(root)+1:] 86 return path 87 88 def checkValue(self, value): 89 '''Validate the value, raising ValueError for problems''' 90 if not isinstance(value, tuple): 91 raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 92 if not len(value) == 4: 93 raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 94 (checksum, mtime, timestamp, dependencies) = value 95 if not isinstance(checksum, str): 96 raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 97 if not isinstance(mtime, int): 98 raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 99 elif mtime < 0: 100 raise ValueError('Negative modification time for source database, '+str(mtime)) 101 if not isinstance(timestamp, float): 102 raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 103 elif timestamp < 0: 104 raise ValueError('Negative timestamp for source database, '+str(timestamp)) 105 if not isinstance(dependencies, tuple): 106 raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 107 return value 108 109 def __getitem__(self, key): 110 '''Converts the key to a relative source file path using the root''' 111 return dict.__getitem__(self, self.getRelativePath(key)) 112 113 def __setitem__(self, key, value): 114 '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 115 self.isDirty = 1 116 return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 117 118 def __delitem__(self, key): 119 '''Converts the key to a relative source file path using the root''' 120 self.isDirty = 1 121 return dict.__delitem__(self, self.getRelativePath(key)) 122 123 def __contains__(self, key): 124 '''Converts the key to a relative source file path using the root''' 125 return dict.__contains__(self, self.getRelativePath(key)) 126 127 def has_key(self, key): 128 '''This method just calls self.__contains__(key)''' 129 return self.__contains__(key) 130 131 def items(self): 132 '''Converts each key to a relative source file path using the root''' 133 return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 134 135 def keys(self): 136 '''Converts each key to a relative source file path using the root''' 137 return map(self.getRelativePath, dict.keys(self)) 138 139 def update(self, d): 140 '''Update the dictionary with the contents of d''' 141 self.isDirty = 1 142 for k in d: 143 self[k] = d[k] 144 return 145 146 def getChecksum(source, chunkSize = 1024*1024): 147 '''Return the md5 checksum for a given file, which may also be specified by its filename 148 - The chunkSize argument specifies the size of blocks read from the file''' 149 if isinstance(source, file): 150 f = source 151 else: 152 f = file(source) 153 m = new_md5() 154 size = chunkSize 155 buf = f.read(size) 156 while buf: 157 m.update(buf) 158 buf = f.read(size) 159 f.close() 160 return m.hexdigest() 161 getChecksum = staticmethod(getChecksum) 162 163 def getModificationTime(source): 164 t = os.path.getmtime(source) 165 if isinstance(t, float): 166 t = int(t) 167 return t 168 getModificationTime = staticmethod(getModificationTime) 169 170 def updateSource(self, source, noChecksum = 0): 171 self.isDirty = 1 172 dependencies = () 173 try: 174 (checksum, mtime, timestamp, dependencies) = self[source] 175 except KeyError: 176 pass 177 self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 178 if noChecksum: 179 checksum = '' 180 else: 181 checksum = SourceDB.getChecksum(source) 182 self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 183 return 184 185 def clearSource(self, source): 186 '''This removes source information, but preserved dependencies''' 187 if source in self: 188 self.isDirty = 1 189 self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 190 (checksum, mtime, timestamp, dependencies) = self[source] 191 self[source] = ('', 0, time.time(), dependencies) 192 return 193 194 def getDependencies(self, source): 195 try: 196 (checksum, mtime, timestamp, dependencies) = self[source] 197 except KeyError: 198 dependencies = () 199 return dependencies 200 201 def addDependency(self, source, dependency): 202 self.isDirty = 1 203 dependencies = () 204 try: 205 (checksum, mtime, timestamp, dependencies) = self[source] 206 except KeyError: 207 checksum = '' 208 mtime = 0 209 if not dependency in dependencies: 210 self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 211 dependencies = dependencies+(dependency,) 212 self[source] = (checksum, mtime, time.time(), dependencies) 213 return 214 215 def calculateDependencies(self): 216 self.logPrint('Recalculating dependencies', 1, 'sourceDB') 217 for source in self: 218 self.logPrint('Calculating '+source, 3, 'sourceDB') 219 (checksum, mtime, timestamp, dependencies) = self[source] 220 newDep = [] 221 try: 222 file = file(source) 223 except IOError, e: 224 if e.errno == errno.ENOENT: 225 del self[source] 226 else: 227 raise e 228 comps = source.split('/') 229 for line in file.xreadlines(): 230 m = self.includeRE.match(line) 231 if m: 232 filename = m.group('includeFile') 233 matchNum = 0 234 matchName = filename 235 self.logPrint(' Includes '+filename, 3, 'sourceDB') 236 for s in self: 237 if s.find(filename) >= 0: 238 self.logPrint(' Checking '+s, 3, 'sourceDB') 239 c = s.split('/') 240 for i in range(len(c)): 241 if not comps[i] == c[i]: break 242 if i > matchNum: 243 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 244 matchName = s 245 matchNum = i 246 newDep.append(matchName) 247 # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 248 self[source] = (checksum, mtime, timestamp, tuple(newDep)) 249 file.close() 250 251 def load(self): 252 '''Load the source database from the saved filename''' 253 filename = str(self.filename) 254 if os.path.exists(filename): 255 self.clear() 256 self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 257 dbFile = file(filename) 258 newDB = cPickle.load(dbFile) 259 dbFile.close() 260 self.update(newDB) 261 else: 262 self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 263 return 264 265 def save(self, force = 0): 266 '''Save the source database to a file. The saved database with have path names relative to the root.''' 267 if not self.isDirty and not force: 268 self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 269 return 270 filename = str(self.filename) 271 if os.path.exists(os.path.dirname(filename)): 272 self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 273 dbFile = file(filename, 'w') 274 cPickle.dump(self, dbFile) 275 dbFile.close() 276 self.isDirty = 0 277 else: 278 self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 279 return 280 281class DependencyAnalyzer (logger.Logger): 282 def __init__(self, sourceDB): 283 logger.Logger.__init__(self) 284 self.sourceDB = sourceDB 285 self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 286 return 287 288 def resolveDependency(self, source, dep): 289 if dep in self.sourceDB: return dep 290 # Choose the entry in sourceDB whose base matches dep, 291 # and who has the most path components in common with source 292 # This should be replaced by an appeal to cpp 293 matchNum = 0 294 matchName = dep 295 components = source.split(os.sep) 296 self.logPrint(' Includes '+filename, 3, 'sourceDB') 297 for s in self.sourceDB: 298 if s.find(dep) >= 0: 299 self.logPrint(' Checking '+s, 3, 'sourceDB') 300 comp = s.split(os.sep) 301 for i in range(len(comp)): 302 if not components[i] == comp[i]: break 303 if i > matchNum: 304 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 305 matchName = s 306 matchNum = i 307 if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 308 return matchName 309 310 def getNeighbors(self, source): 311 file = file(source) 312 adj = [] 313 for line in file.xreadlines(): 314 match = self.includeRE.match(line) 315 if match: 316 adj.append(self.resolveDependency(source, m.group('includeFile'))) 317 file.close() 318 return adj 319 320 def calculateDependencies(self): 321 '''Should this be a generator? 322 First assemble the DAG using #include relations 323 Then calculate the depdencies with all pairs shortest-path 324 - I think Floyd-Warshell and N-source Dijkstra are just as good 325 ''' 326 # Assembling DAG 327 dag = {} 328 for source in self.sourceDB: 329 try: 330 dag[source] = self.getNeighbors(self, source) 331 except IOError, e: 332 if e.errno == errno.ENOENT: 333 del self[source] 334 else: 335 raise e 336 # Finding all-pairs shortest path 337 338if __name__ == '__main__': 339 import sys 340 try: 341 if len(sys.argv) < 3: 342 print 'sourceDatabase.py <database filename> [insert | remove] <filename>' 343 else: 344 if os.path.exists(sys.argv[1]): 345 dbFile = file(sys.argv[1]) 346 sourceDB = cPickle.load(dbFile) 347 dbFile.close() 348 else: 349 sys.exit('Could not load source database from '+sys.argv[1]) 350 if sys.argv[2] == 'insert': 351 if sys.argv[3] in sourceDB: 352 self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 353 else: 354 self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 355 self.sourceDB.updateSource(sys.argv[3]) 356 elif sys.argv[2] == 'remove': 357 if sys.argv[3] in sourceDB: 358 sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 359 del self.sourceDB[sys.argv[3]] 360 else: 361 sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 362 removeRE = re.compile(sys.argv[3]) 363 removes = filter(removeRE.match, sourceDB.keys()) 364 for source in removes: 365 self.logPrint('Removing '+source, 3, 'sourceDB') 366 del self.sourceDB[source] 367 else: 368 sys.exit('Unknown source database action: '+sys.argv[2]) 369 sourceDB.save() 370 except Exception, e: 371 import traceback 372 print traceback.print_tb(sys.exc_info()[2]) 373 sys.exit(str(e)) 374 sys.exit(0) 375