1'''A source code database 2 3 SourceDB is a database of file information used to determine whether files 4 should be rebuilt by the build system. All files names are stored relative 5 to a given root, which is intended as the root of a Project. 6 7 Relative or absolute pathnames may be used as keys, but absolute pathnames 8 must fall under the database root. The value format is a tuple of the following: 9 10 Checksum: The md5 checksum of the file 11 Mod Time: The time the file was last modified 12 Timestamp: The time theentry was last modified 13 Dependencies: A tuple of files upon which this entry depends 14 15 This script also provides some default actions: 16 17 - insert <database file> <filename> 18 Inserts this file from the database, or updates its entry if it 19 already exists. 20 21 - remove <database file> <filename> 22 Removes this file from the database. The filename may also be a 23 regular expression. 24 25''' 26from __future__ import print_function 27from __future__ import absolute_import 28import logger 29 30import errno 31import os 32import re 33import time 34 35import pickle 36 37try: 38 from hashlib import md5 as new_md5 39except ImportError: 40 from md5 import new as new_md5 # novermin 41 42 43class SourceDB (dict, logger.Logger): 44 '''A SourceDB is a dictionary of file data used during the build process.''' 45 includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 46 isLoading = 0 47 48 def __init__(self, root, filename = None): 49 dict.__init__(self) 50 logger.Logger.__init__(self) 51 self.root = root 52 self.filename = filename 53 if self.filename is None: 54 self.filename = os.path.join(str(root), 'bsSource.db') 55 self.isDirty = 0 56 return 57 58 def __str__(self): 59 output = '' 60 for source in self: 61 (checksum, mtime, timestamp, dependencies) = self[source] 62 output += source+'\n' 63 output += ' Checksum: '+str(checksum)+'\n' 64 output += ' Mod Time: '+str(mtime)+'\n' 65 output += ' Timestamp: '+str(timestamp)+'\n' 66 output += ' Deps: '+str(dependencies)+'\n' 67 return output 68 69 def __setstate__(self, d): 70 logger.Logger.__setstate__(self, d) 71 # We have to prevent recursive calls to this when the pickled database is loaded in load() 72 # This is to ensure that fresh copies of the database are obtained after unpickling 73 if not SourceDB.isLoading: 74 SourceDB.isLoading = 1 75 self.load() 76 SourceDB.isLoading = 0 77 return 78 79 def getRelativePath(self, path): 80 '''Returns a relative source file path using the root''' 81 if os.path.isabs(path): 82 root = str(self.root) 83 if not path.startswith(root+os.sep): 84 raise ValueError('Absolute path '+path+' conflicts with root '+root) 85 else: 86 path = path[len(root)+1:] 87 return path 88 89 def checkValue(self, value): 90 '''Validate the value, raising ValueError for problems''' 91 if not isinstance(value, tuple): 92 raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 93 if not len(value) == 4: 94 raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 95 (checksum, mtime, timestamp, dependencies) = value 96 if not isinstance(checksum, str): 97 raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 98 if not isinstance(mtime, int): 99 raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 100 elif mtime < 0: 101 raise ValueError('Negative modification time for source database, '+str(mtime)) 102 if not isinstance(timestamp, float): 103 raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 104 elif timestamp < 0: 105 raise ValueError('Negative timestamp for source database, '+str(timestamp)) 106 if not isinstance(dependencies, tuple): 107 raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 108 return value 109 110 def __getitem__(self, key): 111 '''Converts the key to a relative source file path using the root''' 112 return dict.__getitem__(self, self.getRelativePath(key)) 113 114 def __setitem__(self, key, value): 115 '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 116 self.isDirty = 1 117 return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 118 119 def __delitem__(self, key): 120 '''Converts the key to a relative source file path using the root''' 121 self.isDirty = 1 122 return dict.__delitem__(self, self.getRelativePath(key)) 123 124 def __contains__(self, key): 125 '''Converts the key to a relative source file path using the root''' 126 return dict.__contains__(self, self.getRelativePath(key)) 127 128 def has_key(self, key): 129 '''This method just calls self.__contains__(key)''' 130 return self.__contains__(key) 131 132 def items(self): 133 '''Converts each key to a relative source file path using the root''' 134 return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 135 136 def keys(self): 137 '''Converts each key to a relative source file path using the root''' 138 return map(self.getRelativePath, dict.keys(self)) 139 140 def update(self, d): 141 '''Update the dictionary with the contents of d''' 142 self.isDirty = 1 143 for k in d: 144 self[k] = d[k] 145 return 146 147 def getChecksum(source, chunkSize = 1024*1024): 148 '''Return the md5 checksum for a given file, which may also be specified by its filename 149 - The chunkSize argument specifies the size of blocks read from the file''' 150 if hasattr(source, 'close'): 151 f = source 152 else: 153 f = open(source) 154 m = new_md5() 155 size = chunkSize 156 buf = f.read(size) 157 while buf: 158 m.update(buf) 159 buf = f.read(size) 160 f.close() 161 return m.hexdigest() 162 getChecksum = staticmethod(getChecksum) 163 164 def getModificationTime(source): 165 t = os.path.getmtime(source) 166 if isinstance(t, float): 167 t = int(t) 168 return t 169 getModificationTime = staticmethod(getModificationTime) 170 171 def updateSource(self, source, noChecksum = 0): 172 self.isDirty = 1 173 dependencies = () 174 try: 175 (checksum, mtime, timestamp, dependencies) = self[source] 176 except KeyError: 177 pass 178 self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 179 if noChecksum: 180 checksum = '' 181 else: 182 checksum = SourceDB.getChecksum(source) 183 self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 184 return 185 186 def clearSource(self, source): 187 '''This removes source information, but preserved dependencies''' 188 if source in self: 189 self.isDirty = 1 190 self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 191 (checksum, mtime, timestamp, dependencies) = self[source] 192 self[source] = ('', 0, time.time(), dependencies) 193 return 194 195 def getDependencies(self, source): 196 try: 197 (checksum, mtime, timestamp, dependencies) = self[source] 198 except KeyError: 199 dependencies = () 200 return dependencies 201 202 def addDependency(self, source, dependency): 203 self.isDirty = 1 204 dependencies = () 205 try: 206 (checksum, mtime, timestamp, dependencies) = self[source] 207 except KeyError: 208 checksum = '' 209 mtime = 0 210 if not dependency in dependencies: 211 self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 212 dependencies = dependencies+(dependency,) 213 self[source] = (checksum, mtime, time.time(), dependencies) 214 return 215 216 def calculateDependencies(self): 217 self.logPrint('Recalculating dependencies', 1, 'sourceDB') 218 for source in self: 219 self.logPrint('Calculating '+source, 3, 'sourceDB') 220 (checksum, mtime, timestamp, dependencies) = self[source] 221 newDep = [] 222 try: 223 file = open(source) 224 except IOError as e: 225 if e.errno == errno.ENOENT: 226 del self[source] 227 else: 228 raise e 229 comps = source.split('/') 230 for line in file: 231 m = self.includeRE.match(line) 232 if m: 233 filename = m.group('includeFile') 234 matchNum = 0 235 matchName = filename 236 self.logPrint(' Includes '+filename, 3, 'sourceDB') 237 for s in self: 238 if s.find(filename) >= 0: 239 self.logPrint(' Checking '+s, 3, 'sourceDB') 240 c = s.split('/') 241 for i in range(len(c)): 242 if not comps[i] == c[i]: break 243 if i > matchNum: 244 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 245 matchName = s 246 matchNum = i 247 newDep.append(matchName) 248 # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 249 self[source] = (checksum, mtime, timestamp, tuple(newDep)) 250 file.close() 251 252 def load(self): 253 '''Load the source database from the saved filename''' 254 filename = str(self.filename) 255 if os.path.exists(filename): 256 self.clear() 257 self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 258 dbFile = open(filename) 259 newDB = pickle.load(dbFile) 260 dbFile.close() 261 self.update(newDB) 262 else: 263 self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 264 return 265 266 def save(self, force = 0): 267 '''Save the source database to a file. The saved database with have path names relative to the root.''' 268 if not self.isDirty and not force: 269 self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 270 return 271 filename = str(self.filename) 272 if os.path.exists(os.path.dirname(filename)): 273 self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 274 dbFile = open(filename, 'w') 275 pickle.dump(self, dbFile) 276 dbFile.close() 277 self.isDirty = 0 278 else: 279 self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 280 return 281 282class DependencyAnalyzer (logger.Logger): 283 def __init__(self, sourceDB): 284 logger.Logger.__init__(self) 285 self.sourceDB = sourceDB 286 self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 287 return 288 289 def resolveDependency(self, source, dep): 290 if dep in self.sourceDB: return dep 291 # Choose the entry in sourceDB whose base matches dep, 292 # and who has the most path components in common with source 293 # This should be replaced by an appeal to cpp 294 matchNum = 0 295 matchName = dep 296 components = source.split(os.sep) 297 self.logPrint(' Includes '+filename, 3, 'sourceDB') 298 for s in self.sourceDB: 299 if s.find(dep) >= 0: 300 self.logPrint(' Checking '+s, 3, 'sourceDB') 301 comp = s.split(os.sep) 302 for i in range(len(comp)): 303 if not components[i] == comp[i]: break 304 if i > matchNum: 305 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 306 matchName = s 307 matchNum = i 308 if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 309 return matchName 310 311 def getNeighbors(self, source): 312 file = open(source) 313 adj = [] 314 for line in file: 315 match = self.includeRE.match(line) 316 if match: 317 adj.append(self.resolveDependency(source, m.group('includeFile'))) 318 file.close() 319 return adj 320 321 def calculateDependencies(self): 322 '''Should this be a generator? 323 First assemble the DAG using #include relations 324 Then calculate the depdencies with all pairs shortest-path 325 - I think Floyd-Warshell and N-source Dijkstra are just as good 326 ''' 327 # Assembling DAG 328 dag = {} 329 for source in self.sourceDB: 330 try: 331 dag[source] = self.getNeighbors(self, source) 332 except IOError as e: 333 if e.errno == errno.ENOENT: 334 del self[source] 335 else: 336 raise e 337 # Finding all-pairs shortest path 338 339if __name__ == '__main__': 340 import sys 341 try: 342 if len(sys.argv) < 3: 343 print('sourceDatabase.py <database filename> [insert | remove] <filename>') 344 else: 345 if os.path.exists(sys.argv[1]): 346 dbFile = open(sys.argv[1]) 347 sourceDB = pickle.load(dbFile) 348 dbFile.close() 349 else: 350 sys.exit('Could not load source database from '+sys.argv[1]) 351 if sys.argv[2] == 'insert': 352 if sys.argv[3] in sourceDB: 353 self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 354 else: 355 self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 356 self.sourceDB.updateSource(sys.argv[3]) 357 elif sys.argv[2] == 'remove': 358 if sys.argv[3] in sourceDB: 359 sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 360 del self.sourceDB[sys.argv[3]] 361 else: 362 sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 363 removeRE = re.compile(sys.argv[3]) 364 removes = list(filter(removeRE.match, sourceDB.keys())) 365 for source in removes: 366 self.logPrint('Removing '+source, 3, 'sourceDB') 367 del self.sourceDB[source] 368 else: 369 sys.exit('Unknown source database action: '+sys.argv[2]) 370 sourceDB.save() 371 except Exception as e: 372 import traceback 373 print(traceback.print_tb(sys.exc_info()[2])) 374 sys.exit(str(e)) 375 sys.exit(0) 376