'''A source code database

SourceDB is a database of file information used to determine whether files
should be rebuilt by the build system. All file names are stored relative
to a given root, which is intended as the root of a Project.

Relative or absolute pathnames may be used as keys, but absolute pathnames
must fall under the database root. The value format is a tuple of the following:

  Checksum:     The md5 checksum of the file
  Mod Time:     The time the file was last modified
  Timestamp:    The time the entry was last modified
  Dependencies: A tuple of files upon which this entry depends

This script also provides some default actions:

  - insert <database file> <filename>
    Inserts this file into the database, or updates its entry if it
    already exists.

  - remove <database file> <filename>
    Removes this file from the database. The filename may also be a
    regular expression.

'''
from __future__ import print_function
from __future__ import absolute_import
import logger

import errno
import os
import re
import time

import pickle
from hashlib import md5 as new_md5


def _countSharedComponents(first, second):
    '''Return how many leading items the sequences first and second have in common

    Comparison stops at the end of the shorter sequence, so sequences of
    different lengths never cause an IndexError.'''
    shared = 0
    for a, b in zip(first, second):
        if a != b:
            break
        shared += 1
    return shared


class SourceDB (dict, logger.Logger):
    '''A SourceDB is a dictionary of file data used during the build process.

    Keys are file names relative to self.root; values are tuples of
    (checksum, mtime, timestamp, dependencies) as validated by checkValue().'''
    # Matches an #include line; the closing delimiter must repeat the opening one
    includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
    # Class-wide guard which keeps __setstate__() from recursing through load()
    isLoading = 0

    def __init__(self, root, filename = None):
        '''Create an empty database for files under root
        - If filename is None, the database file is "bsSource.db" under root'''
        dict.__init__(self)
        logger.Logger.__init__(self)
        self.root = root
        self.filename = filename
        if self.filename is None:
            self.filename = os.path.join(str(root), 'bsSource.db')
        self.isDirty = 0
        return

    def __str__(self):
        '''Return a multi-line listing of every entry and its four fields'''
        pieces = []
        for source in self:
            (checksum, mtime, timestamp, dependencies) = self[source]
            pieces.append(source+'\n')
            pieces.append(' Checksum: '+str(checksum)+'\n')
            pieces.append(' Mod Time: '+str(mtime)+'\n')
            pieces.append(' Timestamp: '+str(timestamp)+'\n')
            pieces.append(' Deps: '+str(dependencies)+'\n')
        return ''.join(pieces)

    def __setstate__(self, d):
        '''Restore the pickled state, then reload the entries from the database file'''
        logger.Logger.__setstate__(self, d)
        # We have to prevent recursive calls to this when the pickled database is loaded in load()
        # This is to ensure that fresh copies of the database are obtained after unpickling
        if not SourceDB.isLoading:
            SourceDB.isLoading = 1
            try:
                self.load()
            finally:
                # Always release the guard, otherwise a failed load would
                # silently disable reloading for every later unpickle
                SourceDB.isLoading = 0
        return

    def getRelativePath(self, path):
        '''Returns a relative source file path using the root
        - Relative paths are returned unchanged
        - Raises ValueError for an absolute path which does not lie under the root'''
        if os.path.isabs(path):
            root = str(self.root)
            if not path.startswith(root+os.sep):
                raise ValueError('Absolute path '+path+' conflicts with root '+root)
            else:
                path = path[len(root)+1:]
        return path

    def checkValue(self, value):
        '''Validate the value, raising ValueError for problems
        - The value must be a 4-tuple (str checksum, int mtime >= 0, float timestamp >= 0, tuple dependencies)
        - Returns the value unchanged when it is valid'''
        if not isinstance(value, tuple):
            raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
        if not len(value) == 4:
            raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
        (checksum, mtime, timestamp, dependencies) = value
        if not isinstance(checksum, str):
            raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
        if not isinstance(mtime, int):
            raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
        elif mtime < 0:
            raise ValueError('Negative modification time for source database, '+str(mtime))
        if not isinstance(timestamp, float):
            raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
        elif timestamp < 0:
            raise ValueError('Negative timestamp for source database, '+str(timestamp))
        if not isinstance(dependencies, tuple):
            raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
        return value

    def __getitem__(self, key):
        '''Converts the key to a relative source file path using the root'''
        return dict.__getitem__(self, self.getRelativePath(key))

    def __setitem__(self, key, value):
        '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
        self.isDirty = 1
        return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))

    def __delitem__(self, key):
        '''Converts the key to a relative source file path using the root'''
        self.isDirty = 1
        return dict.__delitem__(self, self.getRelativePath(key))

    def __contains__(self, key):
        '''Converts the key to a relative source file path using the root'''
        return dict.__contains__(self, self.getRelativePath(key))

    def has_key(self, key):
        '''This method just calls self.__contains__(key)'''
        return self.__contains__(key)

    def items(self):
        '''Converts each key to a relative source file path using the root'''
        return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]

    def keys(self):
        '''Converts each key to a relative source file path using the root
        - A list is returned, so the result may be indexed and iterated repeatedly'''
        return [self.getRelativePath(key) for key in dict.keys(self)]

    def update(self, d):
        '''Update the dictionary with the contents of d
        - Every entry goes through __setitem__, so keys are made relative and values are validated'''
        self.isDirty = 1
        for k in d:
            self[k] = d[k]
        return

    def getChecksum(source, chunkSize = 1024*1024):
        '''Return the md5 checksum for a given file, which may also be specified by its filename
        - The chunkSize argument specifies the size of blocks read from the file
        - The file is closed before returning, even when it was passed in already open'''
        if hasattr(source, 'close'):
            f = source
        else:
            # Checksums are computed over raw bytes, so the file must be opened in binary mode
            f = open(source, 'rb')
        try:
            m = new_md5()
            buf = f.read(chunkSize)
            while buf:
                if not isinstance(buf, bytes):
                    # A caller supplied a file opened in text mode
                    buf = buf.encode('utf-8')
                m.update(buf)
                buf = f.read(chunkSize)
        finally:
            f.close()
        return m.hexdigest()
    getChecksum = staticmethod(getChecksum)

    def getModificationTime(source):
        '''Return the modification time of the file source, truncated to an integer'''
        t = os.path.getmtime(source)
        if isinstance(t, float):
            t = int(t)
        return t
    getModificationTime = staticmethod(getModificationTime)

    def updateSource(self, source, noChecksum = 0):
        '''Refresh the checksum, modification time and timestamp of source, keeping its dependencies
        - If noChecksum is true, an empty checksum is stored instead of reading the file'''
        self.isDirty = 1
        try:
            dependencies = self[source][3]
        except KeyError:
            dependencies = ()
        self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
        if noChecksum:
            checksum = ''
        else:
            checksum = SourceDB.getChecksum(source)
        self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
        return

    def clearSource(self, source):
        '''This removes source information, but preserves dependencies'''
        if source in self:
            self.isDirty = 1
            self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
            dependencies = self[source][3]
            self[source] = ('', 0, time.time(), dependencies)
        return

    def getDependencies(self, source):
        '''Return the dependency tuple of source, or an empty tuple if source has no entry'''
        try:
            return self[source][3]
        except KeyError:
            return ()

    def addDependency(self, source, dependency):
        '''Append dependency to the dependencies of source, creating an empty entry if necessary
        - The timestamp of the entry is always refreshed'''
        self.isDirty = 1
        try:
            (checksum, mtime, timestamp, dependencies) = self[source]
        except KeyError:
            checksum = ''
            mtime = 0
            dependencies = ()
        if not dependency in dependencies:
            self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
            dependencies = dependencies+(dependency,)
        self[source] = (checksum, mtime, time.time(), dependencies)
        return

    def calculateDependencies(self):
        '''Recompute the dependencies of every entry from the #include lines of its file
        - Entries whose file no longer exists are removed from the database
        - Each include is resolved to the entry containing its name which shares the
          most leading path components with the including file'''
        self.logPrint('Recalculating dependencies', 1, 'sourceDB')
        # Iterate over a snapshot, since missing files are deleted along the way
        for source in list(self):
            self.logPrint('Calculating '+source, 3, 'sourceDB')
            (checksum, mtime, timestamp, dependencies) = self[source]
            try:
                sourceFile = open(source)
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                del self[source]
                # There is nothing to scan for a file which has disappeared
                continue
            newDep = []
            comps = source.split('/')
            with sourceFile:
                for line in sourceFile:
                    m = self.includeRE.match(line)
                    if not m:
                        continue
                    filename = m.group('includeFile')
                    matchNum = 0
                    matchName = filename
                    self.logPrint(' Includes '+filename, 3, 'sourceDB')
                    for s in self:
                        if s.find(filename) >= 0:
                            self.logPrint(' Checking '+s, 3, 'sourceDB')
                            shared = _countSharedComponents(comps, s.split('/'))
                            if shared > matchNum:
                                self.logPrint(' Choosing '+s+'('+str(shared)+')', 3, 'sourceDB')
                                matchName = s
                                matchNum = shared
                    newDep.append(matchName)
            # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
            self[source] = (checksum, mtime, timestamp, tuple(newDep))
        return

    def load(self):
        '''Load the source database from the saved filename
        - The current contents are replaced only after the file has been read successfully'''
        filename = str(self.filename)
        if os.path.exists(filename):
            self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
            # NOTE: pickle can execute arbitrary code, so only load database files from a trusted location
            with open(filename, 'rb') as dbFile:
                newDB = pickle.load(dbFile)
            self.clear()
            self.update(newDB)
        else:
            self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
        return

    def save(self, force = 0):
        '''Save the source database to a file. The saved database will have path names relative to the root.
        - Nothing is written unless the database is dirty or force is true
        - Nothing is written if the directory of the database file does not exist'''
        if not self.isDirty and not force:
            self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
            return
        filename = str(self.filename)
        # A bare filename has an empty directory part, which means the current directory
        directory = os.path.dirname(filename) or os.curdir
        if os.path.exists(directory):
            self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
            with open(filename, 'wb') as dbFile:
                pickle.dump(self, dbFile)
            self.isDirty = 0
        else:
            self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
        return


class DependencyAnalyzer (logger.Logger):
    '''Builds the #include graph for the files of a SourceDB'''
    def __init__(self, sourceDB):
        '''Create an analyzer for the entries of sourceDB'''
        logger.Logger.__init__(self)
        self.sourceDB = sourceDB
        self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
        return

    def resolveDependency(self, source, dep):
        '''Return the database entry to which the include dep, found in source, refers
        - Raises RuntimeError if no entry of the database matches'''
        if dep in self.sourceDB: return dep
        # Choose the entry in sourceDB whose base matches dep,
        # and who has the most path components in common with source
        # This should be replaced by an appeal to cpp
        matchNum = 0
        matchName = dep
        components = source.split(os.sep)
        self.logPrint(' Includes '+dep, 3, 'sourceDB')
        for s in self.sourceDB:
            if s.find(dep) >= 0:
                self.logPrint(' Checking '+s, 3, 'sourceDB')
                shared = _countSharedComponents(components, s.split(os.sep))
                if shared > matchNum:
                    self.logPrint(' Choosing '+s+'('+str(shared)+')', 3, 'sourceDB')
                    matchName = s
                    matchNum = shared
        if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
        return matchName

    def getNeighbors(self, source):
        '''Return the list of database entries which the file source includes directly'''
        adj = []
        with open(source) as sourceFile:
            for line in sourceFile:
                match = self.includeRE.match(line)
                if match:
                    adj.append(self.resolveDependency(source, match.group('includeFile')))
        return adj

    def calculateDependencies(self):
        '''Should this be a generator?
        First assemble the DAG using #include relations
        Then calculate the dependencies with all pairs shortest-path
          - I think Floyd-Warshall and N-source Dijkstra are just as good

        Only the first step is implemented: the return value maps each source to
        the list of its direct includes. Entries whose file no longer exists are
        removed from the database.
        '''
        # Assembling DAG
        dag = {}
        # Iterate over a snapshot, since missing files are deleted along the way
        for source in list(self.sourceDB):
            try:
                dag[source] = self.getNeighbors(source)
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                del self.sourceDB[source]
        # Finding all-pairs shortest path
        return dag


if __name__ == '__main__':
    import sys
    try:
        # The program name, database file, action and filename are all required
        if len(sys.argv) < 4:
            print('sourceDatabase.py <database filename> [insert | remove] <filename>')
        else:
            if os.path.exists(sys.argv[1]):
                # NOTE: pickle can execute arbitrary code, so only open trusted database files
                with open(sys.argv[1], 'rb') as dbFile:
                    sourceDB = pickle.load(dbFile)
            else:
                sys.exit('Could not load source database from '+sys.argv[1])
            if sys.argv[2] == 'insert':
                if sys.argv[3] in sourceDB:
                    sourceDB.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
                else:
                    sourceDB.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
                sourceDB.updateSource(sys.argv[3])
            elif sys.argv[2] == 'remove':
                if sys.argv[3] in sourceDB:
                    sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
                    del sourceDB[sys.argv[3]]
                else:
                    sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
                    removeRE = re.compile(sys.argv[3])
                    # Collect the matches first so the database is not modified while it is scanned
                    removes = [source for source in sourceDB.keys() if removeRE.match(source)]
                    for source in removes:
                        sourceDB.logPrint('Removing '+source, 3, 'sourceDB')
                        del sourceDB[source]
            else:
                sys.exit('Unknown source database action: '+sys.argv[2])
            sourceDB.save()
    except Exception as e:
        import traceback
        traceback.print_tb(sys.exc_info()[2])
        sys.exit(str(e))
    sys.exit(0)