1#!/usr/bin/env python3 2""" 3# Created: Mon Jun 20 19:13:15 2022 (-0400) 4# @author: Jacob Faibussowitsch 5""" 6from __future__ import annotations 7 8import weakref 9import functools 10import clang.cindex as clx # type: ignore[import] 11 12from .._typing import * 13 14from . import _util 15from ._path import Path 16from ._attr_cache import AttributeCache 17 18class ClangFileNameCache(weakref.WeakKeyDictionary[clx.TranslationUnit, clx.File]): 19 """ 20 It is for whatever reason stupidly expensive to create these file objects, and clang does it 21 every time you access a tu's file. So we cache them here 22 """ 23 def getname(self, tu: clx.TranslationUnit) -> clx.File: 24 if tu in self: 25 return self[tu] 26 new_file = self[tu] = tu.get_file(tu.spelling) 27 return new_file 28 29@functools.total_ordering 30class SourceLocation(AttributeCache): 31 """ 32 A simple wrapper class to add comparison operators to clx.SourceLocations since they only 33 implement __eq__() 34 """ 35 __filecache = ClangFileNameCache() 36 __slots__ = 'source_location', 'translation_unit', 'offset' 37 38 source_location: clx.SourceLocation 39 translation_unit: Optional[clx.TranslationUnit] 40 offset: int 41 42 def __init__(self, source_location: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> None: 43 r"""Construct a `SourceLocation` 44 45 Parameters 46 ---------- 47 source_location : 48 the source location to create from 49 tu : optional 50 the translation unit owning the source location if available 51 """ 52 if isinstance(source_location, SourceLocation): 53 if tu is None: 54 tu = source_location.translation_unit 55 else: 56 assert source_location.translation_unit is None, 'Both input tu and input source_location have valid Translation Units' 57 super().__init__(source_location._cache) 58 source_location = source_location.source_location 59 else: 60 super().__init__() 61 self.source_location = source_location 62 self.translation_unit = tu # store a reference to guard against GC 63 # offset is __by_far__ the most accessed attribute so we cache it in a slot, normally 64 # we could just defer to _get_cached() but doing so results in a roughly 400% 65 # performance degredation! 66 self.offset = source_location.offset 67 return 68 69 def __hash__(self) -> int: 70 return hash((self.source_location.file, self.offset)) 71 72 def __str__(self) -> str: 73 return f'{Path(str(self.file)).resolve()}:{self.line}:{self.column}' 74 75 def __repr__(self) -> str: 76 return f'<self: {object.__repr__(self)}, translation unit: {self.translation_unit}, clang source location: {self.source_location}>' 77 78 def __getattr__(self, attr: str) -> Any: 79 return super()._get_cached(attr, getattr, self.source_location, attr) 80 81 def __eq__(self, other: object) -> bool: 82 return self is other or self.source_location.__eq__(self.as_clang_source_location(other)) 83 84 def __lt__(self, other: SourceLocationLike) -> bool: 85 return self.offset < other.offset 86 87 def __ge__(self, other: SourceLocationLike) -> bool: 88 return self.offset >= other.offset 89 90 @classmethod 91 def cast(cls, other: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> SourceLocation: 92 r"""Cast `other` to `SourceLocation 93 94 Parameters 95 ---------- 96 other : 97 the object to cast 98 99 Returns 100 ------- 101 loc : 102 the `SourceLocation` 103 tu : optional 104 the translation unit if `other` does not have one already 105 106 Notes 107 ----- 108 If `other` is a `SourceLocation` then this routine returns `other` unchanged. If it is a 109 `clang.cindex.SourceLocation`, then it creates a new `SourceLocation` from it. 110 111 Raises 112 ------ 113 NotImplementedError 114 if `other` is not a `SourceLocation` or `clang.cindex.SourceLocation` 115 """ 116 if isinstance(other, cls): 117 return other 118 if isinstance(other, clx.SourceLocation): 119 return cls(other, tu=tu) 120 raise NotImplementedError(type(other)) 121 122 @classmethod 123 def get_filename_from_tu(cls, tu: clx.TranslationUnit) -> clx.File: 124 r"""Get the filename from a translation unit 125 126 Parameters 127 ---------- 128 tu : 129 the translation unit 130 131 Returns 132 ------- 133 file : 134 the file 135 136 Notes 137 ----- 138 It is for whatever reason stupidly expensive to create these as clang does not cache them. So 139 this acts as a cache 140 """ 141 return cls.__filecache.getname(tu) 142 143 @classmethod 144 def from_position(cls, tu: clx.TranslationUnit, line: int, col: int) -> SourceLocation: 145 r"""Construct a `SourceLocation` from a position 146 147 Parameters 148 ---------- 149 tu : 150 the translation unit of the source location 151 line : 152 the line number of the location 153 col : 154 the column number of the location 155 156 Returns 157 ------- 158 loc : 159 the `SourceLocation` 160 """ 161 return cls(clx.SourceLocation.from_position(tu, cls.get_filename_from_tu(tu), line, col), tu=tu) 162 163 @functools.lru_cache 164 def _get_src(self, func: Callable[..., str], *args, **kwargs) -> str: 165 tu = self.translation_unit 166 assert tu is not None 167 right = SourceLocation.from_position(tu, self.line, self.column + 1) 168 return func(SourceRange.from_locations(self, right), *args, **kwargs) 169 170 def raw(self, *args, **kwargs) -> str: 171 r"""Get the raw source for a `SourceLocation` 172 173 Parameters 174 ---------- 175 *args: iterable 176 the positional arguments to `petsclinter._util.get_raw_source_from_source_range()` 177 **kwargs: dict 178 the keyword arguments to `petsclinter._util.get_raw_source_from_source_range()` 179 180 Returns 181 ------- 182 ret: 183 the formatted source of the `SourceLocation` 184 """ 185 return self._get_src(SourceRange.raw, *args, **kwargs) 186 187 def formatted(self, *args, **kwargs) -> str: 188 r"""Get the formatted source for a `SourceLocation` 189 190 Parameters 191 ---------- 192 *args: iterable 193 the positional arguments to `petsclinter._util.get_formatted_source_from_source_range()` 194 **kwargs: dict 195 the keyword arguments to `petsclinter._util.get_formatted_source_from_source_range()` 196 197 Returns 198 ------- 199 ret: 200 the formatted source of the `SourceLocation` 201 """ 202 return self._get_src(SourceRange.formatted, *args, **kwargs) 203 204 @classmethod 205 def as_clang_source_location(cls, other: SourceLocationLike) -> clx.SourceLocation: 206 r"""Get `other` as a `clang.cindex.SourceLocation` 207 208 Parameters 209 ---------- 210 other : 211 a source location 212 213 Returns 214 ------- 215 loc : 216 the `clang.cindex.SourceLocation` 217 218 Raises 219 ------ 220 NotImplementedError 221 if `other` is not a `SourceLocation` or `clang.cindex.SourceLocation` 222 223 Notes 224 ----- 225 If `other` is a `clang.cindex.SourceLocation` then this routine returns `other` unchanged. 226 Otherwise it returns the stored source location. 227 """ 228 if isinstance(other, cls): 229 return other.source_location 230 if isinstance(other, clx.SourceLocation): 231 return other 232 raise NotImplementedError(type(other)) 233 234@functools.total_ordering 235class SourceRange(AttributeCache): 236 """Like SourceLocation but for clx.SourceRanges""" 237 __slots__ = 'source_range', 'translation_unit', '_end', '_start' 238 239 source_range: clx.SourceRange 240 translation_unit: Optional[clx.TranslationUnit] 241 _end: Optional[SourceLocation] 242 _start: Optional[SourceLocation] 243 244 def __init__(self, source_range: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> None: 245 r"""Construct a `SourceRange` 246 247 Parameters 248 ---------- 249 source_range: 250 the source `SourceRange` 251 tu: optional 252 the translation unit 253 254 Raises 255 ------ 256 ValueError 257 if both `tu` is not None and `source_range` is a `SourceRange` and also has a valid translation 258 unit, since it is ambigious which one should be used in that situation 259 260 Notes 261 ----- 262 Maybe it's not a big deal to simply prefer `tu` over `source_range.translation_unit` if both are 263 given, but I had not found a test case in the wild to debug this situation with, so for now it 264 errors. 265 """ 266 if isinstance(source_range, SourceRange): 267 if tu is None: 268 tu = source_range.translation_unit 269 elif source_range.translation_unit is not None: 270 raise ValueError( 271 'Both input tu and input source_range have valid Translation Units, don\'t know which to use!' 272 ) 273 274 super().__init__(source_range._cache) 275 self.source_range = source_range.source_range 276 self._start = source_range._start 277 self._end = source_range._end 278 else: 279 super().__init__() 280 self.source_range = source_range 281 self._start = None 282 self._end = None 283 self.translation_unit = tu # store a reference to guard against GC 284 return 285 286 def __hash__(self) -> int: 287 return hash((self.__start(), self.__end())) 288 289 def __repr__(self) -> str: 290 return f'<self:{object.__repr__(self)}, tu: {self.translation_unit}, source range: {self.source_range}>' 291 292 def __getattr__(self, attr: str) -> Any: 293 return super()._get_cached(attr, getattr, self.source_range, attr) 294 295 def __eq__(self, other: SourceRangeLike) -> bool: 296 return self is other or self.source_range.__eq__(self.as_clang_source_range(other)) 297 298 def __lt__(self, other: Union[SourceRangeLike, SourceLocationLike]) -> bool: 299 # If all this nonsense seems like a micro-optimization, it kinda is but also kinda 300 # isn't. For regular usage this is way overkill, but all this __start() and __end() 301 # caching and skipping the cast saves roughly 20s in a 100s run when overlap() is 302 # called over 3 million times! 303 if isinstance(other, SourceRange): 304 other = other.__start() 305 elif isinstance(other, clx.SourceRange): 306 other = other.start 307 elif isinstance(other, (clx.SourceLocation, SourceLocation)): 308 pass 309 else: 310 raise NotImplementedError(type(other)) 311 self_end = self.__end() 312 if self_end == other: 313 return self.__start() < other 314 return self_end < other 315 316 def __contains__(self, other: Union[SourceRangeLike, SourceLocationLike]) -> bool: 317 def contains(loc: Union[SourceRange, SourceLocation]) -> bool: 318 # reimplement clx.SourceRange.__contains__() as it has a bug 319 return start <= loc <= self.__end() 320 321 start = self.__start() 322 if isinstance(other, type(self)): 323 return contains(other.__start()) and contains(other.__end()) 324 cast = SourceLocation.cast 325 if isinstance(other, clx.SourceRange): 326 return contains(cast(other.start)) and contains(cast(other.end)) 327 if isinstance(other, SourceLocation): 328 return contains(other) 329 if isinstance(other, clx.SourceLocation): 330 return contains(cast(other)) 331 raise ValueError(type(other)) 332 333 def __len__(self) -> int: 334 return self.__end().offset - self.__start().offset 335 336 def __getitem__(self, idx: int) -> str: 337 return super()._get_cached( 338 '__raw_src', _util.get_raw_source_from_source_range, self 339 ).splitlines()[idx] 340 341 def __start(self) -> SourceLocation: 342 if self._start is None: 343 self._start = SourceLocation.cast(self.start) 344 return self._start 345 346 def __end(self) -> SourceLocation: 347 if self._end is None: 348 self._end = SourceLocation.cast(self.end) 349 return self._end 350 351 @classmethod 352 def cast(cls, other: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange: 353 r"""Cast `other` into a `SourceRange` 354 355 Parameters 356 ---------- 357 other : 358 the object to cast 359 tu : 360 the translation unit to attach (if `other` is a `clang.cindex.SourceRange`) 361 362 Returns 363 ------- 364 loc : 365 the `SourceRange` 366 367 Notes 368 ----- 369 If `other` is a `SourceRange` then this routine returns `other` unchanged. If it is a 370 `clang.cindex.SourceRange`, then it creates a new `SourceRange` from it. 371 372 Raises 373 ------ 374 NotImplementedError 375 if `other` is not a `SourceRange` or `clang.cindex.SourceRange` 376 """ 377 if isinstance(other, cls): 378 return other 379 if isinstance(other, clx.SourceRange): 380 return cls(other, tu=tu) 381 raise NotImplementedError(type(other)) 382 383 @classmethod 384 def from_locations(cls, left: SourceLocationLike, right: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange: 385 r"""Construct a `SourceRange` from locations 386 387 Parameters 388 ---------- 389 left : 390 the leftmost bound of the range 391 right : 392 the rightmost bound of the range 393 tu : optional 394 the translation unit of the range 395 396 Returns 397 ------- 398 rng : 399 the constructed `SourceRange` 400 401 Notes 402 ----- 403 `left.offset` must be <= `right.offset` 404 """ 405 assert left.offset <= right.offset 406 if tu is None: 407 attr = 'translation_unit' 408 tu = getattr(left, attr, None) 409 if tu is None: 410 tu = getattr(right, attr, None) 411 as_clang_sl = SourceLocation.as_clang_source_location 412 return cls(clx.SourceRange.from_locations(as_clang_sl(left), as_clang_sl(right)), tu=tu) 413 414 @classmethod 415 def from_positions(cls, tu: clx.TranslationUnit, line_left: int, col_left: int, line_right: int, col_right: int) -> SourceRange: 416 r"""Construct a `SourceRange` from positions 417 418 Parameters 419 ---------- 420 tu : 421 the translation unit containing the range 422 line_left : 423 the line number of the low bound 424 col_left : 425 the column number of the low bound 426 line_right : 427 the line number of the upper bound 428 col_right : 429 the column number of the upper bound 430 431 Returns 432 ------- 433 rng : 434 the constructed `SourceRange` 435 """ 436 filename = SourceLocation.get_filename_from_tu(tu) 437 from_pos = clx.SourceLocation.from_position 438 begin = from_pos(tu, filename, line_left, col_left) 439 end = from_pos(tu, filename, line_right, col_right) 440 return cls(clx.SourceRange.from_locations(begin, end), tu=tu) 441 442 @classmethod 443 def as_clang_source_range(cls, other: SourceRangeLike) -> clx.SourceRange: 444 r"""Retrieve the `clang.cindex.SourceRange` from a source range 445 446 Parameters 447 ---------- 448 other : 449 a source range 450 451 Returns 452 ------- 453 loc : 454 the `clang.cindex.SourceRange` 455 456 Raises 457 ------ 458 NotImplementedError 459 if `other` is not a `SourceRange` or `clang.cindex.SourceRange` 460 461 Notes 462 ----- 463 If `other` is a `clang.cindex.SourceRange` then this routine returns `other` unchanged. 464 Otherwise it returns the stored source range. 465 """ 466 if isinstance(other, cls): 467 return other.source_range 468 if isinstance(other, clx.SourceRange): 469 return other 470 raise NotImplementedError(type(other)) 471 472 @classmethod 473 def merge(cls, left: SourceRangeLike, right: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange: 474 r"""Create a merged `SourceRange` from two ranges 475 476 Parameters 477 ---------- 478 left : 479 the left range 480 right : 481 the right range 482 tu : 483 the translation unit containing the ranges 484 485 Returns 486 ------- 487 merged : 488 the merged range 489 490 Notes 491 ----- 492 Constructs a range from the set union of `left` and `right`. `left` and `right` may overlap, be 493 disjoint, or one may be entirely contained within the other. 494 """ 495 cast = SourceLocation.cast 496 start = min(cast(left.start), cast(right.start)) 497 end = max(cast(left.end), cast(right.end)) 498 return cls.from_locations(start, end, tu=tu) 499 500 def merge_with(self, other: SourceRangeLike) -> SourceRange: 501 r"""See `SourceRange.merge()`""" 502 return self.merge(self, other, tu=self.translation_unit) 503 504 def overlaps(self, other: SourceRangeLike) -> bool: 505 r"""Asks and answers the question: does this range overlap with `other`? 506 507 Parameters 508 ---------- 509 other : 510 the other range 511 512 Returns 513 ------- 514 result : 515 True if `self` overlaps with `other`, False otherwise 516 517 Notes 518 ----- 519 Two ranges are considered overlapping if either end is contained within the other. Notably, this 520 also includes 'touching' ranges too, i.e. the start of one equals the end of the other. E.g. the 521 following ranges: 522 523 x------x range_1 524 x--------x range_2 525 526 are considered to be overlapping, i.e. range_1.overlaps(range_2) is True (and vice-versa). 527 """ 528 end = self.__end() 529 if isinstance(other, type(self)): 530 return end >= other.__start() and other.__end() >= self.__start() 531 cast = SourceLocation.cast 532 return end >= cast(other.start) and cast(other.end) >= self.__start() 533 534 def resized(self, lbegin: int = 0, lend: int = 0, cbegin: Union[int, None] = 0, cend: Union[int, None] = 0) -> SourceRange: 535 r"""Return a resized `SourceRange`, if the `SourceRange` was resized it is a new object 536 537 Parameters 538 ---------- 539 lbegin : optional 540 number of lines to increment or decrement self.start.lines by 541 lend : optional 542 number of lines to increment or decrement self.end.lines by 543 cbegin : optional 544 number of columns to increment or decrement self.start.colummn by, None for BOL 545 cend : optional 546 number of columns to increment or decrement self.end.colummn by, None for EOL 547 548 Returns 549 ------- 550 ret : 551 the resized `SourceRange` 552 """ 553 start = self.__start() 554 if cbegin is None: 555 cbegin = -start.column + 1 556 if cend == 0 and lbegin + lend + cbegin == 0: 557 return self # nothing to do 558 559 end = self.__end() 560 endcol = -1 if cend is None else end.column + cend # -1 is EOL 561 return self.from_positions( 562 self.translation_unit, start.line + lbegin, start.column + cbegin, end.line + lend, endcol 563 ) 564 565 @functools.lru_cache 566 def raw(self, *args, **kwargs) -> str: 567 r"""Get the raw source for a `SourceRange` 568 569 Parameters 570 ---------- 571 *args: iterable 572 the positional arguments to `petsclinter._util.get_raw_source_from_source_range()` 573 **kwargs: dict 574 the keyword arguments to `petsclinter._util.get_raw_source_from_source_range()` 575 576 Returns 577 ------- 578 ret: 579 the raw source of the `SourceRange` 580 """ 581 return _util.get_raw_source_from_source_range(self, *args, **kwargs) 582 583 @functools.lru_cache 584 def formatted(self, *args, **kwargs) -> str: 585 r"""Get the formatted source for a `SourceRange` 586 587 Parameters 588 ---------- 589 *args: iterable 590 the positional arguments to `petsclinter._util.get_formatted_source_from_source_range()` 591 **kwargs: dict 592 the keyword arguments to `petsclinter._util.get_formatted_source_from_source_range()` 593 594 Returns 595 ------- 596 ret: 597 the formatted source of the `SourceRange` 598 """ 599 return _util.get_formatted_source_from_source_range(self, *args, **kwargs) 600 601 def view(self, *args, **kwargs) -> None: 602 r"""View a `SourceRange`""" 603 kwargs.setdefault('num_context', 5) 604 print(self.formatted(*args, **kwargs)) 605 return 606