xref: /petsc/lib/petsc/bin/maint/petsclinter/petsclinter/classes/_src_pos.py (revision 9c5460f9064ca60dd71a234a1f6faf93e7a6b0c9)
1#!/usr/bin/env python3
2"""
3# Created: Mon Jun 20 19:13:15 2022 (-0400)
4# @author: Jacob Faibussowitsch
5"""
6from __future__ import annotations
7
8import weakref
9import functools
10import clang.cindex as clx # type: ignore[import]
11
12from .._typing import *
13
14from .            import _util
15from ._path       import Path
16from ._attr_cache import AttributeCache
17
class ClangFileNameCache(weakref.WeakKeyDictionary[clx.TranslationUnit, clx.File]):
  """
  Weak-keyed memo mapping a translation unit to its `clang.cindex.File`.

  Creating these file objects is (for whatever reason) very expensive, and clang creates a new
  one every time a translation unit's file is requested, so they are remembered here. Being a
  `WeakKeyDictionary`, an entry does not keep its translation unit alive.
  """
  def getname(self, tu: clx.TranslationUnit) -> clx.File:
    r"""Return the file object for `tu`, creating and storing it on the first request

    Parameters
    ----------
    tu :
      the translation unit whose file is wanted

    Returns
    -------
    file :
      the result of `tu.get_file(tu.spelling)`, computed at most once per live `tu`
    """
    if tu not in self:
      # first request for this translation unit, pay the creation cost once
      self[tu] = tu.get_file(tu.spelling)
    return self[tu]
28
@functools.total_ordering
class SourceLocation(AttributeCache):
  """
  A simple wrapper class to add comparison operators to clx.SourceLocations since they only
  implement __eq__()

  Ordering is decided by `offset` alone, equality defers to the wrapped
  `clang.cindex.SourceLocation`. Any attribute which is not a slot of this class is fetched from
  the wrapped location through `AttributeCache._get_cached()`.
  """
  __filecache = ClangFileNameCache()
  __slots__   = 'source_location', 'translation_unit', 'offset'

  source_location: clx.SourceLocation
  translation_unit: Optional[clx.TranslationUnit]
  offset: int

  def __init__(self, source_location: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> None:
    r"""Construct a `SourceLocation`

    Parameters
    ----------
    source_location :
      the source location to create from
    tu : optional
      the translation unit owning the source location if available

    Notes
    -----
    If `source_location` is itself a `SourceLocation`, its attribute cache is handed to the base
    class and its wrapped clang location is re-used. In that case at most one of `tu` and
    `source_location.translation_unit` may be set, this is checked with an `assert`.
    """
    if isinstance(source_location, SourceLocation):
      if tu is None:
        tu = source_location.translation_unit
      else:
        assert source_location.translation_unit is None, 'Both input tu and input source_location have valid Translation Units'
      super().__init__(source_location._cache)
      # unwrap, from here on source_location is the clang object
      source_location = source_location.source_location
    else:
      super().__init__()
    self.source_location  = source_location
    self.translation_unit = tu # store a reference to guard against GC
    # offset is __by_far__ the most accessed attribute so we cache it in a slot, normally
    # we could just defer to _get_cached() but doing so results in a roughly 400%
    # performance degradation!
    self.offset = source_location.offset
    return

  def __hash__(self) -> int:
    # hash on the (file, offset) pair of the wrapped clang location
    return hash((self.source_location.file, self.offset))

  def __str__(self) -> str:
    # '<resolved file path>:<line>:<column>'
    return f'{Path(str(self.file)).resolve()}:{self.line}:{self.column}'

  def __repr__(self) -> str:
    return f'<self: {object.__repr__(self)}, translation unit: {self.translation_unit}, clang source location: {self.source_location}>'

  def __getattr__(self, attr: str) -> Any:
    # only reached when normal lookup fails, i.e. for anything that is not one of our slots;
    # forward to the wrapped clang location and let the attribute cache remember the result
    return super()._get_cached(attr, getattr, self.source_location, attr)

  def __eq__(self, other: object) -> bool:
    r"""Compare with another source location

    Parameters
    ----------
    other :
      the object to compare against

    Returns
    -------
    ret :
      True if `other` is this object or wraps/is an equal `clang.cindex.SourceLocation`.
      `NotImplemented` if `other` is neither a `SourceLocation` nor a
      `clang.cindex.SourceLocation`, so that Python falls back to its default handling (and
      ultimately False) instead of an exception escaping from `==`, `in` or a dict lookup.
    """
    if self is other:
      return True
    if isinstance(other, SourceLocation):
      other = other.source_location
    elif not isinstance(other, clx.SourceLocation):
      return NotImplemented
    return self.source_location.__eq__(other)

  def __lt__(self, other: SourceLocationLike) -> bool:
    # ordering looks at the offset only, the file is not consulted
    return self.offset < other.offset

  def __ge__(self, other: SourceLocationLike) -> bool:
    return self.offset >= other.offset

  @classmethod
  def cast(cls, other: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> SourceLocation:
    r"""Cast `other` to `SourceLocation`

    Parameters
    ----------
    other :
      the object to cast
    tu : optional
      the translation unit to attach, only used if `other` is a `clang.cindex.SourceLocation`

    Returns
    -------
    loc :
      the `SourceLocation`

    Notes
    -----
    If `other` is a `SourceLocation` then this routine returns `other` unchanged. If it is a
    `clang.cindex.SourceLocation`, then it creates a new `SourceLocation` from it.

    Raises
    ------
    NotImplementedError
      if `other` is not a `SourceLocation` or `clang.cindex.SourceLocation`
    """
    if isinstance(other, cls):
      return other
    if isinstance(other, clx.SourceLocation):
      return cls(other, tu=tu)
    raise NotImplementedError(type(other))

  @classmethod
  def get_filename_from_tu(cls, tu: clx.TranslationUnit) -> clx.File:
    r"""Get the filename from a translation unit

    Parameters
    ----------
    tu :
      the translation unit

    Returns
    -------
    file :
      the file

    Notes
    -----
    It is for whatever reason stupidly expensive to create these as clang does not cache them. So
    this acts as a cache
    """
    return cls.__filecache.getname(tu)

  @classmethod
  def from_position(cls, tu: clx.TranslationUnit, line: int, col: int) -> SourceLocation:
    r"""Construct a `SourceLocation` from a position

    Parameters
    ----------
    tu :
      the translation unit of the source location
    line :
      the line number of the location
    col :
      the column number of the location

    Returns
    -------
    loc :
      the `SourceLocation`
    """
    return cls(clx.SourceLocation.from_position(tu, cls.get_filename_from_tu(tu), line, col), tu=tu)

  # NOTE(review): lru_cache on an instance method keys every entry on `self` (plus `func` and the
  # arguments, which must therefore all be hashable). The cache is shared by all instances and
  # keeps the locations it holds alive until they are evicted.
  @functools.lru_cache
  def _get_src(self, func: Callable[..., str], *args, **kwargs) -> str:
    r"""Apply `func` to the one-column-wide `SourceRange` beginning at this location

    Parameters
    ----------
    func :
      a callable taking a `SourceRange` followed by `*args` and `**kwargs` and returning a string
    *args: iterable
      the positional arguments to forward to `func`
    **kwargs: dict
      the keyword arguments to forward to `func`

    Returns
    -------
    ret:
      whatever `func` returns
    """
    tu = self.translation_unit
    assert tu is not None
    # the range spans from this location to the next column on the same line
    right = SourceLocation.from_position(tu, self.line, self.column + 1)
    return func(SourceRange.from_locations(self, right), *args, **kwargs)

  def raw(self, *args, **kwargs) -> str:
    r"""Get the raw source for a `SourceLocation`

    Parameters
    ----------
    *args: iterable
      the positional arguments to `petsclinter._util.get_raw_source_from_source_range()`
    **kwargs: dict
      the keyword arguments to `petsclinter._util.get_raw_source_from_source_range()`

    Returns
    -------
    ret:
      the raw source of the `SourceLocation`
    """
    return self._get_src(SourceRange.raw, *args, **kwargs)

  def formatted(self, *args, **kwargs) -> str:
    r"""Get the formatted source for a `SourceLocation`

    Parameters
    ----------
    *args: iterable
      the positional arguments to `petsclinter._util.get_formatted_source_from_source_range()`
    **kwargs: dict
      the keyword arguments to `petsclinter._util.get_formatted_source_from_source_range()`

    Returns
    -------
    ret:
      the formatted source of the `SourceLocation`
    """
    return self._get_src(SourceRange.formatted, *args, **kwargs)

  @classmethod
  def as_clang_source_location(cls, other: SourceLocationLike) -> clx.SourceLocation:
    r"""Get `other` as a `clang.cindex.SourceLocation`

    Parameters
    ----------
    other :
      a source location

    Returns
    -------
    loc :
      the `clang.cindex.SourceLocation`

    Raises
    ------
    NotImplementedError
      if `other` is not a `SourceLocation` or `clang.cindex.SourceLocation`

    Notes
    -----
    If `other` is a `clang.cindex.SourceLocation` then this routine returns `other` unchanged.
    Otherwise it returns the stored source location.
    """
    if isinstance(other, cls):
      return other.source_location
    if isinstance(other, clx.SourceLocation):
      return other
    raise NotImplementedError(type(other))
233
@functools.total_ordering
class SourceRange(AttributeCache):
  """
  Like SourceLocation but for clx.SourceRanges

  Wraps a `clang.cindex.SourceRange`, optionally together with its owning translation unit, and
  lazily caches the `SourceLocation` wrappers of its start and end in the `_start` and `_end`
  slots. Any attribute which is not a slot of this class is fetched from the wrapped range
  through `AttributeCache._get_cached()`.
  """
  __slots__ = 'source_range', 'translation_unit', '_end', '_start'

  source_range: clx.SourceRange
  translation_unit: Optional[clx.TranslationUnit]
  _end: Optional[SourceLocation]
  _start: Optional[SourceLocation]

  def __init__(self, source_range: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> None:
    r"""Construct a `SourceRange`

    Parameters
    ----------
    source_range:
      the source `SourceRange`
    tu: optional
      the translation unit

    Raises
    ------
    ValueError
      if both `tu` is not None and `source_range` is a `SourceRange` and also has a valid translation
      unit, since it is ambiguous which one should be used in that situation

    Notes
    -----
    Maybe it's not a big deal to simply prefer `tu` over `source_range.translation_unit` if both are
    given, but I had not found a test case in the wild to debug this situation with, so for now it
    errors.
    """
    if isinstance(source_range, SourceRange):
      if tu is None:
        tu = source_range.translation_unit
      elif source_range.translation_unit is not None:
        raise ValueError(
          'Both input tu and input source_range have valid Translation Units, don\'t know which to use!'
        )

      # copying from another SourceRange: share its attribute cache and whatever start/end
      # wrappers it has already created
      super().__init__(source_range._cache)
      self.source_range = source_range.source_range
      self._start       = source_range._start
      self._end         = source_range._end
    else:
      super().__init__()
      self.source_range = source_range
      self._start       = None
      self._end         = None
    self.translation_unit = tu # store a reference to guard against GC
    return

  def __hash__(self) -> int:
    # hash on the (start, end) pair of wrapped locations
    return hash((self.__start(), self.__end()))

  def __repr__(self) -> str:
    return f'<self:{object.__repr__(self)}, tu: {self.translation_unit}, source range: {self.source_range}>'

  def __getattr__(self, attr: str) -> Any:
    # only reached when normal lookup fails, i.e. for anything that is not one of our slots;
    # forward to the wrapped clang range and let the attribute cache remember the result
    return super()._get_cached(attr, getattr, self.source_range, attr)

  def __eq__(self, other: object) -> bool:
    r"""Compare with another source range

    Parameters
    ----------
    other :
      the object to compare against

    Returns
    -------
    ret :
      True if `other` is this object or wraps/is an equal `clang.cindex.SourceRange`.
      `NotImplemented` if `other` is neither a `SourceRange` nor a `clang.cindex.SourceRange`.

    Notes
    -----
    Returning `NotImplemented` (rather than raising) matters because `__lt__()` also accepts source
    locations, and the `__le__()` generated by `functools.total_ordering` falls back to `==` with
    that very same operand.
    """
    if self is other:
      return True
    if isinstance(other, SourceRange):
      other = other.source_range
    elif not isinstance(other, clx.SourceRange):
      return NotImplemented
    return self.source_range.__eq__(other)

  def __lt__(self, other: Union[SourceRangeLike, SourceLocationLike]) -> bool:
    r"""Order this range relative to another range or a location

    Parameters
    ----------
    other :
      a source range (its start is used) or a source location

    Returns
    -------
    ret :
      True if the end of this range is less than `other`. If the end of this range equals `other`
      then the start of this range is compared instead.

    Raises
    ------
    NotImplementedError
      if `other` is not a (clang) source range or (clang) source location
    """
    # If all this nonsense seems like a micro-optimization, it kinda is but also kinda
    # isn't. For regular usage this is way overkill, but all this __start() and __end()
    # caching and skipping the cast saves roughly 20s in a 100s run when overlap() is
    # called over 3 million times!
    if isinstance(other, SourceRange):
      other = other.__start()
    elif isinstance(other, clx.SourceRange):
      other = other.start
    elif isinstance(other, (clx.SourceLocation, SourceLocation)):
      pass
    else:
      raise NotImplementedError(type(other))
    self_end = self.__end()
    if self_end == other:
      # tie on the end, fall back to comparing our start
      return self.__start() < other
    return self_end < other

  def __contains__(self, other: Union[SourceRangeLike, SourceLocationLike]) -> bool:
    r"""Test whether a range or location lies within this range (both bounds inclusive)

    Parameters
    ----------
    other :
      a source range (both of its ends must lie within this range) or a source location

    Returns
    -------
    ret :
      True if `other` is contained in this range, False otherwise

    Raises
    ------
    ValueError
      if `other` is not a (clang) source range or (clang) source location
    """
    def contains(loc: Union[SourceRange, SourceLocation]) -> bool:
      # reimplement clx.SourceRange.__contains__() as it has a bug
      return start <= loc <= self.__end()

    start = self.__start()
    if isinstance(other, type(self)):
      return contains(other.__start()) and contains(other.__end())
    cast = SourceLocation.cast
    if isinstance(other, clx.SourceRange):
      return contains(cast(other.start)) and contains(cast(other.end))
    if isinstance(other, SourceLocation):
      return contains(other)
    if isinstance(other, clx.SourceLocation):
      return contains(cast(other))
    raise ValueError(type(other))

  def __len__(self) -> int:
    # the difference between the end and start offsets
    return self.__end().offset - self.__start().offset

  def __getitem__(self, idx: int) -> str:
    # index into the lines of the raw source of the range, the raw source itself is stored in the
    # attribute cache under '__raw_src'
    return super()._get_cached(
      '__raw_src', _util.get_raw_source_from_source_range, self
    ).splitlines()[idx]

  def __start(self) -> SourceLocation:
    # lazily wrap (and remember) the start of the range
    if self._start is None:
      self._start = SourceLocation.cast(self.start)
    return self._start

  def __end(self) -> SourceLocation:
    # lazily wrap (and remember) the end of the range
    if self._end is None:
      self._end = SourceLocation.cast(self.end)
    return self._end

  @classmethod
  def cast(cls, other: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange:
    r"""Cast `other` into a `SourceRange`

    Parameters
    ----------
    other :
      the object to cast
    tu :
      the translation unit to attach (if `other` is a `clang.cindex.SourceRange`)

    Returns
    -------
    loc :
      the `SourceRange`

    Notes
    -----
    If `other` is a `SourceRange` then this routine returns `other` unchanged. If it is a
    `clang.cindex.SourceRange`, then it creates a new `SourceRange` from it.

    Raises
    ------
    NotImplementedError
      if `other` is not a `SourceRange` or `clang.cindex.SourceRange`
    """
    if isinstance(other, cls):
      return other
    if isinstance(other, clx.SourceRange):
      return cls(other, tu=tu)
    raise NotImplementedError(type(other))

  @classmethod
  def from_locations(cls, left: SourceLocationLike, right: SourceLocationLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange:
    r"""Construct a `SourceRange` from locations

    Parameters
    ----------
    left :
      the leftmost bound of the range
    right :
      the rightmost bound of the range
    tu : optional
      the translation unit of the range

    Returns
    -------
    rng :
      the constructed `SourceRange`

    Notes
    -----
    `left.offset` must be <= `right.offset`. If `tu` is not given, the translation unit of `left`
    is used if it has one, otherwise that of `right`.
    """
    assert left.offset <= right.offset
    if tu is None:
      # clang source locations have no translation_unit attribute, hence getattr() with a default
      attr = 'translation_unit'
      tu   = getattr(left, attr, None)
      if tu is None:
        tu = getattr(right, attr, None)
    as_clang_sl = SourceLocation.as_clang_source_location
    return cls(clx.SourceRange.from_locations(as_clang_sl(left), as_clang_sl(right)), tu=tu)

  @classmethod
  def from_positions(cls, tu: clx.TranslationUnit, line_left: int, col_left: int, line_right: int, col_right: int) -> SourceRange:
    r"""Construct a `SourceRange` from positions

    Parameters
    ----------
    tu :
      the translation unit containing the range
    line_left :
      the line number of the low bound
    col_left :
      the column number of the low bound
    line_right :
      the line number of the upper bound
    col_right :
      the column number of the upper bound

    Returns
    -------
    rng :
      the constructed `SourceRange`
    """
    filename = SourceLocation.get_filename_from_tu(tu)
    from_pos = clx.SourceLocation.from_position
    begin    = from_pos(tu, filename, line_left, col_left)
    end      = from_pos(tu, filename, line_right, col_right)
    return cls(clx.SourceRange.from_locations(begin, end), tu=tu)

  @classmethod
  def as_clang_source_range(cls, other: SourceRangeLike) -> clx.SourceRange:
    r"""Retrieve the `clang.cindex.SourceRange` from a source range

    Parameters
    ----------
    other :
      a source range

    Returns
    -------
    loc :
      the `clang.cindex.SourceRange`

    Raises
    ------
    NotImplementedError
      if `other` is not a `SourceRange` or `clang.cindex.SourceRange`

    Notes
    -----
    If `other` is a `clang.cindex.SourceRange` then this routine returns `other` unchanged.
    Otherwise it returns the stored source range.
    """
    if isinstance(other, cls):
      return other.source_range
    if isinstance(other, clx.SourceRange):
      return other
    raise NotImplementedError(type(other))

  @classmethod
  def merge(cls, left: SourceRangeLike, right: SourceRangeLike, tu: Optional[clx.TranslationUnit] = None) -> SourceRange:
    r"""Create a merged `SourceRange` from two ranges

    Parameters
    ----------
    left :
      the left range
    right :
      the right range
    tu :
      the translation unit containing the ranges

    Returns
    -------
    merged :
      the merged range

    Notes
    -----
    Constructs a range from the set union of `left` and `right`. `left` and `right` may overlap, be
    disjoint, or one may be entirely contained within the other.
    """
    cast  = SourceLocation.cast
    # smallest start and largest end of the two, whichever range they come from
    start = min(cast(left.start), cast(right.start))
    end   = max(cast(left.end),   cast(right.end))
    return cls.from_locations(start, end, tu=tu)

  def merge_with(self, other: SourceRangeLike) -> SourceRange:
    r"""See `SourceRange.merge()`"""
    return self.merge(self, other, tu=self.translation_unit)

  def overlaps(self, other: SourceRangeLike) -> bool:
    r"""Asks and answers the question: does this range overlap with `other`?

    Parameters
    ----------
    other :
      the other range

    Returns
    -------
    result :
      True if `self` overlaps with `other`, False otherwise

    Notes
    -----
    Two ranges are considered overlapping if either end is contained within the other. Notably, this
    also includes 'touching' ranges too, i.e. the start of one equals the end of the other. E.g. the
    following ranges:

    x------x          range_1
           x--------x range_2

    are considered to be overlapping, i.e. range_1.overlaps(range_2) is True (and vice-versa).
    """
    end = self.__end()
    if isinstance(other, type(self)):
      # fast path, re-use the cached start/end wrappers of other
      return end >= other.__start() and other.__end() >= self.__start()
    cast = SourceLocation.cast
    return end >= cast(other.start) and cast(other.end) >= self.__start()

  def resized(self, lbegin: int = 0, lend: int = 0, cbegin: Union[int, None] = 0, cend: Union[int, None] = 0) -> SourceRange:
    r"""Return a resized `SourceRange`, if the `SourceRange` was resized it is a new object

    Parameters
    ----------
    lbegin : optional
      number of lines to increment or decrement self.start.line by
    lend : optional
      number of lines to increment or decrement self.end.line by
    cbegin : optional
      number of columns to increment or decrement self.start.column by, None for BOL
    cend : optional
      number of columns to increment or decrement self.end.column by, None for EOL

    Returns
    -------
    ret :
      the resized `SourceRange`, or `self` if every requested change is zero
    """
    start = self.__start()
    if cbegin is None:
      # move the start back to column 1, i.e. beginning of line
      cbegin = -start.column + 1
    # Each delta must be checked on its own. Testing their sum would treat deltas that cancel out
    # (e.g. lbegin=-1, lend=1 to grow the range by a line in both directions) as a no-op.
    if lbegin == 0 and lend == 0 and cbegin == 0 and cend == 0:
      return self # nothing to do

    end    = self.__end()
    endcol = -1 if cend is None else end.column + cend # -1 is EOL
    return self.from_positions(
      self.translation_unit, start.line + lbegin, start.column + cbegin, end.line + lend, endcol
    )

  # NOTE(review): lru_cache on an instance method keys every entry on `self` (plus the arguments,
  # which must therefore all be hashable). The cache is shared by all instances and keeps the
  # ranges it holds alive until they are evicted.
  @functools.lru_cache
  def raw(self, *args, **kwargs) -> str:
    r"""Get the raw source for a `SourceRange`

    Parameters
    ----------
    *args: iterable
      the positional arguments to `petsclinter._util.get_raw_source_from_source_range()`
    **kwargs: dict
      the keyword arguments to `petsclinter._util.get_raw_source_from_source_range()`

    Returns
    -------
    ret:
      the raw source of the `SourceRange`
    """
    return _util.get_raw_source_from_source_range(self, *args, **kwargs)

  # NOTE(review): same lru_cache-on-a-method caveat as for raw()
  @functools.lru_cache
  def formatted(self, *args, **kwargs) -> str:
    r"""Get the formatted source for a `SourceRange`

    Parameters
    ----------
    *args: iterable
      the positional arguments to `petsclinter._util.get_formatted_source_from_source_range()`
    **kwargs: dict
      the keyword arguments to `petsclinter._util.get_formatted_source_from_source_range()`

    Returns
    -------
    ret:
      the formatted source of the `SourceRange`
    """
    return _util.get_formatted_source_from_source_range(self, *args, **kwargs)

  def view(self, *args, **kwargs) -> None:
    r"""View a `SourceRange`

    Parameters
    ----------
    *args: iterable
      the positional arguments to `SourceRange.formatted()`
    **kwargs: dict
      the keyword arguments to `SourceRange.formatted()`, `num_context` defaults to 5 if not given

    Notes
    -----
    Prints the formatted source to stdout.
    """
    kwargs.setdefault('num_context', 5)
    print(self.formatted(*args, **kwargs))
    return
606