xref: /petsc/lib/petsc/bin/maint/petsclinter/petsclinter/classes/_linter.py (revision b0dcfd164860a975c76f90dabf1036901aab1c4e) !
1#!/usr/bin/env python3
2"""
3# Created: Mon Jun 20 16:40:24 2022 (-0400)
4# @author: Jacob Faibussowitsch
5"""
6from __future__ import annotations
7
8import weakref
9import difflib
10import textwrap
11import datetime
12import itertools
13import collections
14import clang.cindex as clx # type: ignore[import]
15
16from .._typing import *
17
18from ._diag      import DiagnosticManager, Diagnostic
19from ._cursor    import Cursor
20from ._src_pos   import SourceLocation, SourceRange
21from ._patch     import Patch
22from ._scope     import Scope
23from ._weak_list import WeakList
24from ._add_line  import Addline
25
26from .._error import ParsingError, KnownUnhandleableCursorError
27
28from ..util._clang   import clx_func_call_cursor_kinds, base_clang_options
29from ..util._utility import subprocess_capture_output, subprocess_check_returncode
30
class DiagnosticsContainer:
  """Collect diagnostics grouped first by source file, then by the cursor that produced them.

  Each record is a ``(formatted_header, have_patch, patch_id)`` tuple; ``patch_id`` is -1 when
  the diagnostic carries no `Patch`.
  """
  __slots__ = 'prefix', 'data'

  prefix: str
  data: dict[Path, dict[int, WeakListType]]

  def __init__(self, prefix: str) -> None:
    r"""Construct a `DiagnosticsContainer`

    Parameters
    ----------
    prefix :
      the prefix for this diagnostic container
    """
    self.prefix = prefix
    self.data   = {}

  def add_diagnostic(self, cursor: Cursor, diagnostic: Diagnostic) -> None:
    r"""Record `diagnostic` against `cursor`

    Parameters
    ----------
    cursor :
      the cursor about which the `diagnostic` is concerned
    diagnostic :
      the diagnostic detailing the error or warning
    """
    per_file  = self.data.setdefault(cursor.get_file(), {})
    cursor_id = cursor.hash
    if cursor_id in per_file:
      records = per_file[cursor_id]
    else:
      records = per_file[cursor_id] = WeakList()

    patch = diagnostic.patch
    if patch is None:
      records.append((diagnostic.formatted_header(), False, -1))
    else:
      records.append((diagnostic.formatted_header(), True, patch.id))
      # give the patch a weak handle back to this record list so it can invalidate the
      # entry without keeping the list alive
      patch.attach(weakref.ref(records))

  def split_and_condense(self) -> tuple[CondensedDiags, CondensedDiags]:
    r"""Split the diagnostics into resolved and unresolved and condense them per path

    Returns
    -------
    unresolved :
      a dict mapping a `Path` to a list of diagnostic messages for all unresolved diagnostics
      (i.e. those without a `Patch`)
    resolved :
      a dict mapping a `Path` to a list of diagnostic messages for all resolved diagnostics
      (i.e. those with a `Patch`)

    Notes
    -----
    Paths with no diagnostics of a given kind are omitted from that dict entirely.
    """
    unresolved: CondensedDiags = {}
    resolved: CondensedDiags   = {}
    for path, per_cursor in self.data.items():
      fixed: list[str]   = []
      unfixed: list[str] = []
      for records in per_cursor.values():
        for message, has_fix, _ in records:
          (fixed if has_fix else unfixed).append(message)
      # only emit non-empty entries
      if unfixed:
        unresolved[path] = unfixed
      if fixed:
        resolved[path] = fixed
    return unresolved, resolved

  def view_last(self) -> None:
    r"""Print the last diagnostic added"""
    import petsclinter as pl

    data = self.data
    if data:
      # dicts preserve insertion order, so the last key of the last file holds the
      # most recently added diagnostic
      newest_file = data[next(reversed(data))]
      newest      = newest_file[next(reversed(newest_file))]
      pl.sync_print(newest[-1][0])
127
128@DiagnosticManager.register(
129  ('duplicate-function', 'Check for duplicate function-calls on the same execution path'),
130  ('static-function-candidate', 'Check for functions that are used only within a single TU, and make them static'),
131  ('parsing-error', 'Generic parsing errors')
132)
133class Linter:
134  """
135  Object to manage the collection and processing of errors during a lint run.
136  """
137  __slots__ = 'flags', 'clang_opts', 'verbose', 'index', 'errors', 'warnings', 'patches', 'werror'
138
139  flags: list[str]
140  clang_opts: CXTranslationUnit
141  verbose: int
142  index: clx.Index
143  errors: DiagnosticsContainer
144  warnings: DiagnosticsContainer
145  patches: collections.defaultdict[Path, list[Patch]]
146  werror: bool
147
148  diags: DiagnosticMap # satisfy type checkers
149
150  def __init__(self, compiler_flags: list[str], clang_options: Optional[CXTranslationUnit] = None, verbose: int = 0, werror: bool = False) -> None:
151    r"""Construct a `Linter`
152
153    Parameters
154    ----------
155    compiler_flags :
156      the set of compiler flags to parse with
157    clang_options : optional
158      the set of clang options to pass to the `clang.cindex.Index.parse()` function, defaults to
159      `petsclinter.util.base_clang_options`
160    verbose : optional
161      whether to print verbose output (and at what level)
162    werror : optional
163      whether to treat warnings as errors
164    """
165    if clang_options is None:
166      clang_options = base_clang_options
167
168    self.flags      = compiler_flags
169    self.clang_opts = clang_options
170    self.verbose    = verbose
171    self.index      = clx.Index.create()
172    self.werror     = werror
173    self.clear()
174    return
175
176  def __str__(self) -> str:
177    print_list = [
178      f'Compiler Flags: {self.flags}',
179      f'Clang Options:  {self.clang_opts}',
180      f'Verbose:        {self.verbose}'
181    ]
182    for getter_func in (self.get_all_warnings, self.get_all_errors):
183      for v in getter_func():
184        for mess in v.values():
185          print_list.append('\n'.join(mess))
186    return '\n'.join(print_list)
187
188  def _vprint(self, *args, **kwargs) -> None:
189    r"""Print only if verbose"""
190    if self.verbose:
191      import petsclinter as pl
192
193      pl.sync_print(*args, **kwargs)
194    return
195
  def _check_duplicate_function_calls(self, processed_funcs: dict[str, list[tuple[Cursor, Scope]]]) -> None:
    r"""Check for duplicate instances of functions along the same execution path

    Parameters
    ----------
    processed_funcs :
      a dict mapping parent function names and the list of functions and their scopes

    Notes
    -----
    If two instances of a function have the same `Scope` then they are duplicate and an error is
    logged
    """
    dup_diag = self.diags.duplicate_function
    for function_list in processed_funcs.values():
      # maps a (displayname, arg-names...) signature to the first (cursor, scope) seen for it
      seen: dict[tuple[str, ...], tuple[Cursor, Scope]] = {}
      for func, scope in function_list:
        combo: list[str] = [func.displayname]
        try:
          combo.extend(map(Cursor.get_raw_name_from_cursor, func.get_arguments()))
        except ParsingError:
          # could not resolve an argument name; without the full signature we cannot
          # compare calls, so skip this one
          continue

        # convert to tuple so it is hashable
        combo_tup = tuple(combo)
        if combo_tup not in seen:
          seen[combo_tup] = (func, scope)
        elif scope >= seen[combo_tup][1]:
          # this combination has already been seen, i.e. this call is duplicate!!
          start      = func.extent.start
          startline  = start.line
          tu         = func.translation_unit
          # end of the duplicate call's line (column -1); the patch deletes the whole line
          end        = clx.SourceLocation.from_position(tu, tu.get_file(tu.spelling), startline, -1)
          patch      = Patch(SourceRange.from_locations(start, end), '')
          # show the first occurrence, with enough trailing context to reach this line
          previous   = seen[combo_tup][0].formatted(
            num_before_context=2, num_after_context=startline - seen[combo_tup][0].extent.start.line
          )
          message    = f'Duplicate function found previous identical usage:\n{previous}'
          self.add_diagnostic_from_cursor(
            func, Diagnostic(Diagnostic.Kind.ERROR, dup_diag, message, start, patch=patch)
          )
    return
238
  @staticmethod
  def find_lintable_expressions(tu: clx.TranslationUnit, symbol_names: Container[str]) -> Generator[Union[tuple[clx.Cursor, clx.Cursor, Scope], clx.Cursor], None, None]:
    r"""Finds all lintable expressions in container symbol_names.

    Parameters
    ----------
    tu :
      the `clang.cindex.TranslationUnit` to search
    symbol_names :
      the names of the symbols to search for and lint

    Yields
    ------
    Either a bare top-level `clang.cindex.Cursor` (so its documentation can be checked), or a
    ``(call_cursor, parent_cursor, scope)`` tuple for each call to one of `symbol_names` found
    inside such a top-level cursor.

    Notes
    -----
    Note that if a particular expression is not 100% correctly defined (i.e. would the
    file actually compile) then it will not be picked up by clang AST.

    Function-like macros can be picked up, but it will be in the wrong 'order'. The AST is
    built as if you are about to compile it, so macros are handled before any real
    function definitions in the AST, making it impossible to map a macro invocation to
    its 'parent' function.
    """
    # hoist the cursor kinds into locals; they are compared on every AST node
    UNEXPOSED_DECL = clx.CursorKind.UNEXPOSED_DECL
    SWITCH_STMT    = clx.CursorKind.SWITCH_STMT
    CASE_STMT      = clx.CursorKind.CASE_STMT
    COMPOUND_STMT  = clx.CursorKind.COMPOUND_STMT
    CALL_EXPR      = clx.CursorKind.CALL_EXPR

    def walk_scope_switch(parent: clx.Cursor, scope: Scope) -> Generator[tuple[clx.Cursor, clx.Cursor, Scope], None, None]:
      """
      Special treatment for switch-case since the AST setup for it is mind-bogglingly stupid.
      The first node after a case statement is listed as the cases *child* whereas every other
      node (including the break!!) is the cases *sibling*
      """
      # in case we get here from a scope decrease within a case
      case_scope = scope
      for child in parent.get_children():
        child_kind = child.kind
        if child_kind == CASE_STMT:
          # create a new scope every time we encounter a case, this is now for all intents
          # and purposes the 'scope' going forward. We don't overwrite the original scope
          # since we still need each case scope to be the previous scopes sibling
          case_scope = scope.sub()
          yield from walk_scope(child, scope=case_scope)
        elif child_kind == CALL_EXPR:
          if child.spelling in symbol_names:
            yield (child, possible_parent, case_scope)
        elif child_kind == COMPOUND_STMT:
          # compound statement indicates a change of logical scope
          yield from walk_scope_switch(child, case_scope.sub())

    def walk_scope(parent: clx.Cursor, scope: Optional[Scope] = None) -> Generator[tuple[clx.Cursor, clx.Cursor, Scope], None, None]:
      """
      Walk the tree determining the scope of a node. here 'scope' refers not only
      to lexical scope but also to logical scope, see Scope object above
      """
      if scope is None:
        scope = Scope()

      for child in parent.get_children():
        child_kind = child.kind
        if child_kind == SWITCH_STMT:
          # switch-case statements require special treatment, we skip to the compound
          # statement
          switch_children = [c for c in child.get_children() if c.kind == COMPOUND_STMT]
          assert len(switch_children) == 1, "Switch statement has multiple '{' operators?"
          yield from walk_scope_switch(switch_children[0], scope.sub())
        elif child_kind == CALL_EXPR:
          if child.spelling in symbol_names:
            yield (child, possible_parent, scope)
        elif child_kind == COMPOUND_STMT:
          # scope has decreased
          yield from walk_scope(child, scope=scope.sub())
        else:
          # same scope
          yield from walk_scope(child, scope=scope)

    # normal lintable cursor kinds, the type of cursors we directly want to deal with
    lintable_kinds          = clx_func_call_cursor_kinds | {clx.CursorKind.ENUM_DECL}
    # "extended" lintable kinds.
    extended_lintable_kinds = lintable_kinds | {UNEXPOSED_DECL}

    cursor   = tu.cursor
    filename = tu.spelling
    for possible_parent in cursor.get_children():
      # getting filename is for some reason stupidly expensive, so we do this check first
      parent_kind = possible_parent.kind
      if parent_kind not in extended_lintable_kinds:
        continue
      try:
        if possible_parent.location.file.name != filename:
          continue
      except AttributeError:
        # possible_parent.location.file is None
        continue
      # Sometimes people declare their functions PETSC_EXTERN inline, which would normally
      # trip up the "lintable kinds" detection since the top-level cursor points to a
      # macro (i.e. unexposed decl). In this case we need to check the cursors 1 level
      # down for any lintable kinds.
      if parent_kind == UNEXPOSED_DECL:
        for sub_cursor in possible_parent.get_children():
          if sub_cursor.is_definition() and sub_cursor.kind in lintable_kinds:
            possible_parent = sub_cursor
            break
        else:
          # no lintable definition beneath the macro, nothing to do here
          continue
      # if we've gotten this far we have found something worth looking into, so first
      # yield the parent to process any documentation
      yield possible_parent
      if possible_parent.kind in clx_func_call_cursor_kinds:
        # then yield any children matching our function calls
        yield from walk_scope(possible_parent)
350
351  @staticmethod
352  def get_argument_cursors(func_cursor: CursorLike) -> tuple[Cursor, ...]:
353    r"""Given a cursor representing a function, return a tuple of `Cursor`'s of its arguments
354
355    Parameters
356    ----------
357    func_cursor :
358      the function decl cursor
359
360    Returns
361    -------
362    cursors :
363      a tuple of `func_cursors` arguments
364    """
365    return tuple(Cursor(a, i) for i, a in enumerate(func_cursor.get_arguments(), start=1))
366
367  def clear(self) -> None:
368    r"""Resets the linter error, warning, and patch buffers.
369
370    Notes
371    -----
372    Called automatically before parsing a file
373    """
374    self.errors   = DiagnosticsContainer("-" * 92)
375    self.warnings = DiagnosticsContainer("%" * 92)
376    # This can actually just be a straight list, since each linter object only ever
377    # handles a single file, but use dict nonetheless
378    self.patches  = collections.defaultdict(list)
379    return
380
381  def parse(self, filename: PathLike) -> Linter:
382    r"""Parse a file for errors
383
384    Parameters
385    ----------
386    filename :
387      the path of the file to parse
388
389    Returns
390    -------
391    self :
392      the `Linter` instance
393    """
394    self.clear()
395    self._vprint('Processing file     ', filename)
396    tu = self.index.parse(str(filename), args=self.flags, options=self.clang_opts)
397    if tu.diagnostics:
398      self._vprint('\n'.join(map(str, tu.diagnostics)))
399    self.process(tu)
400    return self
401
402  def parse_in_memory(self, src: str) -> clx.TranslationUnit:
403    r"""Parse a particular source string in memory
404
405    Parameters
406    ----------
407    src :
408      the source string to parse
409
410    Returns
411    -------
412    tu :
413      the translation unit resulting from the parse
414
415    Notes
416    -----
417    This lets you act as if `src` was some mini file somewhere on disk
418    """
419    fname = 'tempfile.cpp'
420    return clx.TranslationUnit.from_source(
421      fname, args=self.flags, unsaved_files=[(fname, src)], options=self.clang_opts
422    )
423
424  def _check_possible_static_function(self, func: Cursor) -> None:
425    r"""Check that `func` could be make static
426
427    Parameters
428    ----------
429    func :
430      the function cursor to check
431
432    Notes
433    -----
434    Determines whether `func` can be made static, and if so, adds the static qualifier to it. Currently
435    the check is very basic, it only catches functions with are defined in a TU, and used absolutely
436    nowhere else. As soon as it is defined in a header, or other file, this function bails immediately.
437
438    We could try and figure out whether it belongs in that header, but that has many false positives.
439    We would need to be able to:
440
441    1. (reliably) distinguish between public and internal API
442    2. if the function is internal API, (relibaly) determine whether it is used within the same mansec.
443       If it is, we can make the decl PETSC_INTERN (if it isn't already). If it's used from multiple
444       mansecs, we can make it PETSC_SINGLE_LIBRARY_INTERN (if it isn't already).
445
446    But these are hard problems, which we leave for another time...
447    """
448    def cursor_is_public(cursor: CursorLike) -> bool:
449      if cursor.storage_class == clx.StorageClass.EXTERN or cursor.spelling == 'main':
450        return True
451
452      for child in cursor.get_children():
453        if child.kind == clx.CursorKind.VISIBILITY_ATTR and child.spelling in {'default', 'hidden'}:
454          # The function cursor has a PETSC_INTERN or PETSC_EXTERN attached
455          return True
456      return False
457
458    if func.kind != clx.CursorKind.FUNCTION_DECL or func.storage_class == clx.StorageClass.STATIC:
459      # nothing to do
460      return
461
462    if cursor_is_public(func):
463      return
464
465    func_decl = func.get_declaration()
466    if cursor_is_public(func_decl):
467      # the cursor declaration has some kind of public api, be that extern, PETSC_EXTERN,
468      # or whatever
469      return
470
471    lex_parent = func_decl.lexical_parent
472    try:
473      lex_parent_kind = lex_parent.kind
474    except ValueError as ve:
475      # Possible ValueError: Unknown template argument kind 300
476      #
477      # I think this is a bug in libclang. clx.CursorKind.TRANSLATION_UNIT is 350, I
478      # think it used to be 300, and they haven't updated the python bindings?
479      if 'unknown template argument kind 300' not in str(ve).casefold() and 'unknown template argument kind 350' not in str(ve).casefold():
480        raise
481      lex_parent_kind = clx.CursorKind.TRANSLATION_UNIT
482    if lex_parent_kind == clx.CursorKind.CLASS_DECL:
483      # we have a situation like
484      #
485      # class Foo <---- func_decl.lexical_parent
486      # {
487      #   friend void bar();
488      #               ^^^---- func_decl
489      # };
490      #
491      # void bar() { }
492      #      ^^^------- func
493      #
494      # Note, I have *ONLY* seen this happen with friend functions, so let's assert that
495      # that is the case here so we can potentially handle the other variants
496      assert any('friend' in t.spelling for t in lex_parent.get_tokens())
497      return
498
499    origin_file = func.get_file()
500    decl_file   = Cursor.get_file_from_cursor(func_decl).resolve()
501    if origin_file != decl_file:
502      # The function declaration is in some other file, presumably a header. This implies
503      # the function is used elsewhere/is public API.
504      return
505
506    result_type     = func.result_type
507    result_spelling = result_type.spelling
508    if result_type.get_declaration().kind == clx.CursorKind.NO_DECL_FOUND:
509      # The result type declaration (i.e. typedef x_type result_type) cannot be located!
510      # This indicates 1 of 2 scenarios:
511      #
512      # 1. The type is built-in, e.g. int, or void, or double
513      # 2. The type is actually completely uknown (likely due to linter not having the
514      #    appropriate package installed). In this case, the type defaults to int, meaning
515      #    that instead of searching for e.g. "BlopexInt PETSC_dpotrf_interface" the
516      #    linter searches for "int PETSC_dpotrf_interface" which of course it will not
517      #    find!
518
519      # extract 'PETSC_EXTERN inline void' from 'PETSC_EXTERN inline void FooBar(...)'
520      raw_result_spelling = func.raw().partition(func.spelling)[0]
521      if result_spelling not in raw_result_spelling:
522        # The type is likely unknown, i.e. it defaulted to int
523        assert result_type.kind == clx.TypeKind.INT
524        # Let's try and extract the type nonetheless
525        raw_types = [
526          t
527          for t in raw_result_spelling.split()
528            if t not in {'static', 'inline', 'extern', '\"C\"', '\"C++\"'} or
529              not t.startswith(('PETSC_', 'SLEPC_'))
530        ]
531        if len(raw_types) > 1:
532          # something we didn't handle
533          return
534        # we have narrowed the type down to just a single string, let's try it out
535        result_spelling = raw_types[0]
536
537    if result_spelling.endswith('*'):
538      # if the result type is a pointer, it will sit flush against the function name, so
539      # we should match for potentially 0 spaces, i.e.
540      #
541      # void *Foo()
542      #
543      # We don't match for exactly 0 spaces since someone may have disabled clang-format
544      # and hence it's possible the pointer is not flush.
545      result_type_spacing = ' *'
546    else:
547      # not a pointer, so result type must always be at least 1 space away from function
548      # name
549      result_type_spacing = ' +'
550    # have to escape the pointers
551    result_spelling    = result_spelling.replace('*', '\*')
552    func_name_and_type = rf'{result_spelling}{result_type_spacing}{func.spelling} *\('
553    # The absolute final check, we need to grep for the symbol across the code-base. This
554    # is needed for cases when:
555    #
556    # // my_file.c
557    # PetscErrorCode PetscFoo(PetscBar baz) <--- marked as a potential static candidate
558    # {
559    #   ...
560    #
561    # // my_file.h
562    # #if PetscDefined(HAVE_FOO)
563    # PETSC_EXTERN PetscErrorCode PetscFoo(PetscBar);
564    # #endif
565    #
566    # In the case where the linter is not configured with PETSC_HAVE_FOO, the above checks
567    # will fail to find the extern decl (since from the viewpoint of the compiler, it
568    # literally does not exist) and hence produce a false positive. So the only way to
569    # reliably determine is to search the text.
570    #
571    # The alternative is to emit the diagnostic anyway, knowing that there will be false
572    # positives. To offset this, we could attach a note that says "if this is a false
573    # positive, add PETSC_EXTERN/PETSC_INTERN to the definition as well".
574    #
575    # We chose not to do that because it makes the whole process more brittle, and
576    # introduces otherwise unecessary changes just to placate the linter.
577    ret = subprocess_capture_output([
578      'git', '--no-pager', 'grep', '--color=never', '-r', '-l',
579      '-P', # use Perl regex, which is -- for whatever reason -- over 6x faster
580      '-e', func_name_and_type, '--',
581      # The reason for all of this exclusion nonsense is because without it, this search
582      # is __slooooow__. Running the test-lint job, even a naive search (search only
583      # src and include) takes ~30s to complete. Adding these exclusions drops that to
584      # just under 7s. Not great, but manageable.
585
586      # magic git pathspecs see
587      # https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec
588
589      # :/NAME means match NAME from the project root directory, i.e. PETSC_DIR
590      ':/src',
591      ':/include',
592      # exclude all Fortran wrappers
593      ':!**/ftn-*/**',
594      # exclude docs, we don't want to match changelog mentions
595      ':!**/docs/**',
596      # exclude bindings (the symbol must have been declared extern somewhere else for it
597      # to be usable from them anyways)
598      ':!**/binding/**',
599      # similarly, for the symbol to be usable from tests and tutorials it must be
600      # extern somewhere else
601      ':!**/tests/**',
602      ':!**/tutorials/**',
603      # ignore code we don't own
604      ':!**/yaml/**',
605      ':!**/perfstubs/**',
606      # last but not least, don't search makefiles
607      ':!*makefile'
608    ], check=False)
609    if (retcode := ret.returncode) == 0:
610      # found matches
611      str_origin = str(origin_file)
612      for found_file in filter(len, map(str.strip, ret.stdout.splitlines())):
613        # origin_file is:
614        # '/full/path/to/petsc/src/sys/tests/linter/testStaticFunctionCandidates.cxx'
615        # found_file is:
616        # 'src/sys/tests/linter/testStaticFunctionCandidates.cxx'
617        if not str_origin.endswith(found_file):
618          # Some other file contains a match. We conservatively assume that the API is
619          # somehow public and bail
620          return
621    elif retcode == 1:
622      # no matches, the current file appears to be the only place this function is visible
623      # from
624      pass
625    else:
626      # something else went wrong, propagate the error
627      subprocess_check_returncode(ret)
628
629    start = SourceLocation.cast(func.extent.start, tu=func.translation_unit)
630    self.add_diagnostic_from_cursor(
631      func, Diagnostic(
632        Diagnostic.Kind.ERROR, self.diags.static_function_candidate,
633        Diagnostic.make_message_from_formattable(
634          f'Function \'{func.name}()\' does not appear to be used anywhere outside of '
635          f'{origin_file.name}, and can be made static',
636          crange=func
637        ),
638        start,
639        # Note the space in 'static '! start points to exactly the first letter of
640        # the type, so we need to insert an extra space
641        #
642        #  ~~~ start
643        # v
644        # PetscErrorCode Foo(...)
645        # {
646        #    ...
647        #
648        patch=Patch(SourceRange.from_locations(start, start), 'static ')
649      )
650    )
651    return
652
653  def process(self, tu: clx.TranslationUnit) -> None:
654    r"""Process a translation unit for errors
655
656    Parameters
657    ----------
658    tu :
659      the translation unit to process
660
661    Notes
662    -----
663    This is the main entry point for the linter
664    """
665    from ..checks import _register
666
667    # TODO: these check function maps need to be unified into a single dispatcher... it is
668    # not very intuitive currently how to add "general" AST-matching checks.
669    func_map        = _register.check_function_map
670    docs_map        = _register.check_doc_map
671    parsing_diag    = self.diags.parsing_error
672    processed_funcs = collections.defaultdict(list)
673
674    for results in self.find_lintable_expressions(tu, set(func_map.keys())):
675      try:
676        if isinstance(results, clx.Cursor):
677          func = Cursor.cast(results)
678          docs_map[results.kind](self, func)
679        else:
680          func, parent, scope = results
681          func                = Cursor.cast(func)
682          parent              = Cursor.cast(parent)
683          func_map[func.spelling](self, func, parent)
684          processed_funcs[parent.name].append((func, scope))
685      except KnownUnhandleableCursorError:
686        # ignored
687        pass
688      except ParsingError as pe:
689        tu_cursor = Cursor.cast(tu.cursor)
690        self.add_diagnostic_from_cursor(
691          tu_cursor, Diagnostic(Diagnostic.Kind.WARNING, parsing_diag, str(pe), tu_cursor.extent.start)
692        )
693        # we don't want to check these
694        continue
695      self._check_possible_static_function(func)
696
697    self._check_duplicate_function_calls(processed_funcs)
698    return
699
700  def add_diagnostic_from_cursor(self, cursor: Cursor, diagnostic: Diagnostic) -> None:
701    r"""Given a cursor and a diagnostic, log the diagnostic with the linter
702
703    Parameters
704    ----------
705    cursor :
706      the cursor about which the `diagnostic` is concerned
707    diagnostic :
708      the diagnostic detailing the error or warning
709
710    Raises
711    ------
712    TypeError
713      if `cursor` is not a `Cursor`
714    ValueError
715      if the diagnostic kind is not handled
716    """
717    if not isinstance(cursor, Cursor):
718      raise TypeError(type(cursor))
719    if diagnostic.kind == Diagnostic.Kind.ERROR:
720      container = self.errors
721    elif diagnostic.kind == Diagnostic.Kind.WARNING:
722      container = self.warnings
723    else:
724      raise ValueError(f'Unhandled diagnostic kind {diagnostic.kind}')
725
726    if diagnostic.disabled():
727      return
728
729    container.add_diagnostic(cursor, diagnostic)
730    if (patch := diagnostic.patch) is not None:
731      self.patches[cursor.get_file()].append(patch)
732    return
733
734  def view_last_error(self) -> None:
735    r"""Print the last error added, useful for debugging"""
736    return self.errors.view_last()
737
738  def view_last_warning(self) -> None:
739    r"""Print the last warning added, useful for debugging"""
740    return self.warnings.view_last()
741
742  def get_all_errors(self) -> tuple[CondensedDiags, CondensedDiags]:
743    r"""Return all errors collected so far
744
745    Returns
746    -------
747    all_unresolved :
748      a list of tuples of the path and message of unresolved errors (i.e. those without a `Patch`)
749    all_resolved :
750      a list of tuples of the path and message of resolved errors (i.e. those with a `Patch`)
751    """
752    return self.errors.split_and_condense()
753
754  def get_all_warnings(self) -> tuple[CondensedDiags, CondensedDiags]:
755    r"""Return all warnings collected so far
756
757    Returns
758    -------
759    all_unresolved :
760      a list of tuples of the path and message of unresolved warnings (i.e. those without a `Patch`)
761    all_resolved :
762      a list of tuples of the path and message of resolved warnings (i.e. those with a `Patch`)
763      (should be empty!)
764    """
765    return self.warnings.split_and_condense()
766
  def coalesce_patches(self) -> list[PathDiffPair]:
    r"""Given a set of patches, collapse all patches and return the minimal set of diffs required

    Returns
    -------
    patches :
      the list of pairs of coalesced patches and their source files

    Notes
    -----
    Overlapping or same-line patches are merged in-place in `self.patches` before the diffs are
    generated, so this call mutates the patch lists.
    """
    def combine(filename: Path, patches: list[Patch]) -> PathDiffPair:
      # render each patch as a unified diff and join them into one diff string for the file
      fstr                   = str(filename)
      diffs: list[list[str]] = []
      for patch in patches:
        rn  = datetime.datetime.now().ctime()
        tmp = list(
          difflib.unified_diff(
            patch._make_source().splitlines(True), patch.collapse().splitlines(True),
            fromfile=fstr, tofile=fstr, fromfiledate=rn, tofiledate=rn, n=patch.ctxlines
          )
        )
        # tmp[0:2] are the ---/+++ file header lines; tmp[2] is the @@ hunk header, whose
        # line numbers must be offset to the patch's real position in the file
        tmp[2] = Addline.diff_line_re.sub(Addline(patch.extent.start.line), tmp[2])
        # only the first diff should get the file heading
        diffs.append(tmp[2:] if diffs else tmp)
      return filename, ''.join(itertools.chain.from_iterable(diffs))

    def merge_patches(patch_list: list[Patch], patch: Patch) -> tuple[bool, Patch]:
      # try to merge `patch` with the first entry of `patch_list` it shares a line with or
      # overlaps; on success the old entry is removed and the merged patch returned
      patch_extent       = patch.extent
      patch_extent_start = patch_extent.start.line
      for i, previous_patch in enumerate(patch_list):
        prev_patch_extent = previous_patch.extent
        if patch_extent_start == prev_patch_extent.start.line or patch_extent.overlaps(
            prev_patch_extent
        ):
          # this should now be the previous patch on the same line
          merged_patch = previous_patch.merge(patch)
          assert patch_list[i] == previous_patch
          del patch_list[i]
          return True, merged_patch
      return False, patch

    for patch_list in self.patches.values():
      # merge overlapping patches together before we collapse the actual patches
      # themselves
      new_list: list[Patch] = []
      for patch in sorted(patch_list, key=lambda x: x.extent.start.line):
        # we loop until we cannot merge the patch with any additional patches
        while 1:
          merged, patch = merge_patches(new_list, patch)
          if not merged:
            break
        new_list.append(patch)
      # replace the contents in-place so other references to this list stay valid
      patch_list[:] = new_list

    return list(itertools.starmap(combine, self.patches.items()))
820
821  def diagnostics(self) -> tuple[CondensedDiags, CondensedDiags, CondensedDiags, list[PathDiffPair]]:
822    r"""Return the errors left (unfixed), fixed errors, warnings and avaiable patches. Automatically
823    coalesces the patches
824
825    Returns
826    -------
827    errors_left :
828      the condensed set of filename - list of error-messages for errors that could not be patched
829    errors_fixed :
830      the condensed set of filename - list of error-messages for errors that are patchable
831    warnings_left :
832      the condensed set of filename - list of warning-messages for warnings that could not be patched
833    patches :
834      the list of patches corresponding to entries in `errors_fixed`
835
836    Raises
837    ------
838    RuntimeError
839      if there exist any fixable warnings
840
841    Notes
842    -----
843    The linter technically also collects a `warnings_fixed` set, but these are not returned.
844    As warnings indicate a failure of the linter to parse or understand some construct there is no
845    reason for a warning to ever be fixable. These diagnostics should be errors instead.
846    """
847    # order is critical, coalesce_patches() will prune the patch and warning lists
848    patches                       = self.coalesce_patches()
849    errors_left, errors_fixed     = self.get_all_errors()
850    warnings_left, warnings_fixed = self.get_all_warnings()
851    if nfix := sum(map(len, warnings_fixed.values())):
852      raise RuntimeError(
853        f'Have {nfix} "fixable" warnings, this should not happen! '
854        'If a warning has a fix then it should be an error instead!'
855      )
856    return errors_left, errors_fixed, warnings_left, patches
857