1#!/usr/bin/env python3 2""" 3# Created: Mon Jun 20 16:40:24 2022 (-0400) 4# @author: Jacob Faibussowitsch 5""" 6from __future__ import annotations 7 8import weakref 9import difflib 10import textwrap 11import datetime 12import itertools 13import collections 14import clang.cindex as clx # type: ignore[import] 15 16from .._typing import * 17 18from ._diag import DiagnosticManager, Diagnostic 19from ._cursor import Cursor 20from ._src_pos import SourceLocation, SourceRange 21from ._patch import Patch 22from ._scope import Scope 23from ._weak_list import WeakList 24from ._add_line import Addline 25 26from .._error import ParsingError, KnownUnhandleableCursorError 27 28from ..util._clang import clx_func_call_cursor_kinds, base_clang_options 29from ..util._utility import subprocess_capture_output, subprocess_check_returncode 30 31class DiagnosticsContainer: 32 __slots__ = 'prefix', 'data' 33 34 prefix: str 35 data: dict[Path, dict[int, WeakListType]] 36 37 def __init__(self, prefix: str) -> None: 38 r"""Construct a `DiagnosticsContainer` 39 40 Parameters 41 ---------- 42 prefix : 43 the prefix for this diagnostic container 44 """ 45 self.prefix = prefix 46 self.data = {} 47 return 48 49 def add_diagnostic(self, cursor: Cursor, diagnostic: Diagnostic) -> None: 50 r"""Add a `Diagnostic` to this container 51 52 Parameters 53 ---------- 54 cursor : 55 the cursor about which the `diagnostic` is concerned 56 diagnostic : 57 the diagnostic detailing the error or warning 58 """ 59 filename = cursor.get_file() 60 data = self.data 61 # Of the various dict-key probing method, try-except is the fastest lookup method if a 62 # key exists in a dict, but is by far the slowest in the cases where the key is 63 # missing. 64 # 65 # But each linter instance is likely to only be used on a single file so filename is 66 # very likely to be in the dict. 67 try: 68 file_local = data[filename] 69 except KeyError: 70 file_local = data[filename] = {} 71 72 # note that cursors are _unlikely_ to be in the dict, and hence do the if-test 73 cursor_id = cursor.hash 74 if cursor_id not in file_local: 75 file_local[cursor_id] = WeakList() 76 77 patch = diagnostic.patch 78 have_patch = patch is not None 79 # type checkers don't grok that have_patch implies patch != None 80 patch_id = TYPE_CAST(Patch, patch).id if have_patch else -1 81 cursor_id_file_local = file_local[cursor_id] 82 cursor_id_file_local.append((diagnostic.formatted_header(), have_patch, patch_id)) 83 84 if have_patch: 85 assert patch is not None # to satisfy type checkers 86 patch.attach(weakref.ref(cursor_id_file_local)) 87 return 88 89 def split_and_condense(self) -> tuple[CondensedDiags, CondensedDiags]: 90 r"""Split the diagnostics into resolved and unresolved and condense them per path 91 92 Returns 93 ------- 94 unresolved : 95 a dict mapping a `Path` to a list of diagnostic messages for all unresolved diagnostics 96 (i.e. those without a `Patch`) 97 resolved : 98 a dict mapping a `Path` to a list of diagnostic messages for all resolved diagnostics 99 (i.e. those with a `Patch`) 100 """ 101 unresolved: CondensedDiags = {p : [] for p in self.data.keys()} 102 resolved: CondensedDiags = {p : [] for p in self.data.keys()} 103 for path, diags in self.data.items(): 104 resolved_list = resolved[path] 105 unresolved_list = unresolved[path] 106 for err_list in diags.values(): 107 for err, have_patch, _ in err_list: 108 if have_patch: 109 resolved_list.append(err) 110 else: 111 unresolved_list.append(err) 112 # remove any empty sets 113 for d in (resolved, unresolved): 114 if not d[path]: 115 del d[path] 116 return unresolved, resolved 117 118 def view_last(self) -> None: 119 r"""Print the last diagnostic added""" 120 import petsclinter as pl 121 122 for files in reversed(self.data): 123 diags = self.data[files] 124 last = diags[next(reversed(diags))] 125 pl.sync_print(last[-1][0]) 126 return 127 128@DiagnosticManager.register( 129 ('duplicate-function', 'Check for duplicate function-calls on the same execution path'), 130 ('static-function-candidate', 'Check for functions that are used only within a single TU, and make them static'), 131 ('parsing-error', 'Generic parsing errors') 132) 133class Linter: 134 """ 135 Object to manage the collection and processing of errors during a lint run. 136 """ 137 __slots__ = 'flags', 'clang_opts', 'verbose', 'index', 'errors', 'warnings', 'patches', 'werror' 138 139 flags: list[str] 140 clang_opts: CXTranslationUnit 141 verbose: int 142 index: clx.Index 143 errors: DiagnosticsContainer 144 warnings: DiagnosticsContainer 145 patches: collections.defaultdict[Path, list[Patch]] 146 werror: bool 147 148 diags: DiagnosticMap # satisfy type checkers 149 150 def __init__(self, compiler_flags: list[str], clang_options: Optional[CXTranslationUnit] = None, verbose: int = 0, werror: bool = False) -> None: 151 r"""Construct a `Linter` 152 153 Parameters 154 ---------- 155 compiler_flags : 156 the set of compiler flags to parse with 157 clang_options : optional 158 the set of clang options to pass to the `clang.cindex.Index.parse()` function, defaults to 159 `petsclinter.util.base_clang_options` 160 verbose : optional 161 whether to print verbose output (and at what level) 162 werror : optional 163 whether to treat warnings as errors 164 """ 165 if clang_options is None: 166 clang_options = base_clang_options 167 168 self.flags = compiler_flags 169 self.clang_opts = clang_options 170 self.verbose = verbose 171 self.index = clx.Index.create() 172 self.werror = werror 173 self.clear() 174 return 175 176 def __str__(self) -> str: 177 print_list = [ 178 f'Compiler Flags: {self.flags}', 179 f'Clang Options: {self.clang_opts}', 180 f'Verbose: {self.verbose}' 181 ] 182 for getter_func in (self.get_all_warnings, self.get_all_errors): 183 for v in getter_func(): 184 for mess in v.values(): 185 print_list.append('\n'.join(mess)) 186 return '\n'.join(print_list) 187 188 def _vprint(self, *args, **kwargs) -> None: 189 r"""Print only if verbose""" 190 if self.verbose: 191 import petsclinter as pl 192 193 pl.sync_print(*args, **kwargs) 194 return 195 196 def _check_duplicate_function_calls(self, processed_funcs: dict[str, list[tuple[Cursor, Scope]]]) -> None: 197 r"""Check for duplicate instances of functions along the same execution path 198 199 Parameters 200 ---------- 201 processed_funcs : 202 a dict mapping parent function names and the list of functions and their scopes 203 204 Notes 205 ----- 206 If two instances of a function have the same `Scope` then they are duplicate and an error is 207 logged 208 """ 209 dup_diag = self.diags.duplicate_function 210 for function_list in processed_funcs.values(): 211 seen = {} 212 for func, scope in function_list: 213 combo: list[str] = [func.displayname] 214 try: 215 combo.extend(map(Cursor.get_raw_name_from_cursor, func.get_arguments())) 216 except ParsingError: 217 continue 218 219 # convert to tuple so it is hashable 220 combo_tup = tuple(combo) 221 if combo_tup not in seen: 222 seen[combo_tup] = (func, scope) 223 elif scope >= seen[combo_tup][1]: 224 # this combination has already been seen, i.e. this call is duplicate!! 225 start = func.extent.start 226 startline = start.line 227 tu = func.translation_unit 228 end = clx.SourceLocation.from_position(tu, tu.get_file(tu.spelling), startline, -1) 229 patch = Patch(SourceRange.from_locations(start, end), '') 230 previous = seen[combo_tup][0].formatted( 231 num_before_context=2, num_after_context=startline - seen[combo_tup][0].extent.start.line 232 ) 233 message = f'Duplicate function found previous identical usage:\n{previous}' 234 self.add_diagnostic_from_cursor( 235 func, Diagnostic(Diagnostic.Kind.ERROR, dup_diag, message, start, patch=patch) 236 ) 237 return 238 239 @staticmethod 240 def find_lintable_expressions(tu: clx.TranslationUnit, symbol_names: Container[str]) -> Generator[Union[tuple[clx.Cursor, clx.Cursor, Scope], clx.Cursor], None, None]: 241 r"""Finds all lintable expressions in container symbol_names. 242 243 Parameters 244 ---------- 245 tu : 246 the `clang.cindex.TranslationUnit` to search 247 symbol_names : 248 the names of the symbols to search for and lint 249 250 Notes 251 ----- 252 Note that if a particular expression is not 100% correctly defined (i.e. would the 253 file actually compile) then it will not be picked up by clang AST. 254 255 Function-like macros can be picked up, but it will be in the wrong 'order'. The AST is 256 built as if you are about to compile it, so macros are handled before any real 257 function definitions in the AST, making it impossible to map a macro invocation to 258 its 'parent' function. 259 """ 260 UNEXPOSED_DECL = clx.CursorKind.UNEXPOSED_DECL 261 SWITCH_STMT = clx.CursorKind.SWITCH_STMT 262 CASE_STMT = clx.CursorKind.CASE_STMT 263 COMPOUND_STMT = clx.CursorKind.COMPOUND_STMT 264 CALL_EXPR = clx.CursorKind.CALL_EXPR 265 266 def walk_scope_switch(parent: clx.Cursor, scope: Scope) -> Generator[tuple[clx.Cursor, clx.Cursor, Scope], None, None]: 267 """ 268 Special treatment for switch-case since the AST setup for it is mind-boggingly stupid. 269 The first node after a case statement is listed as the cases *child* whereas every other 270 node (including the break!!) is the cases *sibling* 271 """ 272 # in case we get here from a scope decrease within a case 273 case_scope = scope 274 for child in parent.get_children(): 275 child_kind = child.kind 276 if child_kind == CASE_STMT: 277 # create a new scope every time we encounter a case, this is now for all intents 278 # and purposes the 'scope' going forward. We don't overwrite the original scope 279 # since we still need each case scope to be the previous scopes sibling 280 case_scope = scope.sub() 281 yield from walk_scope(child, scope=case_scope) 282 elif child_kind == CALL_EXPR: 283 if child.spelling in symbol_names: 284 yield (child, possible_parent, case_scope) 285 # Cursors that indicate change of logical scope 286 elif child_kind == COMPOUND_STMT: 287 yield from walk_scope_switch(child, case_scope.sub()) 288 289 def walk_scope(parent: clx.Cursor, scope: Optional[Scope] = None) -> Generator[tuple[clx.Cursor, clx.Cursor, Scope], None, None]: 290 """ 291 Walk the tree determining the scope of a node. here 'scope' refers not only 292 to lexical scope but also to logical scope, see Scope object above 293 """ 294 if scope is None: 295 scope = Scope() 296 297 for child in parent.get_children(): 298 child_kind = child.kind 299 if child_kind == SWITCH_STMT: 300 # switch-case statements require special treatment, we skip to the compound 301 # statement 302 switch_children = [c for c in child.get_children() if c.kind == COMPOUND_STMT] 303 assert len(switch_children) == 1, "Switch statement has multiple '{' operators?" 304 yield from walk_scope_switch(switch_children[0], scope.sub()) 305 elif child_kind == CALL_EXPR: 306 if child.spelling in symbol_names: 307 yield (child, possible_parent, scope) 308 elif child_kind == COMPOUND_STMT: 309 # scope has decreased 310 yield from walk_scope(child, scope=scope.sub()) 311 else: 312 # same scope 313 yield from walk_scope(child, scope=scope) 314 315 # normal lintable cursor kinds, the type of cursors we directly want to deal with 316 lintable_kinds = clx_func_call_cursor_kinds | {clx.CursorKind.ENUM_DECL} 317 # "extended" lintable kinds. 318 extended_lintable_kinds = lintable_kinds | {UNEXPOSED_DECL} 319 320 cursor = tu.cursor 321 filename = tu.spelling 322 for possible_parent in cursor.get_children(): 323 # getting filename is for some reason stupidly expensive, so we do this check first 324 parent_kind = possible_parent.kind 325 if parent_kind not in extended_lintable_kinds: 326 continue 327 try: 328 if possible_parent.location.file.name != filename: 329 continue 330 except AttributeError: 331 # possible_parent.location.file is None 332 continue 333 # Sometimes people declare their functions PETSC_EXTERN inline, which would normally 334 # trip up the "lintable kinds" detection since the top-level cursor points to a 335 # macro (i.e. unexposed decl). In this case we need to check the cursors 1 level 336 # down for any lintable kinds. 337 if parent_kind == UNEXPOSED_DECL: 338 for sub_cursor in possible_parent.get_children(): 339 if sub_cursor.is_definition() and sub_cursor.kind in lintable_kinds: 340 possible_parent = sub_cursor 341 break 342 else: 343 continue 344 # if we've gotten this far we have found something worth looking into, so first 345 # yield the parent to process any documentation 346 yield possible_parent 347 if possible_parent.kind in clx_func_call_cursor_kinds: 348 # then yield any children matching our function calls 349 yield from walk_scope(possible_parent) 350 351 @staticmethod 352 def get_argument_cursors(func_cursor: CursorLike) -> tuple[Cursor, ...]: 353 r"""Given a cursor representing a function, return a tuple of `Cursor`'s of its arguments 354 355 Parameters 356 ---------- 357 func_cursor : 358 the function decl cursor 359 360 Returns 361 ------- 362 cursors : 363 a tuple of `func_cursors` arguments 364 """ 365 return tuple(Cursor(a, i) for i, a in enumerate(func_cursor.get_arguments(), start=1)) 366 367 def clear(self) -> None: 368 r"""Resets the linter error, warning, and patch buffers. 369 370 Notes 371 ----- 372 Called automatically before parsing a file 373 """ 374 self.errors = DiagnosticsContainer("-" * 92) 375 self.warnings = DiagnosticsContainer("%" * 92) 376 # This can actually just be a straight list, since each linter object only ever 377 # handles a single file, but use dict nonetheless 378 self.patches = collections.defaultdict(list) 379 return 380 381 def parse(self, filename: PathLike) -> Linter: 382 r"""Parse a file for errors 383 384 Parameters 385 ---------- 386 filename : 387 the path of the file to parse 388 389 Returns 390 ------- 391 self : 392 the `Linter` instance 393 """ 394 self.clear() 395 self._vprint('Processing file ', filename) 396 tu = self.index.parse(str(filename), args=self.flags, options=self.clang_opts) 397 if tu.diagnostics: 398 self._vprint('\n'.join(map(str, tu.diagnostics))) 399 self.process(tu) 400 return self 401 402 def parse_in_memory(self, src: str) -> clx.TranslationUnit: 403 r"""Parse a particular source string in memory 404 405 Parameters 406 ---------- 407 src : 408 the source string to parse 409 410 Returns 411 ------- 412 tu : 413 the translation unit resulting from the parse 414 415 Notes 416 ----- 417 This lets you act as if `src` was some mini file somewhere on disk 418 """ 419 fname = 'tempfile.cpp' 420 return clx.TranslationUnit.from_source( 421 fname, args=self.flags, unsaved_files=[(fname, src)], options=self.clang_opts 422 ) 423 424 def _check_possible_static_function(self, func: Cursor) -> None: 425 r"""Check that `func` could be make static 426 427 Parameters 428 ---------- 429 func : 430 the function cursor to check 431 432 Notes 433 ----- 434 Determines whether `func` can be made static, and if so, adds the static qualifier to it. Currently 435 the check is very basic, it only catches functions with are defined in a TU, and used absolutely 436 nowhere else. As soon as it is defined in a header, or other file, this function bails immediately. 437 438 We could try and figure out whether it belongs in that header, but that has many false positives. 439 We would need to be able to: 440 441 1. (reliably) distinguish between public and internal API 442 2. if the function is internal API, (relibaly) determine whether it is used within the same mansec. 443 If it is, we can make the decl PETSC_INTERN (if it isn't already). If it's used from multiple 444 mansecs, we can make it PETSC_SINGLE_LIBRARY_INTERN (if it isn't already). 445 446 But these are hard problems, which we leave for another time... 447 """ 448 def cursor_is_public(cursor: CursorLike) -> bool: 449 if cursor.storage_class == clx.StorageClass.EXTERN or cursor.spelling == 'main': 450 return True 451 452 for child in cursor.get_children(): 453 if child.kind == clx.CursorKind.VISIBILITY_ATTR and child.spelling in {'default', 'hidden'}: 454 # The function cursor has a PETSC_INTERN or PETSC_EXTERN attached 455 return True 456 return False 457 458 if func.kind != clx.CursorKind.FUNCTION_DECL or func.storage_class == clx.StorageClass.STATIC: 459 # nothing to do 460 return 461 462 if cursor_is_public(func): 463 return 464 465 func_decl = func.get_declaration() 466 if cursor_is_public(func_decl): 467 # the cursor declaration has some kind of public api, be that extern, PETSC_EXTERN, 468 # or whatever 469 return 470 471 lex_parent = func_decl.lexical_parent 472 try: 473 lex_parent_kind = lex_parent.kind 474 except ValueError as ve: 475 # Possible ValueError: Unknown template argument kind 300 476 # 477 # I think this is a bug in libclang. clx.CursorKind.TRANSLATION_UNIT is 350, I 478 # think it used to be 300, and they haven't updated the python bindings? 479 if 'unknown template argument kind 300' not in str(ve).casefold() and 'unknown template argument kind 350' not in str(ve).casefold(): 480 raise 481 lex_parent_kind = clx.CursorKind.TRANSLATION_UNIT 482 if lex_parent_kind == clx.CursorKind.CLASS_DECL: 483 # we have a situation like 484 # 485 # class Foo <---- func_decl.lexical_parent 486 # { 487 # friend void bar(); 488 # ^^^---- func_decl 489 # }; 490 # 491 # void bar() { } 492 # ^^^------- func 493 # 494 # Note, I have *ONLY* seen this happen with friend functions, so let's assert that 495 # that is the case here so we can potentially handle the other variants 496 assert any('friend' in t.spelling for t in lex_parent.get_tokens()) 497 return 498 499 origin_file = func.get_file() 500 decl_file = Cursor.get_file_from_cursor(func_decl).resolve() 501 if origin_file != decl_file: 502 # The function declaration is in some other file, presumably a header. This implies 503 # the function is used elsewhere/is public API. 504 return 505 506 result_type = func.result_type 507 result_spelling = result_type.spelling 508 if result_type.get_declaration().kind == clx.CursorKind.NO_DECL_FOUND: 509 # The result type declaration (i.e. typedef x_type result_type) cannot be located! 510 # This indicates 1 of 2 scenarios: 511 # 512 # 1. The type is built-in, e.g. int, or void, or double 513 # 2. The type is actually completely uknown (likely due to linter not having the 514 # appropriate package installed). In this case, the type defaults to int, meaning 515 # that instead of searching for e.g. "BlopexInt PETSC_dpotrf_interface" the 516 # linter searches for "int PETSC_dpotrf_interface" which of course it will not 517 # find! 518 519 # extract 'PETSC_EXTERN inline void' from 'PETSC_EXTERN inline void FooBar(...)' 520 raw_result_spelling = func.raw().partition(func.spelling)[0] 521 if result_spelling not in raw_result_spelling: 522 # The type is likely unknown, i.e. it defaulted to int 523 assert result_type.kind == clx.TypeKind.INT 524 # Let's try and extract the type nonetheless 525 raw_types = [ 526 t 527 for t in raw_result_spelling.split() 528 if t not in {'static', 'inline', 'extern', '\"C\"', '\"C++\"'} or 529 not t.startswith(('PETSC_', 'SLEPC_')) 530 ] 531 if len(raw_types) > 1: 532 # something we didn't handle 533 return 534 # we have narrowed the type down to just a single string, let's try it out 535 result_spelling = raw_types[0] 536 537 if result_spelling.endswith('*'): 538 # if the result type is a pointer, it will sit flush against the function name, so 539 # we should match for potentially 0 spaces, i.e. 540 # 541 # void *Foo() 542 # 543 # We don't match for exactly 0 spaces since someone may have disabled clang-format 544 # and hence it's possible the pointer is not flush. 545 result_type_spacing = ' *' 546 else: 547 # not a pointer, so result type must always be at least 1 space away from function 548 # name 549 result_type_spacing = ' +' 550 # have to escape the pointers 551 result_spelling = result_spelling.replace('*', '\*') 552 func_name_and_type = rf'{result_spelling}{result_type_spacing}{func.spelling} *\(' 553 # The absolute final check, we need to grep for the symbol across the code-base. This 554 # is needed for cases when: 555 # 556 # // my_file.c 557 # PetscErrorCode PetscFoo(PetscBar baz) <--- marked as a potential static candidate 558 # { 559 # ... 560 # 561 # // my_file.h 562 # #if PetscDefined(HAVE_FOO) 563 # PETSC_EXTERN PetscErrorCode PetscFoo(PetscBar); 564 # #endif 565 # 566 # In the case where the linter is not configured with PETSC_HAVE_FOO, the above checks 567 # will fail to find the extern decl (since from the viewpoint of the compiler, it 568 # literally does not exist) and hence produce a false positive. So the only way to 569 # reliably determine is to search the text. 570 # 571 # The alternative is to emit the diagnostic anyway, knowing that there will be false 572 # positives. To offset this, we could attach a note that says "if this is a false 573 # positive, add PETSC_EXTERN/PETSC_INTERN to the definition as well". 574 # 575 # We chose not to do that because it makes the whole process more brittle, and 576 # introduces otherwise unecessary changes just to placate the linter. 577 ret = subprocess_capture_output([ 578 'git', '--no-pager', 'grep', '--color=never', '-r', '-l', 579 '-P', # use Perl regex, which is -- for whatever reason -- over 6x faster 580 '-e', func_name_and_type, '--', 581 # The reason for all of this exclusion nonsense is because without it, this search 582 # is __slooooow__. Running the test-lint job, even a naive search (search only 583 # src and include) takes ~30s to complete. Adding these exclusions drops that to 584 # just under 7s. Not great, but manageable. 585 586 # magic git pathspecs see 587 # https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec 588 589 # :/NAME means match NAME from the project root directory, i.e. PETSC_DIR 590 ':/src', 591 ':/include', 592 # exclude all Fortran wrappers 593 ':!**/ftn-*/**', 594 # exclude docs, we don't want to match changelog mentions 595 ':!**/docs/**', 596 # exclude bindings (the symbol must have been declared extern somewhere else for it 597 # to be usable from them anyways) 598 ':!**/binding/**', 599 # similarly, for the symbol to be usable from tests and tutorials it must be 600 # extern somewhere else 601 ':!**/tests/**', 602 ':!**/tutorials/**', 603 # ignore code we don't own 604 ':!**/yaml/**', 605 ':!**/perfstubs/**', 606 # last but not least, don't search makefiles 607 ':!*makefile' 608 ], check=False) 609 if (retcode := ret.returncode) == 0: 610 # found matches 611 str_origin = str(origin_file) 612 for found_file in filter(len, map(str.strip, ret.stdout.splitlines())): 613 # origin_file is: 614 # '/full/path/to/petsc/src/sys/tests/linter/testStaticFunctionCandidates.cxx' 615 # found_file is: 616 # 'src/sys/tests/linter/testStaticFunctionCandidates.cxx' 617 if not str_origin.endswith(found_file): 618 # Some other file contains a match. We conservatively assume that the API is 619 # somehow public and bail 620 return 621 elif retcode == 1: 622 # no matches, the current file appears to be the only place this function is visible 623 # from 624 pass 625 else: 626 # something else went wrong, propagate the error 627 subprocess_check_returncode(ret) 628 629 start = SourceLocation.cast(func.extent.start, tu=func.translation_unit) 630 self.add_diagnostic_from_cursor( 631 func, Diagnostic( 632 Diagnostic.Kind.ERROR, self.diags.static_function_candidate, 633 Diagnostic.make_message_from_formattable( 634 f'Function \'{func.name}()\' does not appear to be used anywhere outside of ' 635 f'{origin_file.name}, and can be made static', 636 crange=func 637 ), 638 start, 639 # Note the space in 'static '! start points to exactly the first letter of 640 # the type, so we need to insert an extra space 641 # 642 # ~~~ start 643 # v 644 # PetscErrorCode Foo(...) 645 # { 646 # ... 647 # 648 patch=Patch(SourceRange.from_locations(start, start), 'static ') 649 ) 650 ) 651 return 652 653 def process(self, tu: clx.TranslationUnit) -> None: 654 r"""Process a translation unit for errors 655 656 Parameters 657 ---------- 658 tu : 659 the translation unit to process 660 661 Notes 662 ----- 663 This is the main entry point for the linter 664 """ 665 from ..checks import _register 666 667 # TODO: these check function maps need to be unified into a single dispatcher... it is 668 # not very intuitive currently how to add "general" AST-matching checks. 669 func_map = _register.check_function_map 670 docs_map = _register.check_doc_map 671 parsing_diag = self.diags.parsing_error 672 processed_funcs = collections.defaultdict(list) 673 674 for results in self.find_lintable_expressions(tu, set(func_map.keys())): 675 try: 676 if isinstance(results, clx.Cursor): 677 func = Cursor.cast(results) 678 docs_map[results.kind](self, func) 679 else: 680 func, parent, scope = results 681 func = Cursor.cast(func) 682 parent = Cursor.cast(parent) 683 func_map[func.spelling](self, func, parent) 684 processed_funcs[parent.name].append((func, scope)) 685 except KnownUnhandleableCursorError: 686 # ignored 687 pass 688 except ParsingError as pe: 689 tu_cursor = Cursor.cast(tu.cursor) 690 self.add_diagnostic_from_cursor( 691 tu_cursor, Diagnostic(Diagnostic.Kind.WARNING, parsing_diag, str(pe), tu_cursor.extent.start) 692 ) 693 # we don't want to check these 694 continue 695 self._check_possible_static_function(func) 696 697 self._check_duplicate_function_calls(processed_funcs) 698 return 699 700 def add_diagnostic_from_cursor(self, cursor: Cursor, diagnostic: Diagnostic) -> None: 701 r"""Given a cursor and a diagnostic, log the diagnostic with the linter 702 703 Parameters 704 ---------- 705 cursor : 706 the cursor about which the `diagnostic` is concerned 707 diagnostic : 708 the diagnostic detailing the error or warning 709 710 Raises 711 ------ 712 TypeError 713 if `cursor` is not a `Cursor` 714 ValueError 715 if the diagnostic kind is not handled 716 """ 717 if not isinstance(cursor, Cursor): 718 raise TypeError(type(cursor)) 719 if diagnostic.kind == Diagnostic.Kind.ERROR: 720 container = self.errors 721 elif diagnostic.kind == Diagnostic.Kind.WARNING: 722 container = self.warnings 723 else: 724 raise ValueError(f'Unhandled diagnostic kind {diagnostic.kind}') 725 726 if diagnostic.disabled(): 727 return 728 729 container.add_diagnostic(cursor, diagnostic) 730 if (patch := diagnostic.patch) is not None: 731 self.patches[cursor.get_file()].append(patch) 732 return 733 734 def view_last_error(self) -> None: 735 r"""Print the last error added, useful for debugging""" 736 return self.errors.view_last() 737 738 def view_last_warning(self) -> None: 739 r"""Print the last warning added, useful for debugging""" 740 return self.warnings.view_last() 741 742 def get_all_errors(self) -> tuple[CondensedDiags, CondensedDiags]: 743 r"""Return all errors collected so far 744 745 Returns 746 ------- 747 all_unresolved : 748 a list of tuples of the path and message of unresolved errors (i.e. those without a `Patch`) 749 all_resolved : 750 a list of tuples of the path and message of resolved errors (i.e. those with a `Patch`) 751 """ 752 return self.errors.split_and_condense() 753 754 def get_all_warnings(self) -> tuple[CondensedDiags, CondensedDiags]: 755 r"""Return all warnings collected so far 756 757 Returns 758 ------- 759 all_unresolved : 760 a list of tuples of the path and message of unresolved warnings (i.e. those without a `Patch`) 761 all_resolved : 762 a list of tuples of the path and message of resolved warnings (i.e. those with a `Patch`) 763 (should be empty!) 764 """ 765 return self.warnings.split_and_condense() 766 767 def coalesce_patches(self) -> list[PathDiffPair]: 768 r"""Given a set of patches, collapse all patches and return the minimal set of diffs required 769 770 Returns 771 ------- 772 patches : 773 the list of pairs of coalesced patches and their source files 774 """ 775 def combine(filename: Path, patches: list[Patch]) -> PathDiffPair: 776 fstr = str(filename) 777 diffs: list[list[str]] = [] 778 for patch in patches: 779 rn = datetime.datetime.now().ctime() 780 tmp = list( 781 difflib.unified_diff( 782 patch._make_source().splitlines(True), patch.collapse().splitlines(True), 783 fromfile=fstr, tofile=fstr, fromfiledate=rn, tofiledate=rn, n=patch.ctxlines 784 ) 785 ) 786 tmp[2] = Addline.diff_line_re.sub(Addline(patch.extent.start.line), tmp[2]) 787 # only the first diff should get the file heading 788 diffs.append(tmp[2:] if diffs else tmp) 789 return filename, ''.join(itertools.chain.from_iterable(diffs)) 790 791 def merge_patches(patch_list: list[Patch], patch: Patch) -> tuple[bool, Patch]: 792 patch_extent = patch.extent 793 patch_extent_start = patch_extent.start.line 794 for i, previous_patch in enumerate(patch_list): 795 prev_patch_extent = previous_patch.extent 796 if patch_extent_start == prev_patch_extent.start.line or patch_extent.overlaps( 797 prev_patch_extent 798 ): 799 # this should now be the previous patch on the same line 800 merged_patch = previous_patch.merge(patch) 801 assert patch_list[i] == previous_patch 802 del patch_list[i] 803 return True, merged_patch 804 return False, patch 805 806 for patch_list in self.patches.values(): 807 # merge overlapping patches together before we collapse the actual patches 808 # themselves 809 new_list: list[Patch] = [] 810 for patch in sorted(patch_list, key=lambda x: x.extent.start.line): 811 # we loop until we cannot merge the patch with any additional patches 812 while 1: 813 merged, patch = merge_patches(new_list, patch) 814 if not merged: 815 break 816 new_list.append(patch) 817 patch_list[:] = new_list 818 819 return list(itertools.starmap(combine, self.patches.items())) 820 821 def diagnostics(self) -> tuple[CondensedDiags, CondensedDiags, CondensedDiags, list[PathDiffPair]]: 822 r"""Return the errors left (unfixed), fixed errors, warnings and avaiable patches. Automatically 823 coalesces the patches 824 825 Returns 826 ------- 827 errors_left : 828 the condensed set of filename - list of error-messages for errors that could not be patched 829 errors_fixed : 830 the condensed set of filename - list of error-messages for errors that are patchable 831 warnings_left : 832 the condensed set of filename - list of warning-messages for warnings that could not be patched 833 patches : 834 the list of patches corresponding to entries in `errors_fixed` 835 836 Raises 837 ------ 838 RuntimeError 839 if there exist any fixable warnings 840 841 Notes 842 ----- 843 The linter technically also collects a `warnings_fixed` set, but these are not returned. 844 As warnings indicate a failure of the linter to parse or understand some construct there is no 845 reason for a warning to ever be fixable. These diagnostics should be errors instead. 846 """ 847 # order is critical, coalesce_patches() will prune the patch and warning lists 848 patches = self.coalesce_patches() 849 errors_left, errors_fixed = self.get_all_errors() 850 warnings_left, warnings_fixed = self.get_all_warnings() 851 if nfix := sum(map(len, warnings_fixed.values())): 852 raise RuntimeError( 853 f'Have {nfix} "fixable" warnings, this should not happen! ' 854 'If a warning has a fix then it should be an error instead!' 855 ) 856 return errors_left, errors_fixed, warnings_left, patches 857