#!/usr/bin/env python3
"""
# Created: Mon Jun 20 18:58:57 2022 (-0400)
# @author: Jacob Faibussowitsch
"""
from __future__ import annotations

import re
import enum
import difflib
import textwrap
import collections
import clang.cindex as clx # type: ignore[import]

from ..._typing import *
from ...__version__ import py_version_lt
from ..._error import BaseError, KnownUnhandleableCursorError

from .. import _util

from .._diag import DiagnosticManager, Diagnostic
from .._src_pos import SourceRange, SourceLocation
from .._patch import Patch

from ._doc_section_base import DocBase, SectionBase
from ._doc_section import (
  DefaultSection, Synopsis, FunctionParameterList, OptionDatabaseKeys, Level, Notes, FortranNotes,
  DeveloperNotes, SourceCode, References, SeeAlso
)

@enum.unique
class Verdict(enum.IntEnum):
  r"""An enum describing whether a particular line is deemed a sowing heading or not."""
  IS_HEADING_BUT_PROBABLY_SHOULDNT_BE = -1 # looks like a heading to sowing, but is likely plain text
  NOT_HEADING                         = 0  # definitely not a heading
  IS_HEADING                          = 1  # definitely a heading
  MAYBE_HEADING                       = 2  # resembles a heading, needs further checking

@enum.unique
class DocStringType(enum.Enum):
  r"""An enum describing the kind of symbol a docstring documents."""
  UNKNOWN  = 0
  FUNCTION = enum.auto()
  TYPEDEF  = enum.auto()
  ENUM     = enum.auto()

@enum.unique
class DocStringTypeModifier(enum.Flag):
  r"""A flag set of modifiers refining a `DocStringType`."""
  NONE     = 0
  MACRO    = enum.auto() # docstring documents a macro
  FLOATING = enum.auto() # docstring is not attached to a concrete symbol
  C_FUNC   = enum.auto() # docstring declares the 'C' sowing identifier

@enum.unique
class MatchReason(enum.IntEnum):
  r"""An enum that describes the reason a header was matched"""
  NOT_FOUND = 0
  NAME      = enum.auto() # matched directly against a section name
  KEYWORD   = enum.auto() # matched against one of a section's keywords
  SUBWORD   = enum.auto() # matched a sub-word of the heading against a keyword

  def direct_match(self) -> bool:
    # only a NAME match is considered a direct match; KEYWORD/SUBWORD matches must be
    # mapped back to their owning section
    return self == MatchReason.NAME

  def __bool__(self) -> bool:
    return self != MatchReason.NOT_FOUND

  def __str__(self) -> str:
    return self.name.casefold()

# expressions that usually end in an unescaped colon causing the resulting sentence to be
# considered a title
_suspicious_patterns = set(
  map(
    str.casefold,
    (
      r'follows', r'following.*', r'example', r'instance', r'one\sof.*', r'available.*include',
      r'supports.*approaches.*', r'see.*user.*manual', r'y\.\ssaad,\siterative\smethods.*philadelphia',
      r'default', r'in\s.*\scase.*', r'use\sthe.*', r'for\s+example', r'note\s+(also|that)?',
      r'example[,;-]\s', r'.*etc\.', r'references\s+(to|a|so)\s+',
      r'(the|an|the|a)\s+options\s+database\s+(for|to)?'
    )
  )
)
# variant anchored on a trailing ':' (i.e. the pattern would be mistaken for a heading)
_suspicious_colon_regex = re.compile(r'|'.join(f'{expr}:$' for expr in _suspicious_patterns))
# variant matched anywhere; 'example' alone is too common to be suspicious without a colon
_suspicious_plain_regex = re.compile(r'|'.join(_suspicious_patterns - {'example'}), flags=re.MULTILINE)
del _suspicious_patterns

# matches linter pragma comments of the form 'PetscClangLinter pragma <command>: <args>'
_pragma_regex = re.compile(r'.*PetscClangLinter\s+pragma\s+(\w+):\s*(.*)')

# Regex to match /* */ patterns (C block comments, including nested '*' that do not close
# the comment)
_c_comment_regex = re.compile(r'\/\*(\*(?!\/)|[^*])*\*\/')

_T_co = TypeVar('_T_co', covariant=True)

class SectionNotFoundError(BaseError):
  r"""Exception thrown when a section is searched for, not found, and strict mode was enabled"""
  pass

class GuessHeadingFailError(BaseError):
  r"""Exception thrown when sections fails to guess the appropriate heading for a line"""
  pass

class SectionManager:
  r"""Registry of docstring sections, with fuzzy heading lookup and result caching."""
  __slots__ = '_verbose', '_sections', '_findcache', '_cachekey'

  # verbosity level controlling `_print` output
  _verbose: int
  # section name -> section instance
  _sections: dict[str, SectionBase]
  # tuple of registered section names, used as the `_findcache` key
  _cachekey: tuple[str, ...]
  # cache of previously resolved heading -> section-name lookups, keyed by `_cachekey`
  _findcache: dict[tuple[str, ...], dict[str, str]]

  def __init__(self, *args: SectionImpl, verbose: int = 0) -> None:
    r"""Construct a `SectionManager` object

    Parameters
    ----------
    *args :
      a set of unique sections to register with the section manager
    verbose : optional
      whether to print verbose output

    Raises
    ------
    ValueError
      if the set of sections to register is not unique
    """
    self._verbose   = verbose
    self._sections  = {section.name : section for section in args}
    self._cachekey  = tuple(self._sections.keys())
    self._findcache = {self._cachekey : {}}
    # duplicate names silently collapse in the dict comprehension above, so detect them by
    # comparing lengths
    if len(self._cachekey) != len(args):
      raise ValueError('Have sections with conflicting names!')
    return

  def __getattr__(self, attr: str) -> SectionBase:
    r"""Allows looking up a section via its name, i.e. 'self.fortran_notes'"""
    sections = self._sections
    try:
      return sections[attr]
    except KeyError as ke:
      # attribute access uses underscores, but sections are registered with spaces, e.g.
      # 'fortran_notes' -> 'fortran notes'
      replaced_attr = attr.replace('_', ' ').casefold()
      try:
        return sections[replaced_attr]
      except KeyError:
        pass
      raise AttributeError(attr)

  def __iter__(self) -> Iterator[SectionBase]:
    yield from self._sections.values()

  def __contains__(self, section: SectionImpl) -> bool:
    return self.registered(section)

  def _print(self, *args, verbosity = 1, **kwargs) -> None:
    r"""Print, but only if verbosity is high enough

    Parameters
    ----------
    *args :
      positional arguments to `petsclinter.sync_print`
    verbosity :
      the minimum verbosity at which to print
    **kwargs :
      keyword arguments to `petsclinter.sync_print`
    """
    if self._verbose >= verbosity:
      # imported lazily to avoid a circular import at module load time
      import petsclinter as pl

      pl.sync_print(*args, **kwargs)
    return

  def set_verbose(self, verbose: int) -> int:
    r"""Sets verbosity level

    Parameters
    ----------
    verbose :
      the new verbosity level

    Returns
    -------
    verbose :
      the old verbosity level
    """
    old_verbose   = self._verbose
    self._verbose = verbose
    return old_verbose

  def find(self, heading: str, cache_result: bool = True, strict: bool = False) -> SectionBase:
    r"""Given a heading, find the section which best matches it

    Parameters
    ----------
    heading :
      the heading to search for
    cache_result : optional
      should the result of the lookup be cached?
    strict : optional
      is not finding the section considered an error?

    Returns
    -------
    section :
      the section

    Raises
    ------
    SectionNotFoundError
      if `strict` is True and a section could not be matched
    """
    lohead   = heading.casefold()
    sections = self._sections
    cache    = self._findcache[self._cachekey]
    # fast path: this heading has been resolved before
    try:
      return sections[cache[lohead]]
    except KeyError:
      pass

    section_names = sections.keys()
    found_reason  = MatchReason.NOT_FOUND
    # default to the catch-all section in case nothing matches
    matched       = self.UNKNOWN_SECTION.name
    # first, try fuzzy-matching directly against section names
    try:
      matched = difflib.get_close_matches(heading, section_names, n=1)[0]
    except IndexError:
      pass
    else:
      found_reason = MatchReason.NAME

    # second, try fuzzy-matching against each section's keywords
    if found_reason == MatchReason.NOT_FOUND:
      keywords = [(kw, section.name) for section in self for kw in section.keywords]
      kw_only  = [k for k, _ in keywords]
      try:
        matched = difflib.get_close_matches(heading, kw_only, n=1)[0]
      except IndexError:
        pass
      else:
        found_reason = MatchReason.KEYWORD

    if found_reason == MatchReason.NOT_FOUND:
      # try if we can find a sub-word
      # if heading splits into more than 3 params, then chances are its being mislabeled
      # as a heading anyways
      for head in heading.split(maxsplit=3):
        try:
          # higher cutoff, we have to be pretty sure of a match when using subwords,
          # because it's a lot easier for false positives
          matched = difflib.get_close_matches(head, kw_only, n=1, cutoff=0.8)[0]
        except IndexError:
          continue
        else:
          found_reason = MatchReason.SUBWORD
          break

    max_match_len = max(map(len, section_names))
    if found_reason == MatchReason.NOT_FOUND:
      self._print(
        80 * '*',
        f'UNHANDLED POSSIBLE HEADING! (strict = {strict}, cached = {cache_result})',
        heading,
        80 * '*',
        verbosity=2,
        sep='\n'
      )
      if strict:
        raise SectionNotFoundError(heading)
      # when in doubt, it's probably notes
      self._print(
        '*********** DEFAULTED TO {:{}} FROM {} FOR {}'.format(
          f'{matched} (strict = {strict})', max_match_len, found_reason, heading
        ),
        verbosity=2
      )
    else:
      if not found_reason.direct_match():
        # found via keyword or subword; map the matched keyword back to its owning section
        matched = next(filter(lambda item: item[0] == matched, keywords))[1]
      self._print(
        f'**** CLOSEST MATCH FOUND {matched:{max_match_len}} FROM {found_reason} FOR {heading}',
        verbosity=2
      )

    if cache_result:
      cache[lohead] = matched
    return sections[matched]

  def registered(self, section: SectionImpl) -> bool:
    r"""Determine whether a section has already been registered with the `SectionManager`

    Parameters
    ----------
    section :
      the section to check for

    Returns
    -------
    reg :
      True if `section` has been registered, False otherwise

    Raises
    ------
    NotImplementedError
      if `section` is not derived from `SectionBase`
    """
    if not isinstance(section, SectionBase):
      raise NotImplementedError(type(section))
    return section.name in self._sections

  def gen_titles(self) -> Generator[str, None, None]:
    r"""Return a generator over all registered titles

    Returns
    -------
    gen :
      the generator over every title of every registered section
    """
    return (attr for section in self for attr in section.titles)

  def is_heading(self, line: str, prev_line: str) -> Verdict:
    r"""Determine whether `line` contains a valid heading

    Parameters
    ----------
    line :
      the current line to be checked
    prev_line :
      the previous line

    Returns
    -------
    verdict :
      whether the line is a heading
    """
    def handle_header_with_colon(text: str) -> Verdict:
      # an escaped colon is explicitly *not* a heading marker
      if text.endswith(r'\:'):
        return Verdict.NOT_HEADING

      textlo = text.casefold()
      # a line starting with '<known title>:' is a heading
      if any(map(textlo.startswith, (t.casefold() + ':' for t in self.gen_titles()))):
        return Verdict.IS_HEADING

      if text.endswith(':'):
        # sowing treats any line ending in ':' as a heading, but these characters suggest
        # the line is really code or prose
        if any(map(text.__contains__, (' - ', '=', '(', ')', '%', '$', '@', '#', '!', '^', '&', '+'))):
          return Verdict.IS_HEADING_BUT_PROBABLY_SHOULDNT_BE

        if _suspicious_colon_regex.search(textlo) is None:
          return Verdict.IS_HEADING
        return Verdict.IS_HEADING_BUT_PROBABLY_SHOULDNT_BE

      # colon is embedded mid-line; only call it a heading if fuzzy lookup finds a real
      # (non-default) section
      try:
        _, _, section = self.fuzzy_find_section(text, cache_result=False, strict=True)
      except GuessHeadingFailError:
        return Verdict.NOT_HEADING
      return Verdict.NOT_HEADING if isinstance(section, DefaultSection) else Verdict.IS_HEADING

    def handle_header_without_colon(line: str, prev_line: str) -> Verdict:
      linelo  = line.casefold()
      results = list(filter(linelo.startswith, map(str.casefold, self.gen_titles())))
      if not results:
        return Verdict.NOT_HEADING
      if _suspicious_plain_regex.search(' '.join((prev_line.casefold(), linelo))):
        # suspicious regex detected, err on the side of caution and say this line is not a
        # heading
        return Verdict.NOT_HEADING
      # not suspicious, still not 100% though
      return Verdict.MAYBE_HEADING

    prev_line = prev_line.strip()
    line      = line.strip()
    # empty lines and sowing markup (parameter lists, verbatim blocks) are never headings
    if not line or line.startswith(('+', '. ', '-', '$', '.vb', '.ve')):
      return Verdict.NOT_HEADING
    if ':' in line:
      return handle_header_with_colon(line)
    return handle_header_without_colon(line, prev_line)

  def fuzzy_find_section(self, line: str, strict: bool = False, **kwargs) -> tuple[str, str, SectionBase]:
    r"""Try to fuzzy guess what section a heading belongs to.

    Parameters
    ----------
    line :
      the line
    strict : optional
      whether to be strict about matching
    **kwargs :
      additional keywords arguments to `SectionManager.find()`

    Returns
    -------
    attempt :
      the attempt which was successful
    match_title :
      the matched title of the guessed section
    section :
      the matched section

    Raises
    ------
    GuessHeadingFailError
      if header guessing failed

    Notes
    -----
    This needs to be combined with self.find() somehow...
    """
    if strp := line.split(':', maxsplit=1)[0].strip():
      # try progressively simpler forms: the whole prefix, its first word, and its
      # title-cased form
      for attempt in (strp, strp.split(maxsplit=1)[0].strip(), strp.title()):
        section = self.find(attempt, **kwargs)
        titles  = section.titles
        if len(titles) > 1:
          titles = tuple(difflib.get_close_matches(attempt, titles, n=1))

        if titles:
          if strict and isinstance(section, DefaultSection):
            # in strict mode the catch-all section does not count as a match
            break
          return attempt, titles[0], section

    raise GuessHeadingFailError(f'Could not guess heading for:\n{line}')

@DiagnosticManager.register(
  ('internal-linkage','Verify that symbols with internal linkage don\'t have docstrings'),
  ('sowing-chars','Verify that sowing begin and end indicators match the symbol type'),
  ('symbol-spacing','Verify that dosctrings occur immediately above that which they describe'),
  ('indentation','Verify that docstring text is correctly indented'),
  ('section-spacing','Verify that there section headers are separated by at least 1 empty line'),
  ('section-header-maybe-header','Check for lines that seem like they are supposed to be headers'),
  ('section-header-fishy-header','Check for headers that seem like they should not be headers'),
)
class PetscDocString(DocBase):
  """
  Container to encapsulate a sowing docstring and retrieve various objects for it.
  Essentially a Cursor for comments.
  """

  # to pacify type checkers...
  diags: DiagnosticMap

  Type     = DocStringType
  Modifier = DocStringTypeModifier
  sections = SectionManager(
    Synopsis(),
    FunctionParameterList(),
    OptionDatabaseKeys(),
    Notes(),
    SourceCode(),
    DeveloperNotes(),
    References(),
    FortranNotes(),
    Level(),
    SeeAlso(),
    DefaultSection(),
  )
  # the set of recognized sowing identifier characters following '/*'
  sowing_types = {'@', 'S', 'E', 'M'}
  # clang type kind -> (expected sowing char, human-readable name, docstring type)
  clx_to_sowing_type = {
    clx.TypeKind.FUNCTIONPROTO : ('@', 'functions', Type.FUNCTION),
    clx.TypeKind.ENUM          : ('E', 'enums', Type.ENUM),
  }
  __slots__ = '_linter', 'cursor', 'raw', 'extent', 'indent', 'type', 'type_mod', '_attr'

  _linter: Linter
  cursor: Cursor
  raw: str                       # sanitized raw text of the docstring comment
  extent: SourceRange            # source range covering `raw`
  indent: int                    # expected indent (in columns) of regular docstring text
  type: DocStringType
  type_mod: DocStringTypeModifier
  _attr: dict[str, Any]          # scratch attributes, e.g. 'sowing_char_range'

  def __init__(self, linter: Linter, cursor: Cursor, indent: int = 2) -> None:
    r"""Construct a `PetscDocString`

    Parameters
    ----------
    linter :
      a `Linter` instance
    cursor :
      the cursor to which this docstring belongs
    indent : optional
      the number of line indents for normal lines
    """
    self.sections.set_verbose(linter.verbose)
    self._linter = linter
    self.cursor  = cursor
    self.raw, self.extent = self._get_sanitized_comment_and_range_from_cursor(self.cursor)
    self.indent   = indent
    self.type     = self.Type.UNKNOWN
    self.type_mod = self.Modifier.NONE
    self._attr    = self._default_attributes()
    return

  @staticmethod
  def _default_attributes() -> dict[str, Any]:
    # the clean-slate value for `_attr`; also used by `reset()`
    return dict()

  @classmethod
  def _is_valid_docstring(cls, cursor: Cursor, raw: str, doc_extent: SourceRange) -> bool:
    r"""Determine whether docstring in `raw` (of `cursor`) is a valid sowing docstring worth checking.

    Parameters
    ----------
    cursor :
      the cursor to which the docstring belongs
    raw :
      the raw text of the docstring
    doc_extent :
      the source range for the docstring itself

    Returns
    -------
    ret :
      True if the docstring is valid, False otherwise
    """
    if not raw or not isinstance(raw, str):
      return False

    # if cursor.extent is *before* the doc_extent we have the following situation:
    #
    # extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
    # ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ cursor.extent
    # ...
    # /*                                                    <
    #   MatMult_SeqFFTW - ...                               < doc_extent
    # */                                                    <
    # PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
    #
    # We can safely "ignore" this cursor and mark it as not a docstring since we will hit
    # the main cursor (i.e. the function definition) later on.
    if cursor.extent < doc_extent:
      return False

    # if we find sowing chars, its probably a docstring
    raw = raw.strip()
    if any(raw.startswith(f'/*{char}') for char in cls.sowing_types):
      return True

    # if we find at least 2 titles, likely this is a docstring, unless it ends in one of
    # the internal suffixes or has internal linkage
    rawlo      = raw.casefold()
    have_title = sum(f'{title}:' in rawlo for title in map(str.casefold, cls.sections.gen_titles()))
    if have_title < 2:
      return False

    # intentionally private symbols don't have docstrings. This is not technically correct
    # since people might still create a docstring for them, but it's very tricky to guess
    # what people meant
    if cursor.name.casefold().endswith(('_private', '_internal')):
      return False

    # likewise symbols with explicitly internal linkage are not considered to have a docstring
    has_internal_linkage, _, _ = cursor.has_internal_linkage()
    return not has_internal_linkage

  @classmethod
  def _get_sanitized_comment_and_range_from_cursor(cls, cursor: Cursor) -> tuple[str, SourceRange]:
    r"""Get the raw docstring text and its source range from a cursor

    Parameters
    ----------
    cursor :
      the cursor

    Returns
    -------
    raw :
      the raw docstring text
    range :
      the source range for `raw`

    Raises
    ------
    KnownUnhandleableCursorError
      if the cursor is not a valid docstring
    """
    raw, clx_extent = cursor.get_comment_and_range()
    extent          = SourceRange.cast(clx_extent, tu=cursor.translation_unit)

    if not cls._is_valid_docstring(cursor, raw, extent):
      raise KnownUnhandleableCursorError('Not a docstring')

    # locate the *last* complete /* ... */ comment in raw; only it belongs to the symbol
    last_match = None
    for re_match in _c_comment_regex.finditer(raw):
      last_match = re_match

    assert last_match is not None
    if start := last_match.start():
      # this handles the following case:
      #
      # /* a dummy comment that is attributed to the symbol */
      # /*
      #   the real docstring comment, note no empty line between this and the previous!
      #   // also handles internal comments
      #   /* of both kinds */
      # */
      # <the symbol>
      assert start > 0
      # shift the extent's begin line forward past the discarded leading comment(s)
      extent = extent.resized(lbegin=raw.count('\n', 0, start), cbegin=None, cend=None)
      raw    = raw[start:]
    return raw, extent

  def get_pragmas(self) -> dict[str, set[re.Pattern[str]]]:
    r"""Retrieve a dict of pragmas for a particular docstring

    Returns
    -------
    pragmas :
      the pragmas

    Notes
    -----
    `pragmas` is in the form:

    {command_name : set(regex_patterns)}
    """
    def str_remove_prefix(string: str, prefix: str) -> str:
      if py_version_lt(3, 9):
        ret = string[len(prefix):] if string.startswith(prefix) else string
      else:
        # the type checkers do not grok the py_version_lt version guard:
        # error: "str" has no attribute "removeprefix"
        ret = string.removeprefix(prefix) # type: ignore[attr-defined]
      return ret

    start       = self.extent.start
    flag_prefix = DiagnosticManager.flagprefix
    pragmas: collections.defaultdict[str, set[re.Pattern[str]]] = collections.defaultdict(set)
    # walk backwards through the lines immediately above the docstring, collecting pragma
    # comments until we hit something that looks like code
    for line in reversed(_util.read_file_lines_cached(start.file.name, 'r')[:start.line - 1]):
      line = line.rstrip()
      if line.endswith(('}', ';', ')', '>', '"')):
        break
      if re_match := _pragma_regex.match(line):
        pragmas[re_match.group(1)].update(
          map(
            re.compile,
            filter(None, map(str.strip, str_remove_prefix(re_match.group(2), flag_prefix).split(',')))
          )
        )
    return dict(pragmas)

  def make_source_location(self, lineno: int, col: int) -> SourceLocation:
    r"""Make a `SourceLocation`

    Parameters
    ----------
    lineno :
      the line number of the location
    col :
      the column number of the location

    Returns
    -------
    loc :
      the `SourceLocation`

    Notes
    -----
    This is a convenience routine for attaching the docstrings' cursors' translation unit to the
    source location
    """
    return SourceLocation.from_position(self.cursor.translation_unit, lineno, col)

  def make_source_range(self, token: str, string: str, lineno: int, offset: int = 0) -> SourceRange:
    r"""Make a `SourceRange` from a token

    Parameters
    ----------
    token :
      the substring of `string` to make the `SourceRange` for
    string :
      the string to search for `token` in
    lineno :
      the line number of the range
    offset : optional
      the offset into `string` from which to search

    Returns
    -------
    rng :
      the `SourceRange`

    Notes
    -----
    Like `PetscDocString.make_source_location()` this is a convenience routine for properly attaching
    the translation unit to the `SourceRange`. Note though that this only produces single-lined
    `SourceRange`s.
    """
    # columns are 1-based, hence the +1
    col_begin = string.index(token, offset) + 1
    col_end   = col_begin + len(token)
    return SourceRange.from_positions(self.cursor.translation_unit, lineno, col_begin, lineno, col_end)

  def make_diagnostic(self, kind: DiagnosticKind, diag_flag: str, msg: str, src_range: Optional[Union[SourceRange, Cursor]], patch: Optional[Patch] = None, **kwargs) -> Diagnostic:
    r"""Construct a `Diagnostic`

    Parameters
    ----------
    kind :
      the class of `Diagnostic` to create
    diag_flag :
      the command-line flag controlling the diagnostic
    msg :
      the description message for the diagnostic, e.g. the error message
    src_range : optional
      the source range to attribute to the diagnostic, if None, the extent for the entire docstring is
      used
    patch : optional
      the patch to fix the diagnostic

    Returns
    -------
    diag :
      the constructed `Diagnostic`
    """
    if src_range is None:
      src_range = self.extent
    else:
      src_range = SourceRange.cast(src_range)
    return Diagnostic.from_source_range(kind, diag_flag, msg, src_range, patch=patch, **kwargs)

  def add_diagnostic_from_source_range(self, kind: DiagnosticKind, diag_flag: str, msg: str, src_range: SourceRangeLike, **kwargs) -> None:
    r"""Log an error from a given source range

    Parameters
    ----------
    kind :
      the kind of `Diagnostic` to add
    diag_flag :
      the diagnostic flag to control the error
    msg :
      the diagnostic message describing the problem in detail
    src_range :
      the `SourceRange` which shows the error in the source
    **kwargs :
      any additional keyword arguments to `PetscDocString.make_diagnostic()`
    """
    return self.add_diagnostic(self.make_diagnostic(kind, diag_flag, msg, src_range, **kwargs))

  def add_diagnostic(self, diagnostic: Diagnostic, cursor: Optional[Cursor] = None) -> None:
    r"""Log an error from a fully-formed diagnostic

    Parameters
    ----------
    diagnostic :
      the diagnostic describing the error
    cursor : optional
      the cursor to attach the error to, if None, the docstrings cursor is used
    """
    return self._linter.add_diagnostic_from_cursor(
      self.cursor if cursor is None else cursor, diagnostic
    )

  def reset(self) -> None:
    r"""Reset any internal state for the `PetscDocString`

    Notes
    -----
    This probably doesn't fully work.
    """
    for section in self.sections:
      section.clear()
    self._attr = self._default_attributes()
    return

  def guess_heading(self, line: str, **kwargs) -> tuple[str, str, SectionBase]:
    r"""A shorthand for `SectionManager.fuzzy_find_section()`"""
    return self.sections.fuzzy_find_section(line, **kwargs)

  def _check_floating(self) -> None:
    r"""Check that the docstring isn't a floating docstring, i.e. for a mansection or particular type

    Raises
    ------
    KnownUnhandleableCursorError
      if the docstring is 'floating', i.e. has 'M' in it
    """
    # inspect the first non-empty, non-comment-marker line; a floating docstring looks
    # like 'SYMBOL - description' where SYMBOL is all-caps
    for line in filter(None, map(str.lstrip, self.raw.splitlines())):
      if not line.startswith(('/*', '//')):
        lsplit = line.split()
        try:
          is_floating = lsplit[0].isupper() and lsplit[1] in {'-', '='}
        except IndexError:
          # the lsplit[1] indexing failed, if it is a macro docstring, it is likely
          # floating
          is_floating = self.Modifier.MACRO in self.type_mod
        if is_floating:
          # don't really know how to handle this for now
          self.type_mod |= self.Modifier.FLOATING
          raise KnownUnhandleableCursorError(
            'DON\'T KNOW HOW TO PROPERLY HANDLE FLOATING DOCSTRINGS'
          )
        break
    return

  def _check_valid_cursor_linkage(self) -> bool:
    r"""Check that a cursor has external linkage, there is no point producing a manpage for function
    that is impossible to call.

    Returns
    -------
    ret :
      True if the cursor has external linkage (and therefore should be checked), False if the cursor
      has internal linkage (and is therefore pointless to check)
    """
    cursor = self.cursor
    # TODO, this should probably also check that the header the cursor is defined in is public
    has_internal_linkage, linked_cursor_name, linkage_cursor = cursor.has_internal_linkage()
    # sometimes a static function has the class description above it, for example
    # VECSEQCUDA sits above a private cuda impls function
    pointless = has_internal_linkage and not (
      cursor.location.file.name.endswith(('.h', '.hpp', '.inc')) or
      self.Modifier.FLOATING in self.type_mod
    )
    if pointless:
      assert linkage_cursor is not None
      begin_sowing_range = self._attr['sowing_char_range']
      linkage_extent     = SourceRange.cast(linkage_cursor.extent)
      diag = self.make_diagnostic(
        Diagnostic.Kind.ERROR, self.diags.internal_linkage,
        'A sowing docstring for a symbol with internal linkage is pointless', self.extent,
        highlight=False
      ).add_note(
        Diagnostic.make_message_from_formattable(
          f'\'{cursor.displayname}\' is declared \'{linked_cursor_name}\' here', crange=linkage_extent
        ),
        location=linkage_extent.start
      ).add_note(
        'If this docstring is meant as developer-only documentation, remove the sowing chars from the docstring declaration. The linter will then ignore this docstring.'
      ).add_note(
        Diagnostic.make_message_from_formattable(
          'Sowing chars declared here', crange=begin_sowing_range
        ),
        location=begin_sowing_range.start
      )
      self.add_diagnostic(diag)
    return not pointless

  def _check_valid_sowing_chars(self) -> None:
    r"""Check that the sowing prefix and postfix match the expected and are symmetric

    Raises
    ------
    KnownUnhandleableCursorError
      if start of the comment line is invalid
    RuntimeError
      if the start comment contains an unknown sowing char
    """
    sowing_type, lay_type, self.type = self.clx_to_sowing_type[self.cursor.type.kind]
    # check the beginning
    splitlines = self.raw.splitlines()
    line       = splitlines[0]
    begin_sowing_range    = self.make_source_range(line, line, self.extent.start.line)
    diag_name             = self.diags.sowing_chars
    possible_sowing_chars = line.split('/*')[1].split()
    try:
      begin_sowing = possible_sowing_chars[0]
    except IndexError:
      # nothing at all follows '/*'; report it and pretend the expected identifier was there
      begin_sowing = sowing_type
      mess = f'Invalid comment begin line, does not contain sowing identifier. Expected \'/*{sowing_type}\' for {lay_type}'
      self.add_diagnostic_from_source_range(Diagnostic.Kind.ERROR, diag_name, mess, begin_sowing_range)
    else:
      assert isinstance(begin_sowing, str), f'begin_sowing is not a string: {begin_sowing}'
      if begin_sowing[0] not in self.sowing_types:
        diagnosed = False
        if line[line.find(begin_sowing) - 1].isspace():
          # There is a space between the "sowing char" and the character before
          # it. Therefore it is likely just regular text. Sometimes people make internal
          # sowing-like docstrings just to keep things consistent, for example:
          #
          #    v--- identified as begin_sowing
          # /* KSPSolve_LCD - This routine actually applies the left conjugate
          # ...
          #
          # we should ignore it, and stop processing this docstring altogether since it is
          # not an actual docstring.
          raise KnownUnhandleableCursorError
        if begin_sowing[0] == 'C':
          # sometimes people mix up the order, or forget to add the right letter for the
          # type, for example:
          #
          #  v--- begin_sowing, should be @C
          # /*C
          #   MatElimininateZeroes
          #
          if len(begin_sowing) == 1:
            # they forgot the correct identifier
            sub_mess  = f'It appears you forgot to prepend \'{sowing_type}\''
            expected  = f'{sowing_type}{begin_sowing}'
            diagnosed = True
            # making a new source range instead of using begin_sowing_range is
            # deliberate. The line may still contain other garbage, i.e.:
            #
            # /*C FooBarBaz - asdasdasdasd
            # ^~~~~~~~~~~~~~~~~~~~~~~~~^ begin_sowing_range
            #
            # which we do not want to overwrite with 'expected'. In order for the patch to
            # be maximally stable we also don't want to have the replacement contain the
            # (possibly) trailing stuff, so we make our new range just encompass 'C'.
            patch = Patch(
              self.make_source_range(begin_sowing, line, begin_sowing_range.start.line), expected
            )
          elif any(c in self.sowing_types for c in begin_sowing):
            # wrong order
            sub_mess  = 'Did you put it in the wrong order'
            expected  = f'{sowing_type}{begin_sowing.replace(sowing_type, "")}'
            diagnosed = True
            patch     = None
        if diagnosed:
          self.add_diagnostic_from_source_range(
            Diagnostic.Kind.ERROR, diag_name,
            f'Invalid docstring identifier, contains unexpected char sequence \'{begin_sowing}\', expected \'/*{expected}\'. {sub_mess}?',
            begin_sowing_range,
            patch=patch
          )
        if not diagnosed:
          raise RuntimeError(f'Unknown sowing char {begin_sowing[0]} not in sowing types {self.sowing_types} found in {line}')
      # narrow the range down to just the sowing chars themselves
      begin_sowing_range = self.make_source_range(begin_sowing, line, begin_sowing_range.start.line)

    self._attr['sowing_char_range'] = begin_sowing_range

    if 'M' in begin_sowing:
      self.type_mod |= self.Modifier.MACRO
    if 'C' in begin_sowing:
      self.type_mod |= self.Modifier.C_FUNC

    # check that nothing else is on the comment begin line
    lsplit = line.strip().split(maxsplit=1)
    if len(lsplit) != 1:
      rest    = lsplit[1]
      restloc = self.make_source_range(rest, line, self.extent.start.line)
      mess    = 'Invalid comment begin line, must only contain \'/*\' and sowing identifier'
      self.add_diagnostic_from_source_range(
        Diagnostic.Kind.ERROR, diag_name, mess, restloc,
        patch=Patch(restloc, '\n' + (' '*self.indent) + rest)
      )
    return

  def _check_valid_docstring_spacing(self) -> None:
    r"""Check that the docstring itself is flush against the thing it describes.

    Notes
    -----
    Checks that the docstring looks like
    ```
    /*
      PetscFooBar - ...
    */
    PetscErrorCode PetscFooBar(...)
    ```
    not
    ```
    /*
      PetscFooBar - ...
    */

    PetscErrorCode PetscFooBar(...)
    ```
    """
    if self.Modifier.FLOATING in self.type_mod:
      return # floating docstring sections need not be checked for this

    end_line     = self.extent.end.line + 1
    cursor_start = self.cursor.extent.start
    if end_line != cursor_start.line:
      # there is at least 1 (probably empty) line between the comment end and whatever it
      # is describing
      diag = self.diags.symbol_spacing
      mess = 'Invalid line-spacing between docstring and the symbol it describes. The docstring must appear immediately above its target'
      eloc = self.make_source_range('', '', end_line)
      # the patch deletes everything between the comment end and the symbol start
      floc = SourceRange.from_locations(self.make_source_location(end_line, 1), cursor_start)
      self.add_diagnostic_from_source_range(
        Diagnostic.Kind.ERROR, diag, mess, eloc, highlight=False, patch=Patch(floc, '')
      )
    return

  def _check_valid_indentation(self, lineno: int, line: str, left_stripped: str) -> None:
    r"""If the line is regular (not empty, or a parameter list), check that line is indented correctly

    Parameters
    ----------
    lineno :
      the line number of the line
    line :
      the line itself
    left_stripped :
      the line that has been left-stripped
    """
    if linelen := len(line):
      indent = linelen - len(left_stripped)
      # sowing markup lines (parameter lists, verbatim) must start in column 0
      expected_ind = 0 if line.startswith(('.', '+', '-', '$')) else self.indent
      if indent != expected_ind:
        diag = self.diags.indentation
        loc  = self.make_source_range(' ' * indent, line, lineno)
        mess = f'Invalid indentation ({indent}), all regular (non-empty, non-parameter, non-seealso) text must be indented to {self.indent} columns'
        self.add_diagnostic_from_source_range(
          Diagnostic.Kind.ERROR, diag, mess, loc, patch=Patch(loc, ' ' * expected_ind)
        )
    return

  def _check_valid_section_spacing(self, prevline: str, lineno: int) -> None:
    r"""Check that sections have at least 1 empty line between them

    Parameters
    ----------
    prevline :
      the previous line
    lineno :
      the current line number

    Notes
    -----
    Checks that sections are formatted like
    ```
    Notes:
    asdadsadasdads

    Example Usage:
    asdasdasd
    ```
    not
    ```
    Notes:
    asdasdasd
    Example Usage:
    asdadasd
    ```
    """
    if prevline and not prevline.isspace():
      loc = self.make_source_range('', '', lineno)
      self.add_diagnostic_from_source_range(
        Diagnostic.Kind.ERROR,
self.diags.section_spacing, 1032 'Missing empty line between sections, must have one before this section', 1033 loc, highlight=False, patch=Patch(loc, '\n') 1034 ) 1035 return 1036 1037 def _check_section_header_typo(self, verdict: Verdict, line: str, lineno: int) -> Verdict: 1038 r"""Check that a section header that looks like a section header is actually one 1039 1040 Parameters 1041 ---------- 1042 verdict : 1043 the current header verdict of the line 1044 line : 1045 the line 1046 lineno : 1047 the line number 1048 1049 Returns 1050 ------- 1051 verdict : 1052 the new verdict (if changed) 1053 """ 1054 if verdict == Verdict.MAYBE_HEADING: 1055 try: 1056 name, match_title, _ = self.guess_heading(line, strict=True) 1057 except GuessHeadingFailError as ghfe: 1058 # Not being able to guess the heading here is OK since we only *think* it's a 1059 # heading 1060 self.sections._print(ghfe) 1061 return Verdict.NOT_HEADING 1062 if ':' in line: 1063 mess = f'Line seems to be a section header but doesn\'t directly end with \':\', did you mean \'{match_title}\'?' 1064 else: 1065 mess = f'Line seems to be a section header but missing \':\', did you mean \'{match_title}:\'?' 
1066 self.add_diagnostic_from_source_range( 1067 Diagnostic.Kind.ERROR, self.diags.section_header_maybe_header, mess, 1068 self.make_source_range(name, line, lineno) 1069 ) 1070 return verdict 1071 1072 def _check_section_header_that_probably_should_not_be_one(self, verdict: Verdict, line: str, stripped: str, lineno: int) -> Verdict: 1073 r"""Check that a section header that ends with ':' is not really a header 1074 1075 Parameters 1076 ---------- 1077 verdict : 1078 the current heading verdict 1079 line : 1080 the line 1081 stripped : 1082 `line` but stripped 1083 lineno : 1084 the line number 1085 1086 Returns 1087 ------- 1088 verdict : 1089 the update verdict 1090 """ 1091 if verdict < 0: 1092 try: 1093 _, _, section_guess = self.guess_heading(line, cache_result=False) 1094 except GuessHeadingFailError as ghfe: 1095 # Not being able to guess the heading here is OK since we aren't sure this isn't a 1096 # heading after all 1097 self.sections._print(ghfe) 1098 verdict = Verdict.NOT_HEADING 1099 else: 1100 assert isinstance(section_guess, SectionBase) 1101 if isinstance(section_guess, DefaultSection): 1102 # we could not find a suitable section for it 1103 assert not line.endswith(r'\:') 1104 eloc = self.make_source_range(':', line, lineno, offset=line.rfind(':')) 1105 mess = f'Sowing treats all lines ending with \':\' as header, are you sure \'{textwrap.shorten(stripped, width=35)}\' qualifies? 
Use \'\:\' to escape the colon if not' 1106 self.add_diagnostic_from_source_range( 1107 Diagnostic.Kind.ERROR, self.diags.section_header_fishy_header, mess, eloc 1108 ) 1109 return verdict 1110 1111 def parse(self) -> PetscDocString: 1112 r"""Parse a docstring 1113 1114 Returns 1115 ------- 1116 docstring : 1117 the `PetscDocString` instance 1118 1119 Raises 1120 ------ 1121 KnownUnhandleableCursorError 1122 if the cursor has internal linkage and should not have its docstring checked 1123 """ 1124 self.reset() 1125 self._check_valid_sowing_chars() 1126 self._check_floating() 1127 if not self._check_valid_cursor_linkage(): 1128 # no point in continuing analysis, the docstring should not exist! 1129 raise KnownUnhandleableCursorError() 1130 self._check_valid_docstring_spacing() 1131 1132 section = self.sections.synopsis 1133 check_indent = section.check_indent_allowed() 1134 # if True we are in a verbatim block. We should not try to detect any kind of 1135 # headers until we reach the end of the verbatim block 1136 in_verbatim = 0 1137 prev_line = '' 1138 1139 raw_data: list[tuple[SourceRange, str, Verdict]] = [] 1140 for lineno, line in enumerate(self.raw.splitlines(), start=self.extent.start.line): 1141 left_stripped = line.lstrip() 1142 stripped = left_stripped.rstrip() 1143 if stripped.startswith('/*') or stripped.endswith('*/'): 1144 continue 1145 1146 # TODO remove this, the current active section should be deciding what to do here instead 1147 # we shouldn't be checking indentation in verbatim blocks 1148 if stripped.startswith('.vb'): 1149 check_indent = False 1150 in_verbatim = 1 1151 elif stripped.startswith('.ve'): 1152 check_indent = True # note we don't need to check indentation of line with .ve 1153 in_verbatim = 0 1154 elif stripped.startswith('$'): 1155 # inline verbatim don't modify check flag but dont check indentation either 1156 in_verbatim = 2 1157 elif check_indent: 1158 self._check_valid_indentation(lineno, line, left_stripped) 1159 1160 if 
in_verbatim == 0: 1161 heading_verdict = self.sections.is_heading(stripped, prev_line) 1162 heading_verdict = self._check_section_header_typo(heading_verdict, line, lineno) 1163 if heading_verdict > 0: 1164 # we may switch headings, we should check indentation 1165 if not check_indent: 1166 self._check_valid_indentation(lineno, line, left_stripped) 1167 self._check_valid_section_spacing(prev_line, lineno) 1168 new_section = self.sections.find(stripped.split(':', maxsplit=1)[0].strip().casefold()) 1169 if new_section != section: 1170 raw_data = section.consume(raw_data) 1171 section = new_section 1172 check_indent = section.check_indent_allowed() 1173 else: 1174 heading_verdict = self._check_section_header_that_probably_should_not_be_one( 1175 heading_verdict, line, stripped, lineno 1176 ) 1177 else: 1178 # verbatim blocks are never headings 1179 heading_verdict = Verdict.NOT_HEADING 1180 1181 raw_data.append((self.make_source_range(line, line, lineno), line, heading_verdict)) 1182 if in_verbatim == 2: 1183 # reset the dollar verbatim 1184 in_verbatim = 0 1185 prev_line = stripped 1186 1187 section.consume(raw_data) 1188 for sec in self.sections: 1189 sec.setup(self) 1190 return self 1191 1192del DocStringType 1193del DocStringTypeModifier 1194