xref: /petsc/lib/petsc/bin/maint/petsclinter/petsclinter/classes/docs/_doc_str.py (revision 76de251a2723aae6aa29693b5df5716ed0f0f554)
1#!/usr/bin/env python3
2"""
3# Created: Mon Jun 20 18:58:57 2022 (-0400)
4# @author: Jacob Faibussowitsch
5"""
6from __future__ import annotations
7
8import re
9import enum
10import difflib
11import textwrap
12import collections
13import clang.cindex as clx # type: ignore[import]
14
15from ..._typing     import *
16from ...__version__ import py_version_lt
17from ..._error      import BaseError, KnownUnhandleableCursorError
18
19from .. import _util
20
21from .._diag    import DiagnosticManager, Diagnostic
22from .._src_pos import SourceRange, SourceLocation
23from .._patch   import Patch
24
25from ._doc_section_base import DocBase, SectionBase
26from ._doc_section      import (
27  DefaultSection, Synopsis, FunctionParameterList, OptionDatabaseKeys, Level, Notes, FortranNotes,
28  DeveloperNotes, SourceCode, References, SeeAlso
29)
30
@enum.unique
class Verdict(enum.IntEnum):
  r"""The outcome of testing whether a docstring line is a sowing section heading."""
  # the line is formatted like a heading, but its content suggests it is ordinary text
  IS_HEADING_BUT_PROBABLY_SHOULDNT_BE = -1
  # the line is not a heading
  NOT_HEADING = 0
  # the line is a heading
  IS_HEADING = 1
  # the line starts with a known section title, but contains no colon
  MAYBE_HEADING = 2
38
@enum.unique
class DocStringType(enum.Enum):
  r"""The kind of symbol that a docstring documents."""
  UNKNOWN  = 0
  FUNCTION = 1
  TYPEDEF  = 2
  ENUM     = 3
45
@enum.unique
class DocStringTypeModifier(enum.Flag):
  r"""Bit flags which refine a `DocStringType`; they may be combined with `|`."""
  NONE     = 0
  MACRO    = 1 << 0
  FLOATING = 1 << 1
  C_FUNC   = 1 << 2
52
@enum.unique
class MatchReason(enum.IntEnum):
  r"""The way in which a heading was matched to a section (if it was matched at all)"""
  NOT_FOUND = 0
  NAME      = 1
  KEYWORD   = 2
  SUBWORD   = 3

  def direct_match(self) -> bool:
    r"""Return True only if the match was made on the section name itself"""
    return self is MatchReason.NAME

  def __bool__(self) -> bool:
    # every reason other than NOT_FOUND counts as a successful match
    return self is not MatchReason.NOT_FOUND

  def __str__(self) -> str:
    # the case-folded member name, e.g. 'keyword'
    return str.casefold(self.name)
69
# Expressions which, in ordinary prose, are usually followed by an unescaped colon. A sentence
# ending in one of these would otherwise be considered a title.
_suspicious_patterns = {
  pattern.casefold() for pattern in (
    r'follows', r'following.*', r'example', r'instance', r'one\sof.*', r'available.*include',
    r'supports.*approaches.*', r'see.*user.*manual', r'y\.\ssaad,\siterative\smethods.*philadelphia',
    r'default', r'in\s.*\scase.*', r'use\sthe.*', r'for\s+example', r'note\s+(also|that)?',
    r'example[,;-]\s', r'.*etc\.', r'references\s+(to|a|so)\s+',
    r'(the|an|the|a)\s+options\s+database\s+(for|to)?'
  )
}
# each expression, required to be directly followed by a colon at the end of the text
_suspicious_colon_regex = re.compile('|'.join([f'{pattern}:$' for pattern in _suspicious_patterns]))
# each expression on its own, except for the bare word 'example'
_suspicious_plain_regex = re.compile(
  '|'.join(_suspicious_patterns.difference({'example'})), flags=re.MULTILINE
)
# only the compiled regexes are needed from here on
del _suspicious_patterns

# Matches a linter pragma line, capturing the pragma command and everything after the colon
_pragma_regex = re.compile(r'.*PetscClangLinter\s+pragma\s+(\w+):\s*(.*)')

# Regex to match /* */ patterns
_c_comment_regex = re.compile(r'\/\*(\*(?!\/)|[^*])*\*\/')

_T_co = TypeVar('_T_co', covariant=True)
94
class SectionNotFoundError(BaseError):
  r"""Raised when a heading could not be matched to any section and strict matching was requested"""
98
class GuessHeadingFailError(BaseError):
  r"""Raised when no appropriate section heading could be guessed for a line"""
102
class SectionManager:
  r"""A registry of docstring sections which can be looked up by name, title, or keyword

  Sections are stored under their `name`. They can be retrieved via attribute access (see
  `__getattr__()`), iterated over, or fuzzily matched against a heading using `find()`.
  """
  __slots__ = '_verbose', '_sections', '_findcache', '_cachekey'

  _verbose: int
  _sections: dict[str, SectionBase]
  _cachekey: tuple[str, ...]
  _findcache: dict[tuple[str, ...], dict[str, str]]

  def __init__(self, *args: SectionImpl, verbose: int = 0) -> None:
    r"""Construct a `SectionManager` object

    Parameters
    ----------
    *args :
      a set of unique sections to register with the section manager
    verbose : optional
      whether to print verbose output

    Raises
    ------
    ValueError
      if the set of sections to register is not unique
    """
    sections = {section.name : section for section in args}
    # two sections with the same name collapse into a single dict entry
    if len(sections) != len(args):
      raise ValueError('Have sections with conflicting names!')
    self._verbose   = verbose
    self._sections  = sections
    self._cachekey  = tuple(sections)
    self._findcache = {self._cachekey : {}}
    return

  def __getattr__(self, attr: str) -> SectionBase:
    r"""Allows looking up a section via its name, i.e. 'self.fortran_notes'

    Parameters
    ----------
    attr :
      the attribute name. It is first tried verbatim and then with underscores replaced by spaces
      and case-folded

    Returns
    -------
    section :
      the section registered under the (possibly normalized) name

    Raises
    ------
    AttributeError
      if no section matches `attr`, or if no sections have been set on this object yet
    """
    # This method is only called once regular attribute lookup has failed. If '_sections' is
    # itself the missing attribute (e.g. the object was made via __new__() by copy or pickle
    # and never initialized) then 'self._sections' would re-enter this method without end, so
    # the slot is read while bypassing __getattr__().
    try:
      sections = object.__getattribute__(self, '_sections')
    except AttributeError:
      raise AttributeError(attr) from None

    try:
      return sections[attr]
    except KeyError:
      pass

    try:
      return sections[attr.replace('_', ' ').casefold()]
    except KeyError:
      raise AttributeError(attr) from None

  def __iter__(self) -> Iterator[SectionBase]:
    r"""Iterate over the registered sections in registration order"""
    yield from self._sections.values()

  def __contains__(self, section: SectionImpl) -> bool:
    r"""Equivalent to `SectionManager.registered()`"""
    return self.registered(section)

  def _print(self, *args: Any, verbosity: int = 1, **kwargs: Any) -> None:
    r"""Print, but only if verbosity is high enough

    Parameters
    ----------
    *args :
      positional arguments to `petsclinter.sync_print`
    verbosity :
      the minimum verbosity at which to print
    **kwargs :
      keyword arguments to `petsclinter.sync_print`
    """
    if self._verbose >= verbosity:
      # imported here rather than at file scope
      import petsclinter as pl

      pl.sync_print(*args, **kwargs)
    return

  def set_verbose(self, verbose: int) -> int:
    r"""Sets verbosity level

    Parameters
    ----------
    verbose :
      the new verbosity level

    Returns
    -------
    verbose :
      the old verbosity level
    """
    old_verbose   = self._verbose
    self._verbose = verbose
    return old_verbose

  def find(self, heading: str, cache_result: bool = True, strict: bool = False) -> SectionBase:
    r"""Given a heading, find the section which best matches it

    Parameters
    ----------
    heading :
      the heading to search for
    cache_result : optional
      should the result of the lookup be cached?
    strict : optional
      is not finding the section considered an error?

    Returns
    -------
    section :
      the section

    Raises
    ------
    SectionNotFoundError
      if `strict` is True and a section could not be matched

    Notes
    -----
    Matching is attempted in 3 stages, stopping at the first success:

    1. a close match of `heading` against the section names
    2. a close match of `heading` against the keywords of every section
    3. a close match (with a higher cutoff) of the leading words of `heading` against the keywords

    If every stage fails (and `strict` is False) the section returned by `self.UNKNOWN_SECTION`
    is used.
    """
    lohead   = heading.casefold()
    sections = self._sections
    cache    = self._findcache[self._cachekey]
    try:
      return sections[cache[lohead]]
    except KeyError:
      pass

    section_names = sections.keys()
    found_reason  = MatchReason.NOT_FOUND
    matched       = self.UNKNOWN_SECTION.name
    keywords: list[tuple[str, str]] = []

    close = difflib.get_close_matches(heading, section_names, n=1)
    if close:
      matched      = close[0]
      found_reason = MatchReason.NAME
    else:
      # (keyword, name of owning section) pairs
      keywords = [(kw, section.name) for section in self for kw in section.keywords]
      kw_only  = [kw for kw, _ in keywords]
      close    = difflib.get_close_matches(heading, kw_only, n=1)
      if close:
        matched      = close[0]
        found_reason = MatchReason.KEYWORD
      else:
        # try if we can find a sub-word
        # if heading splits into more than 3 params, then chances are its being mislabeled
        # as a heading anyways
        for head in heading.split(maxsplit=3):
          # higher cutoff, we have to be pretty sure of a match when using subwords,
          # because it's a lot easier for false positives
          close = difflib.get_close_matches(head, kw_only, n=1, cutoff=0.8)
          if close:
            matched      = close[0]
            found_reason = MatchReason.SUBWORD
            break

    max_match_len = max(map(len, section_names))
    if found_reason == MatchReason.NOT_FOUND:
      self._print(
        80 * '*',
        f'UNHANDLED POSSIBLE HEADING! (strict = {strict}, cached = {cache_result})',
        heading,
        80 * '*',
        verbosity=2,
        sep='\n'
      )
      if strict:
        raise SectionNotFoundError(heading)
      # when in doubt, it's probably notes
      self._print(
        '*********** DEFAULTED TO {:{}} FROM {} FOR {}'.format(
          f'{matched} (strict = {strict})', max_match_len, found_reason, heading
        ),
        verbosity=2
      )
    else:
      if not found_reason.direct_match():
        # found via keyword or subword, so 'matched' currently holds a keyword. Translate it to
        # the name of the first section which lists that keyword
        matched = next(name for kw, name in keywords if kw == matched)
      self._print(
        f'**** CLOSEST MATCH FOUND {matched:{max_match_len}} FROM {found_reason} FOR {heading}',
        verbosity=2
      )

    if cache_result:
      cache[lohead] = matched
    return sections[matched]

  def registered(self, section: SectionImpl) -> bool:
    r"""Determine whether a section has already been registered with the `SectionManager`

    Parameters
    ----------
    section :
      the section to check for

    Returns
    -------
    reg :
      True if `section` has been registered, False otherwise

    Raises
    ------
    NotImplementedError
      if `section` is not derived from `SectionBase`
    """
    if not isinstance(section, SectionBase):
      raise NotImplementedError(type(section))
    return section.name in self._sections

  def gen_titles(self) -> Generator[str, None, None]:
    r"""Return a generator over all registered titles

    Returns
    -------
    gen :
      a generator yielding every entry of `titles` for every registered section, in registration
      order
    """
    return (attr for section in self for attr in section.titles)

  def is_heading(self, line: str, prev_line: str) -> Verdict:
    r"""Determine whether `line` contains a valid heading

    Parameters
    ----------
    line :
      the current line to be checked
    prev_line :
      the previous line

    Returns
    -------
    verdict :
      whether the line is a heading
    """
    def handle_header_with_colon(text: str) -> Verdict:
      # an escaped trailing colon is never a heading
      if text.endswith(r'\:'):
        return Verdict.NOT_HEADING

      # starts with '<known title>:'
      textlo = text.casefold()
      if any(map(textlo.startswith, (t.casefold() + ':' for t in self.gen_titles()))):
        return Verdict.IS_HEADING

      if text.endswith(':'):
        # ends in a colon, so it is treated as a heading, but these characters (or a suspicious
        # phrase directly before the colon) indicate that it is probably regular text
        if any(map(text.__contains__, (' - ', '=', '(', ')', '%', '$', '@', '#', '!', '^', '&', '+'))):
          return Verdict.IS_HEADING_BUT_PROBABLY_SHOULDNT_BE

        if _suspicious_colon_regex.search(textlo) is None:
          return Verdict.IS_HEADING
        return Verdict.IS_HEADING_BUT_PROBABLY_SHOULDNT_BE

      # the colon is somewhere in the middle of the line, see if whatever precedes it can be
      # matched to a section other than the default section
      try:
        _, _, section = self.fuzzy_find_section(text, cache_result=False, strict=True)
      except GuessHeadingFailError:
        return Verdict.NOT_HEADING
      return Verdict.NOT_HEADING if isinstance(section, DefaultSection) else Verdict.IS_HEADING

    def handle_header_without_colon(line: str, prev_line: str) -> Verdict:
      linelo  = line.casefold()
      results = list(filter(linelo.startswith, map(str.casefold, self.gen_titles())))
      if not results:
        return Verdict.NOT_HEADING
      if _suspicious_plain_regex.search(' '.join((prev_line.casefold(), linelo))):
        # suspicious regex detected, err on the side of caution and say this line is not a
        # heading
        return Verdict.NOT_HEADING
      # not suspicious, still not 100% though
      return Verdict.MAYBE_HEADING

    prev_line = prev_line.strip()
    line      = line.strip()
    # empty lines, and lines starting with these markers, are never headings
    if not line or line.startswith(('+', '. ', '-', '$', '.vb', '.ve')):
      return Verdict.NOT_HEADING
    if ':' in line:
      return handle_header_with_colon(line)
    return handle_header_without_colon(line, prev_line)

  def fuzzy_find_section(self, line: str, strict: bool = False, **kwargs) -> tuple[str, str, SectionBase]:
    r"""Try to fuzzy guess what section a heading belongs to.

    Parameters
    ----------
    line :
      the line
    strict : optional
      whether to be strict about matching. If True, matching the `DefaultSection` counts as a
      failure. Note that this value is not forwarded to `SectionManager.find()`
    **kwargs :
      additional keywords arguments to `SectionManager.find()`

    Returns
    -------
    attempt :
      the attempt which was successful
    match_title :
      the matched title of the guessed section
    section :
      the matched section

    Raises
    ------
    GuessHeadingFailError
      if header guessing failed

    Notes
    -----
    This needs to be combined with self.find() somehow...
    """
    # only the text before the first colon is considered
    if strp := line.split(':', maxsplit=1)[0].strip():
      # try the text as-is, then only its first word, then the text in title-case
      for attempt in (strp, strp.split(maxsplit=1)[0].strip(), strp.title()):
        section = self.find(attempt, **kwargs)
        titles  = section.titles
        if len(titles) > 1:
          # multiple titles to choose from, pick the one closest to the attempt (if any)
          titles = tuple(difflib.get_close_matches(attempt, titles, n=1))

        if titles:
          if strict and isinstance(section, DefaultSection):
            break
          return attempt, titles[0], section

    raise GuessHeadingFailError(f'Could not guess heading for:\n{line}')
422
423@DiagnosticManager.register(
424  ('internal-linkage','Verify that symbols with internal linkage don\'t have docstrings'),
425  ('sowing-chars','Verify that sowing begin and end indicators match the symbol type'),
426  ('symbol-spacing','Verify that dosctrings occur immediately above that which they describe'),
427  ('indentation','Verify that docstring text is correctly indented'),
428  ('section-spacing','Verify that there section headers are separated by at least 1 empty line'),
429  ('section-header-maybe-header','Check for lines that seem like they are supposed to be headers'),
430  ('section-header-fishy-header','Check for headers that seem like they should not be headers'),
431)
432class PetscDocString(DocBase):
433  """
434  Container to encapsulate a sowing docstring and retrieve various objects for it.
435  Essentially a Cursor for comments.
436  """
437
438  # to pacify type checkers...
439  diags: DiagnosticMap
440
441  Type     = DocStringType
442  Modifier = DocStringTypeModifier
443  sections = SectionManager(
444    Synopsis(),
445    FunctionParameterList(),
446    OptionDatabaseKeys(),
447    Notes(),
448    SourceCode(),
449    DeveloperNotes(),
450    References(),
451    FortranNotes(),
452    Level(),
453    SeeAlso(),
454    DefaultSection(),
455  )
456  sowing_types       = {'@', 'S', 'E', 'M'}
457  clx_to_sowing_type = {
458    clx.TypeKind.FUNCTIONPROTO : ('@', 'functions', Type.FUNCTION),
459    clx.TypeKind.ENUM          : ('E', 'enums',     Type.ENUM),
460  }
461  __slots__ = '_linter', 'cursor', 'raw', 'extent', 'indent', 'type', 'type_mod', '_attr'
462
463  _linter: Linter
464  cursor: Cursor
465  raw: str
466  extent: SourceRange
467  indent: int
468  type: DocStringType
469  type_mod: DocStringTypeModifier
470  _attr: dict[str, Any]
471
472  def __init__(self, linter: Linter, cursor: Cursor, indent: int = 2) -> None:
473    r"""Construct a `PetscDocString
474
475    Parameters
476    ----------
477    linter :
478      a `Linter` instance
479    cursor :
480      the cursor to which this docstring belongs
481    indent : optional
482      the number of line indents for normal lines
483    """
484    self.sections.set_verbose(linter.verbose)
485    self._linter          = linter
486    self.cursor           = cursor
487    self.raw, self.extent = self._get_sanitized_comment_and_range_from_cursor(self.cursor)
488    self.indent           = indent
489    self.type             = self.Type.UNKNOWN
490    self.type_mod         = self.Modifier.NONE
491    self._attr            = self._default_attributes()
492    return
493
494  @staticmethod
495  def _default_attributes() -> dict[str, Any]:
496    return dict()
497
498  @classmethod
499  def _is_valid_docstring(cls, cursor: Cursor, raw: str, doc_extent: SourceRange) -> bool:
500    r"""Determine whether docstring in `raw` (of `cursor`) is a valid sowing docstring worth checking.
501
502    Parameters
503    ----------
504    cursor :
505      the cursor to which the docstring belongs
506    raw :
507      the raw text of the docstring
508    doc_extent :
509      the source range for the docstring itself
510
511    Returns
512    -------
513    ret :
514      True if the docstring is valid, False otherwise
515    """
516    if not raw or not isinstance(raw, str):
517      return False
518
519    # if cursor.extent is *before* the doc_extent we have the following situation:
520    #
521    # extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
522    # ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ cursor.extent
523    # ...
524    # /*@                     <
525    #   MatMult_SeqFFTW - ... < doc_extent
526    # */                      <
527    # PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
528    #
529    # We can safely "ignore" this cursor and mark it as not a docstring since we will hit
530    # the main cursor (i.e. the function definition) later on.
531    if cursor.extent < doc_extent:
532      return False
533
534    # if we find sowing chars, its probably a docstring
535    raw = raw.strip()
536    if any(raw.startswith(f'/*{char}') for char in cls.sowing_types):
537      return True
538
539    # if we find at least 2 titles, likely this is a docstring, unless it ends in one of
540    # the internal suffixes or has internal linkage
541    rawlo      = raw.casefold()
542    have_title = sum(f'{title}:' in rawlo for title in map(str.casefold, cls.sections.gen_titles()))
543    if have_title < 2:
544      return False
545
546    # intentionally private symbols don't have docstrings. This is not technically correct
547    # since people might still create a docstring for them, but it's very tricky to guess
548    # what people meant
549    if cursor.name.casefold().endswith(('_private', '_internal')):
550      return False
551
552    # likewise symbols with explicitly internal linkage are not considered to have a docstring
553    has_internal_linkage, _, _ = cursor.has_internal_linkage()
554    return not has_internal_linkage
555
556  @classmethod
557  def _get_sanitized_comment_and_range_from_cursor(cls, cursor: Cursor) -> tuple[str, SourceRange]:
558    r"""Get the raw docstring text and its source range from a cursor
559
560    Parameters
561    ----------
562    cursor :
563      the cursor
564
565    Returns
566    -------
567    raw :
568      the raw docstring text
569    range :
570      the source range for `raw`
571
572    Raises
573    ------
574    KnownUnhandleableCursorError
575      if the cursor is not a valid docstring
576    """
577    raw, clx_extent = cursor.get_comment_and_range()
578    extent          = SourceRange.cast(clx_extent, tu=cursor.translation_unit)
579
580    if not cls._is_valid_docstring(cursor, raw, extent):
581      raise KnownUnhandleableCursorError('Not a docstring')
582
583    last_match = None
584    for re_match in _c_comment_regex.finditer(raw):
585      last_match = re_match
586
587    assert last_match is not None
588    if start := last_match.start():
589      # this handles the following case:
590      #
591      # /* a dummy comment that is attributed to the symbol */
592      # /*
593      #   the real docstring comment, note no empty line between this and the previous!
594      #   // also handles internal comments
595      #   /* of both kinds */
596      # */
597      # <the symbol>
598      assert start > 0
599      extent = extent.resized(lbegin=raw.count('\n', 0, start), cbegin=None, cend=None)
600      raw    = raw[start:]
601    return raw, extent
602
603  def get_pragmas(self) -> dict[str, set[re.Pattern[str]]]:
604    r"""Retrieve a dict of pragmas for a particular docstring
605
606    Returns
607    -------
608    pragmas :
609      the pragmas
610
611    Notes
612    -----
613    `pragmas` is in the form:
614
615    {command_name : set(regex_patterns)}
616    """
617    def str_remove_prefix(string: str, prefix: str) -> str:
618      if py_version_lt(3, 9):
619        ret = string[len(prefix):] if string.startswith(prefix) else string
620      else:
621        # the type checkers do not grok the py_version_lt version guard:
622        # error: "str" has no attribute "removeprefix"
623        ret = string.removeprefix(prefix) # type: ignore[attr-defined]
624      return ret
625
626    start       = self.extent.start
627    flag_prefix = DiagnosticManager.flagprefix
628    pragmas: collections.defaultdict[str, set[re.Pattern[str]]] = collections.defaultdict(set)
629    for line in reversed(_util.read_file_lines_cached(start.file.name, 'r')[:start.line - 1]):
630      line = line.rstrip()
631      if line.endswith(('}', ';', ')', '>', '"')):
632        break
633      if re_match := _pragma_regex.match(line):
634        pragmas[re_match.group(1)].update(
635          map(
636            re.compile,
637            filter(None, map(str.strip, str_remove_prefix(re_match.group(2), flag_prefix).split(',')))
638          )
639        )
640    return dict(pragmas)
641
642  def make_source_location(self, lineno: int, col: int) -> SourceLocation:
643    r"""Make a `SourceLocation`
644
645    Parameters
646    ----------
647    lineno :
648      the line number of the location
649    col :
650      the column number of the location
651
652    Returns
653    -------
654    loc :
655      the `SourceLocation`
656
657    Notes
658    -----
659    This is a convenience routine for attaching the docstrings' cursors' translation unit to the
660    source location
661    """
662    return SourceLocation.from_position(self.cursor.translation_unit, lineno, col)
663
664  def make_source_range(self, token: str, string: str, lineno: int, offset: int = 0) -> SourceRange:
665    r"""Make a `SourceRange` from a token
666
667    Parameters
668    ----------
669    token :
670      the substring of `string` to make the `SourceRange` for
671    string :
672      the string to search for `token` in
673    lineno :
674      the line number of the range
675    offset : optional
676      the offset into `string` from which to search
677
678    Returns
679    -------
680    rng :
681      the `SourceRange`
682
683    Notes
684    -----
685    Like `PetscDocString.make_source_location()` this is a convenience routine for properly attaching
686    the translation unit to the `SourceRange`. Note though that this is only produces single-lined
687    `SourceRange`s.
688    """
689    col_begin = string.index(token, offset) + 1
690    col_end   = col_begin + len(token)
691    return SourceRange.from_positions(self.cursor.translation_unit, lineno, col_begin, lineno, col_end)
692
693  def make_diagnostic(self, kind: DiagnosticKind, diag_flag: str, msg: str, src_range: Optional[Union[SourceRange, Cursor]], patch: Optional[Patch] = None, **kwargs) -> Diagnostic:
694    r"""Construct a `Diagnostic`
695
696    Parameters
697    ----------
698    kind :
699      the class of `Diagnostic` to create
700    diag_flag :
701      the command-line flag controlling the diagnostic
702    msg :
703      the description message for the diagnostic, e.g. the error emssage
704    src_range : optional
705      the source range to attribute to the diagnostic, if None, the extent for the entire docstring is
706      used
707    patch : optional
708      the patch to fix the diagnostic
709
710    Returns
711    -------
712    diag :
713      the constructed `Diagnotic`
714    """
715    if src_range is None:
716      src_range = self.extent
717    else:
718      src_range = SourceRange.cast(src_range)
719    return Diagnostic.from_source_range(kind, diag_flag, msg, src_range, patch=patch, **kwargs)
720
721  def add_diagnostic_from_source_range(self, kind: DiagnosticKind, diag_flag: str, msg: str, src_range: SourceRangeLike, **kwargs) -> None:
722    r"""Log an error from a given source range
723
724    Parameters
725    ----------
726    kind :
727      the kind of `Diagnostic` to add
728    diag_flag :
729      the diagnostic flag to control the error
730    msg :
731      the diagnostic message describing the problem in detail
732    src_range :
733      the `SourceRange` which shows the error in the source
734    **kwargs :
735      any additional keyword arguments to `PetscDocString.make_diagnostic()`
736    """
737    return self.add_diagnostic(self.make_diagnostic(kind, diag_flag, msg, src_range, **kwargs))
738
739  def add_diagnostic(self, diagnostic: Diagnostic, cursor: Optional[Cursor] = None) -> None:
740    r"""Log an error from a fully-formed diagnostic
741
742    Parameters
743    ----------
744    diagnostic :
745      the diagnostic describing the error
746    cursor : optional
747      the cursor to attach the error to, if None, the docstrings cursor is used
748    """
749    return self._linter.add_diagnostic_from_cursor(
750      self.cursor if cursor is None else cursor, diagnostic
751    )
752
753  def reset(self) -> None:
754    r"""Reset any internal state for the `PetscDocString`
755
756    Notes
757    -----
758    This probably doesn't fully work.
759    """
760    for section in self.sections:
761      section.clear()
762    self._attr = self._default_attributes()
763    return
764
765  def guess_heading(self, line: str, **kwargs) -> tuple[str, str, SectionBase]:
766    r"""A shorthand for `SectionManager.fuzzy_find_section()`"""
767    return self.sections.fuzzy_find_section(line, **kwargs)
768
769  def _check_floating(self) -> None:
770    r"""Check that the docstring isn't a floating docstring, i.e. for a mansection or particular type
771
772    Raises
773    ------
774    KnownUnhandleableCursorError
775      if the docstring is 'floating', i.e. has 'M' in it
776    """
777    for line in filter(None, map(str.lstrip, self.raw.splitlines())):
778      if not line.startswith(('/*', '//')):
779        lsplit = line.split()
780        try:
781          is_floating = lsplit[0].isupper() and lsplit[1] in {'-', '='}
782        except IndexError:
783          # the lsplit[1] indexing failed, if it is a macro docstring, it is likely
784          # floating
785          is_floating = self.Modifier.MACRO in self.type_mod
786        if is_floating:
787           # don't really know how to handle this for now
788          self.type_mod |= self.Modifier.FLOATING
789          raise KnownUnhandleableCursorError(
790            'DON\'T KNOW HOW TO PROPERLY HANDLE FLOATING DOCSTRINGS'
791          )
792        break
793    return
794
795  def _check_valid_cursor_linkage(self) -> bool:
796    r"""Check that a cursor has external linkage, there is no point producing a manpage for function
797    that is impossible to call.
798
799    Returns
800    -------
801    ret :
802      True if the cursor has external linkage (and therefore should be checked), False if the cursor
803      has internal linkage (and is therefore pointless to check)
804    """
805    cursor = self.cursor
806    # TODO, this should probably also check that the header the cursor is defined in is public
807    has_internal_linkage, linked_cursor_name, linkage_cursor = cursor.has_internal_linkage()
808    # sometimes a static function has the class description above it, for example
809    # VECSEQCUDA sits above a private cuda impls function
810    pointless = has_internal_linkage and not (
811      cursor.location.file.name.endswith(('.h', '.hpp', '.inc')) or
812      self.Modifier.FLOATING in self.type_mod
813    )
814    if pointless:
815      assert linkage_cursor is not None
816      begin_sowing_range = self._attr['sowing_char_range']
817      linkage_extent     = SourceRange.cast(linkage_cursor.extent)
818      diag               = self.make_diagnostic(
819        Diagnostic.Kind.ERROR, self.diags.internal_linkage,
820        'A sowing docstring for a symbol with internal linkage is pointless', self.extent,
821        highlight=False
822      ).add_note(
823        Diagnostic.make_message_from_formattable(
824          f'\'{cursor.displayname}\' is declared \'{linked_cursor_name}\' here', crange=linkage_extent
825        ),
826        location=linkage_extent.start
827      ).add_note(
828        'If this docstring is meant as developer-only documentation, remove the sowing chars from the docstring declaration. The linter will then ignore this docstring.'
829      ).add_note(
830        Diagnostic.make_message_from_formattable(
831          'Sowing chars declared here', crange=begin_sowing_range
832        ),
833        location=begin_sowing_range.start
834      )
835      self.add_diagnostic(diag)
836    return not pointless
837
838  def _check_valid_sowing_chars(self) -> None:
839    r"""Check that the sowing prefix and postfix match the expected and are symmetric
840
841    Raises
842    ------
843    KnownUnhandleableCursorError
844      if start of the comment line is invalid
845    RuntimeError
846      if the start comment contains an unknown sowing char
847    """
848    sowing_type, lay_type, self.type = self.clx_to_sowing_type[self.cursor.type.kind]
849    # check the beginning
850    splitlines            = self.raw.splitlines()
851    line                  = splitlines[0]
852    begin_sowing_range    = self.make_source_range(line, line, self.extent.start.line)
853    diag_name             = self.diags.sowing_chars
854    possible_sowing_chars = line.split('/*')[1].split()
855    try:
856      begin_sowing = possible_sowing_chars[0]
857    except IndexError:
858      begin_sowing = sowing_type
859      mess         = f'Invalid comment begin line, does not contain sowing identifier. Expected \'/*{sowing_type}\' for {lay_type}'
860      self.add_diagnostic_from_source_range(Diagnostic.Kind.ERROR, diag_name, mess, begin_sowing_range)
861    else:
862      assert isinstance(begin_sowing, str), f'begin_sowing is not a string: {begin_sowing}'
863      if begin_sowing[0] not in self.sowing_types:
864        diagnosed = False
865        if line[line.find(begin_sowing) - 1].isspace():
866          # There is a space between the "sowing char" and the character before
867          # it. Therefore it is likely just regular text. Sometimes people make internal
868          # sowing-like docstrings just to keep things consistent, for example:
869          #
870          #        v--- identified as begin_sowing
871          # /*     KSPSolve_LCD - This routine actually applies the left conjugate
872          # ...
873          #
874          # we should ignore it, and stop processing this docstring altogether since it is
875          # not an actual docstring.
876          raise KnownUnhandleableCursorError
877        if begin_sowing[0] == 'C':
878          # sometimes people mix up the order, or forget to add the right letter for the
879          # type, for example:
880          #
881          #   v--- begin_sowing, should be @C
882          # /*C
883          #   MatElimininateZeroes
884          #
885          if len(begin_sowing) == 1:
886            # they forgot the correct identifier
887            sub_mess  = f'It appears you forgot to prepend \'{sowing_type}\''
888            expected  = f'{sowing_type}{begin_sowing}'
889            diagnosed = True
890            # making a new source range instead of using begin_sowing_range is
891            # deliberate. The line may still contain other garbage, i.e.:
892            #
893            # /*C FooBarBaz - asdasdasdasd
894            #   ^~~~~~~~~~~~~~~~~~~~~~~~~^ begin_sowing_range
895            #
896            # which we do not want to overwrite with 'expected'. In order for the patch to
897            # be maximally stable we also don't want to have the replacement contain the
898            # (possibly) trailing stuff, so we make our new range just encompass 'C'.
899            patch = Patch(
900              self.make_source_range(begin_sowing, line, begin_sowing_range.start.line), expected
901            )
902          elif any(c in self.sowing_types for c in begin_sowing):
903            # wrong order
904            sub_mess  = 'Did you put it in the wrong order'
905            expected  = f'{sowing_type}{begin_sowing.replace(sowing_type, "")}'
906            diagnosed = True
907            patch     = None
908          if diagnosed:
909            self.add_diagnostic_from_source_range(
910              Diagnostic.Kind.ERROR, diag_name,
911              f'Invalid docstring identifier, contains unexpected char sequence \'{begin_sowing}\', expected \'/*{expected}\'. {sub_mess}?',
912              begin_sowing_range,
913              patch=patch
914            )
915        if not diagnosed:
916          raise RuntimeError(f'Unknown sowing char {begin_sowing[0]} not in sowing types {self.sowing_types} found in {line}')
917      begin_sowing_range = self.make_source_range(begin_sowing, line, begin_sowing_range.start.line)
918
919    self._attr['sowing_char_range'] = begin_sowing_range
920
921    if 'M' in begin_sowing:
922      self.type_mod |= self.Modifier.MACRO
923    if 'C' in begin_sowing:
924      self.type_mod |= self.Modifier.C_FUNC
925
926    # check that nothing else is on the comment begin line
927    lsplit = line.strip().split(maxsplit=1)
928    if len(lsplit) != 1:
929      rest    = lsplit[1]
930      restloc = self.make_source_range(rest, line, self.extent.start.line)
931      mess    = 'Invalid comment begin line, must only contain \'/*\' and sowing identifier'
932      self.add_diagnostic_from_source_range(
933        Diagnostic.Kind.ERROR, diag_name, mess, restloc,
934        patch=Patch(restloc, '\n' + (' '*self.indent) + rest)
935      )
936    return
937
938  def _check_valid_docstring_spacing(self) -> None:
939    r"""Check that the docstring itself is flush against the thing it describes.
940
941    Notes
942    -----
943    Checks that the docstring looks like
944    ```
945    /*
946      PetscFooBar - ...
947    */
948    PetscErrorCode PetscFooBar(...)
949    ```
950    not
951    ```
952    /*
953      PetscFooBar - ...
954    */
955
956    PetscErrorCode PetscFooBar(...)
957    ```
958    """
959    if self.Modifier.FLOATING in self.type_mod:
960      return # floating docstring sections need not be checked for this
961
962    end_line     = self.extent.end.line + 1
963    cursor_start = self.cursor.extent.start
964    if end_line != cursor_start.line:
965      # there is at least 1 (probably empty) line between the comment end and whatever it
966      # is describing
967      diag = self.diags.symbol_spacing
968      mess = 'Invalid line-spacing between docstring and the symbol it describes. The docstring must appear immediately above its target'
969      eloc = self.make_source_range('', '', end_line)
970      floc = SourceRange.from_locations(self.make_source_location(end_line, 1), cursor_start)
971      self.add_diagnostic_from_source_range(
972        Diagnostic.Kind.ERROR, diag, mess, eloc, highlight=False, patch=Patch(floc, '')
973      )
974    return
975
976  def _check_valid_indentation(self, lineno: int, line: str, left_stripped: str) -> None:
977    r"""If the line is regular (not empty, or a parameter list), check that line is indented correctly
978
979    Parameters
980    ----------
981    lineno :
982      the line number of the line
983    line :
984      the line itself
985    left_stripped :
986      the line that has been left-stripped
987    """
988    if linelen := len(line):
989      indent       = linelen - len(left_stripped)
990      expected_ind = 0 if line.startswith(('.', '+', '-', '$')) else self.indent
991      if indent != expected_ind:
992        diag = self.diags.indentation
993        loc  = self.make_source_range(' ' * indent, line, lineno)
994        mess = f'Invalid indentation ({indent}), all regular (non-empty, non-parameter, non-seealso) text must be indented to {self.indent} columns'
995        self.add_diagnostic_from_source_range(
996          Diagnostic.Kind.ERROR, diag, mess, loc, patch=Patch(loc, ' ' * expected_ind)
997        )
998    return
999
1000  def _check_valid_section_spacing(self, prevline: str, lineno: int) -> None:
1001    r"""Check that sections have at least 1 empty line between them
1002
1003    Parameters
1004    ----------
1005    prevline :
1006      the previous line
1007    lineno :
1008      the current line number
1009
1010    Notes
1011    -----
1012    Checks that sections are formatted like
1013    ```
1014    Notes:
1015    asdadsadasdads
1016
1017    Example Usage:
1018    asdasdasd
1019    ```
1020    not
1021    ```
1022    Notes:
1023    asdasdasd
1024    Example Usage:
1025    asdadasd
1026    ```
1027    """
1028    if prevline and not prevline.isspace():
1029      loc = self.make_source_range('', '', lineno)
1030      self.add_diagnostic_from_source_range(
1031        Diagnostic.Kind.ERROR, self.diags.section_spacing,
1032        'Missing empty line between sections, must have one before this section',
1033        loc, highlight=False, patch=Patch(loc, '\n')
1034      )
1035    return
1036
1037  def _check_section_header_typo(self, verdict: Verdict, line: str, lineno: int) -> Verdict:
1038    r"""Check that a section header that looks like a section header is actually one
1039
1040    Parameters
1041    ----------
1042    verdict :
1043      the current header verdict of the line
1044    line :
1045      the line
1046    lineno :
1047      the line number
1048
1049    Returns
1050    -------
1051    verdict :
1052      the new verdict (if changed)
1053    """
1054    if verdict == Verdict.MAYBE_HEADING:
1055      try:
1056        name, match_title, _ = self.guess_heading(line, strict=True)
1057      except GuessHeadingFailError as ghfe:
1058        # Not being able to guess the heading here is OK since we only *think* it's a
1059        # heading
1060        self.sections._print(ghfe)
1061        return Verdict.NOT_HEADING
1062      if ':' in line:
1063        mess = f'Line seems to be a section header but doesn\'t directly end with \':\', did you mean \'{match_title}\'?'
1064      else:
1065        mess = f'Line seems to be a section header but missing \':\', did you mean \'{match_title}:\'?'
1066      self.add_diagnostic_from_source_range(
1067        Diagnostic.Kind.ERROR, self.diags.section_header_maybe_header, mess,
1068        self.make_source_range(name, line, lineno)
1069      )
1070    return verdict
1071
1072  def _check_section_header_that_probably_should_not_be_one(self, verdict: Verdict, line: str, stripped: str, lineno: int) -> Verdict:
1073    r"""Check that a section header that ends with ':' is not really a header
1074
1075    Parameters
1076    ----------
1077    verdict :
1078      the current heading verdict
1079    line :
1080      the line
1081    stripped :
1082      `line` but stripped
1083    lineno :
1084      the line number
1085
1086    Returns
1087    -------
1088    verdict :
1089      the update verdict
1090    """
1091    if verdict < 0:
1092      try:
1093        _, _, section_guess = self.guess_heading(line, cache_result=False)
1094      except GuessHeadingFailError as ghfe:
1095        # Not being able to guess the heading here is OK since we aren't sure this isn't a
1096        # heading after all
1097        self.sections._print(ghfe)
1098        verdict = Verdict.NOT_HEADING
1099      else:
1100        assert isinstance(section_guess, SectionBase)
1101        if isinstance(section_guess, DefaultSection):
1102          # we could not find a suitable section for it
1103          assert not line.endswith(r'\:')
1104          eloc = self.make_source_range(':', line, lineno, offset=line.rfind(':'))
1105          mess = f'Sowing treats all lines ending with \':\' as header, are you sure \'{textwrap.shorten(stripped, width=35)}\' qualifies? Use \'\:\' to escape the colon if not'
1106          self.add_diagnostic_from_source_range(
1107            Diagnostic.Kind.ERROR, self.diags.section_header_fishy_header, mess, eloc
1108          )
1109    return verdict
1110
1111  def parse(self) -> PetscDocString:
1112    r"""Parse a docstring
1113
1114    Returns
1115    -------
1116    docstring :
1117      the `PetscDocString` instance
1118
1119    Raises
1120    ------
1121    KnownUnhandleableCursorError
1122      if the cursor has internal linkage and should not have its docstring checked
1123    """
1124    self.reset()
1125    self._check_valid_sowing_chars()
1126    self._check_floating()
1127    if not self._check_valid_cursor_linkage():
1128      # no point in continuing analysis, the docstring should not exist!
1129      raise KnownUnhandleableCursorError()
1130    self._check_valid_docstring_spacing()
1131
1132    section      = self.sections.synopsis
1133    check_indent = section.check_indent_allowed()
1134    # if True we are in a verbatim block. We should not try to detect any kind of
1135    # headers until we reach the end of the verbatim block
1136    in_verbatim = 0
1137    prev_line   = ''
1138
1139    raw_data: list[tuple[SourceRange, str, Verdict]] = []
1140    for lineno, line in enumerate(self.raw.splitlines(), start=self.extent.start.line):
1141      left_stripped = line.lstrip()
1142      stripped      = left_stripped.rstrip()
1143      if stripped.startswith('/*') or stripped.endswith('*/'):
1144        continue
1145
1146      # TODO remove this, the current active section should be deciding what to do here instead
1147      # we shouldn't be checking indentation in verbatim blocks
1148      if stripped.startswith('.vb'):
1149        check_indent = False
1150        in_verbatim  = 1
1151      elif stripped.startswith('.ve'):
1152        check_indent = True # note we don't need to check indentation of line with .ve
1153        in_verbatim  = 0
1154      elif stripped.startswith('$'):
1155        # inline verbatim don't modify check flag but dont check indentation either
1156        in_verbatim = 2
1157      elif check_indent:
1158        self._check_valid_indentation(lineno, line, left_stripped)
1159
1160      if in_verbatim == 0:
1161        heading_verdict = self.sections.is_heading(stripped, prev_line)
1162        heading_verdict = self._check_section_header_typo(heading_verdict, line, lineno)
1163        if heading_verdict > 0:
1164          # we may switch headings, we should check indentation
1165          if not check_indent:
1166            self._check_valid_indentation(lineno, line, left_stripped)
1167          self._check_valid_section_spacing(prev_line, lineno)
1168          new_section = self.sections.find(stripped.split(':', maxsplit=1)[0].strip().casefold())
1169          if new_section != section:
1170            raw_data     = section.consume(raw_data)
1171            section      = new_section
1172            check_indent = section.check_indent_allowed()
1173        else:
1174          heading_verdict = self._check_section_header_that_probably_should_not_be_one(
1175            heading_verdict, line, stripped, lineno
1176          )
1177      else:
1178        # verbatim blocks are never headings
1179        heading_verdict = Verdict.NOT_HEADING
1180
1181      raw_data.append((self.make_source_range(line, line, lineno), line, heading_verdict))
1182      if in_verbatim == 2:
1183        # reset the dollar verbatim
1184        in_verbatim = 0
1185      prev_line = stripped
1186
1187    section.consume(raw_data)
1188    for sec in self.sections:
1189      sec.setup(self)
1190    return self
1191
# Drop the module-level names of the helper enums. Presumably they stay reachable through
# class-level aliases (the methods above use `self.Modifier`) -- TODO confirm.
del DocStringType, DocStringTypeModifier
1194