xref: /petsc/doc/ext/html5_petsc.py (revision a96705020bd7e362116d3e2c8daa1efc28dedc1e)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import types
7
8from docutils import nodes
9from docutils.nodes import Element, Text
10
11from sphinx import version_info as sphinx_version_info
12from sphinx.writers.html5 import HTML5Translator
13from sphinx.application import Sphinx
14
# Compatibility shim: when the ``re`` module exposes no ``Pattern`` attribute,
# alias it to the private ``re._pattern_type``.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Literal placeholder that is prepended to relative manual page links
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
19
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Runs the Sphinx version check, then connects ``_setup_translators`` to the
    ``builder-inited`` event so the translator mixin is installed once the
    builder is known.

    Returns the extension metadata dict, which declares ``parallel_read_safe``.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
25
26
def _check_version(app: Sphinx) -> None:
    """ Compare the running Sphinx against the version this extension copied code from.

    Passes '<major>.<minor>' of the reference version to ``app.require_sphinx``
    and prints a notice when the running major/minor pair is different.
    """
    # Version of Sphinx from which the duplicated translator code was taken
    reference = (4, 2, 0, 'final', 0)
    reference_major_minor = reference[:2]

    app.require_sphinx('%s.%s' % reference_major_minor)

    if sphinx_version_info[:2] == reference_major_minor:
        return

    print('A custom extension duplicates code from Sphinx %s ' % (reference,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
34
35
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the Sphinx HTML translator without overriding

    Every translator class already present in the registry, and the builder's
    default translator class, is replaced by a new class that puts
    PETScHTMLTranslatorMixin in front of it. This allows use of other
    extensions which modify the translator.

    Does nothing unless the builder's format is 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base_class):
        # New class whose MRO consults the PETSc mixin before the wrapped translator
        return types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base_class),
            {},
        )

    for builder_name, registered_class in app.registry.translators.items():
        app.set_translator(builder_name, with_petsc_mixin(registered_class), override=True)

    # NOTE(review): this call overrides whatever is registered under the current
    # builder's name, including an entry the loop above has just wrapped, with a
    # class derived from the builder's default translator -- confirm this is intended.
    app.set_translator(
        app.builder.name,
        with_petsc_mixin(app.builder.default_translator_class),
        override=True,
    )
67
68
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml

    Intended to be placed in front of a Sphinx HTML translator class in the
    bases of a new class; it relies on attributes of that translator
    (``body``, ``encode``, ``highlighter``, ...).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Set up the lazy manual page map and word pattern, then defer to the next class.

        All positional and keyword arguments are forwarded unchanged.
        """
        # Filled on first use by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        # Candidate manual page names are maximal runs of word characters
        self._word_pattern = re.compile(r'\w+')
        super().__init__(*args, **kwargs)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a manual page name to a complete ``<a href=...>`` tag.
        It is built from the file ``manualpages/htmlmap`` (relative to the
        current working directory) on the first call and cached on the instance.

        Raises FileNotFoundError (a subclass of Exception) if that file is missing.
        """
        # Compare against None so that an empty map is cached as well,
        # instead of re-reading the file on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise FileNotFoundError("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # This is a URL prefix, so always use '/' rather than the
            # separator of the operating system running the build.
            manpage_prefix = PETSC_DOC_OUT_ROOT_PLACEHOLDER + '/'
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map


    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every run of word characters that is a key of the manual page map is
        replaced by the corresponding link tag; everything else is kept as is.
        The whole string is scanned, so if it already contains markup, words
        inside that markup are candidates too.
        """
        manpage_map = self._get_manpage_map()

        def replace(matchobj):
            word = matchobj.group(0)
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)


    # This method consists mostly of code duplicated from Sphinx:
    # it overrides the method of the wrapped translator.
    def visit_Text(self, node: 'Text') -> None:
        """ Emit a text node, linking manual page names inside protected literal text.

        Outside of protected literal text, the encoded text is appended as is
        (after email cloaking when that applies).
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # it overrides the method of the wrapped translator.
    def visit_literal_block(self, node: 'Element') -> None:
        """ Emit a highlighted literal block with manual page names turned into links.

        Blocks whose raw source differs from their text are passed to the next
        class untouched. Otherwise the block is highlighted, linked, appended to
        the body, and nodes.SkipNode is raised so the children are not visited.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
167        raise nodes.SkipNode
168
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read the 'man:' entries of an htmlmap file into a dict of name -> URL.

    URLs are stored exactly as they appear in the file. Lines that start with
    'man:' but do not have the expected layout are reported on stdout and
    skipped; all other lines are ignored silently.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')

    with open(htmlmap_filename, 'r') as htmlmap:
        man_lines = [raw for raw in htmlmap if raw.startswith('man:')]

    name_to_url = {}
    for raw in man_lines:
        found = entry_pattern.match(raw)
        if found is None:
            print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
            print(raw)
            continue
        # The middle group is matched but not used
        name, _unused, url = found.groups()
        name_to_url[name] = url
    return name_to_url
183
184
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Convert a dict of name -> link into a dict of name -> '<a href="URL">name</a>'.

    URL is the link itself when it starts with 'http', and prefix + link otherwise.
    """
    anchors = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        anchors[name] = ''.join(('<a href="', url, '">', name, '</a>'))
    return anchors
193