xref: /petsc/doc/ext/html5_petsc.py (revision 07f25ad1711774a9442d064cc5f042f486b5a62e)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import types
7
8from docutils import nodes
9from docutils.nodes import Element, Text
10
11from sphinx import version_info as sphinx_version_info
12from sphinx.writers.html5 import HTML5Translator
13from sphinx.application import Sphinx
14
# Compatibility shim: when the running Python's ``re`` module does not expose
# ``re.Pattern``, alias it to the private ``re._pattern_type``.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Root used for every generated manual page link (see PETScHTMLTranslatorMixin).
# NOTE(review): presumably substituted with the real output root by a later
# processing step -- confirm against the rest of the docs build.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
19
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Checks the running Sphinx version, then registers ``_setup_translators``
    to run on the 'builder-inited' event.

    Returns the extension metadata dict, which declares this extension as
    safe for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
25
26
def _check_version(app: Sphinx) -> None:
    """ Require a minimum Sphinx version and warn when it is not the duplicated one.

    The translator mixin in this file duplicates code from one specific Sphinx
    release. The major.minor of that release is passed to
    ``app.require_sphinx``, and a notice is printed when the running Sphinx
    has a different major.minor.
    """
    duplicated_from = (4, 2, 0, 'final', 0)
    major, minor = duplicated_from[:2]
    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info[:2] == (major, minor):
        return

    print('A custom extension duplicates code from Sphinx %s ' % (duplicated_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
34
35
def _setup_translators(app: Sphinx) -> None:
    """ Layer the PETSc mixin on top of the HTML translators already in use

    A mixin strategy is used so that the Sphinx HTML translator is extended
    rather than replaced, which keeps other extensions that modify the
    translator usable.

    Does nothing unless the builder's format is 'html'.

    The approach duplicates the one used in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base_translator):
        # Create a new class on the fly with the mixin placed before the base
        return types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base_translator),
            {},
        )

    # Every translator that is already registered gets wrapped ...
    for registered_name, registered_class in app.registry.translators.items():
        wrapped = with_petsc_mixin(registered_class)
        app.set_translator(registered_name, wrapped, override=True)

    # ... and so does the default translator of the active builder.
    wrapped_default = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, wrapped_default, override=True)
67
68
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml

    This class is a mixin: it is meant to be placed before a Sphinx HTML
    translator class in the bases of a derived class, and relies on that
    translator for everything it reaches through ``self`` and ``super()``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Set up the link state, then initialize the rest of the class hierarchy.

        All positional and keyword arguments are forwarded unchanged to the
        next class in the MRO.
        """
        # Filled lazily by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        self._word_pattern = re.compile(r'\w+')
        super().__init__(*args, **kwargs)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a manual page name to a complete HTML link tag. It is
        built from the file manualpages/htmlmap (relative to the current
        working directory) on first use and cached on the instance.

        Raises an Exception if that file does not exist.
        """
        # Compare against None: an empty map is a valid, already loaded result
        # and must not trigger another read of the file.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. " %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # The prefix is part of a URL, so always use '/' as the separator
            # (os.path.join would produce a backslash on Windows).
            manpage_prefix = PETSC_DOC_OUT_ROOT_PLACEHOLDER + '/'
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map


    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every run of word characters in the string which is exactly a manual
        page name is replaced by the link tag for that page; everything else
        is left as it is.

        Note that callers also pass strings which already contain HTML markup
        (see visit_literal_block), so words inside that markup are candidates
        for replacement as well.
        """
        manpage_map = self._get_manpage_map()

        def replace(matchobj):
            word = matchobj.group(0)
            # Fall back to the word itself when there is no manual page for it
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)


    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: 'Text') -> None:
        """ Emit a text node, adding manual page links inside literal text. """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: 'Element') -> None:
        """ Emit a highlighted literal block with manual page links added.

        Blocks whose raw source differs from their text are handed to the
        next class in the MRO untouched. Otherwise the node is rendered
        completely here and SkipNode is raised.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten to remove from CLASS literal that caused an ugly extra pre and post space
    # in the manual pages for all PETSc links
    def visit_literal(self, node: 'Element') -> None:
        """ Open an inline literal, or emit it highlighted when it has a language.

        'kbd' nodes and literals without a 'code' class or a language only get
        their start tag written here. Literals with a 'code' class and a
        language are rendered completely and SkipNode is raised.
        """
        if 'kbd' in node['classes']:
            self.body.append(self.starttag(node, 'kbd', '',
                                           CLASS='docutils notranslate'))
            return
        lang = node.get("language", None)
        if 'code' not in node['classes'] or not lang:
            self.body.append(self.starttag(node, 'code', '',
                                           CLASS='docutils notranslate'))
            # from here on visit_Text treats text as literal text
            self.protect_literal_text += 1
            return

        opts = self.config.highlight_options.get(lang, {})
        highlighted = self.highlighter.highlight_block(
            node.astext(), lang, opts=opts, location=node, nowrap=True)
        starttag = self.starttag(
            node,
            "code",
            suffix="",
            CLASS="docutils literal highlight highlight-%s" % lang,
        )
        self.body.append(starttag + highlighted.strip() + "</code>")
        raise nodes.SkipNode
195
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a {name: URL} dict from the 'man:' lines of an htmlmap file.

    URLs are stored exactly as they appear in the file. Lines which start
    with 'man:' but do not have the expected layout are reported on stdout
    and skipped; all other lines are ignored silently.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')

    with open(htmlmap_filename, 'r') as htmlmap_file:
        man_entries = [entry for entry in htmlmap_file if entry.startswith('man:')]

    name_to_url = {}
    for entry in man_entries:
        parsed = entry_pattern.match(entry)
        if parsed is None:
            print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
            print(entry)
            continue
        # group 1 is the manual page name, group 3 its URL
        name_to_url[parsed.group(1)] = parsed.group(3)
    return name_to_url
210
211
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Convert a {name: link} dict into a {name: HTML link tag} dict.

    A link starting with 'http' is used as it is; any other link gets the
    prefix prepended so Sphinx will recognize it as a URL. The name is used
    as the text of the link tag.
    """
    anchors = {}
    for name, link in string_to_link.items():
        href = link if link.startswith('http') else prefix + link
        anchors[name] = ''.join(('<a href="', href, '">', name, '</a>'))
    return anchors
220