# xref: /petsc/doc/ext/html5_petsc.py (revision b24fb147d2f783efb2f58813f80260c02fe8ea96)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: on Python versions whose ``re`` module does not expose
# ``Pattern``, alias it to the older private ``_pattern_type`` name.
try:
    re.Pattern
except AttributeError:
    re.Pattern = re._pattern_type


# Literal marker used as the root of every generated manual page link.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
20
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Checks the running Sphinx version, then registers ``_setup_translators``
    to run on the ``builder-inited`` event.

    Returns the extension metadata dict, declaring the extension safe for
    parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
26
27
def _check_version(app: Sphinx) -> None:
    """ Check the running Sphinx against the version this extension copies code from.

    Asks Sphinx to require at least the major.minor version the duplicated
    translator code was taken from, and prints a notice when the running
    major.minor version is not exactly that one.
    """
    # Version of Sphinx from which the visit_* methods in this file were copied
    duplicated_from = (4, 2, 0, 'final', 0)
    major, minor = duplicated_from[:2]
    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info[:2] == duplicated_from[:2]:
        return

    print('A custom extension duplicates code from Sphinx %s ' % (duplicated_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
35
36
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the Sphinx HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Every translator already present in the registry is replaced by a new
    class deriving from both ``PETScHTMLTranslatorMixin`` and the registered
    class.  If nothing is registered under the active builder's name, a
    translator derived from the builder's default translator class is
    registered for it.  Does nothing for builders whose format is not 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base):
        # Build a new translator class with the mixin first in the MRO so that
        # its visit_* methods take precedence over those of ``base``.
        return types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                base,
            ),
            {},
        )

    # Work on a snapshot: set_translator() is expected to write back into the
    # registry that is being walked here.
    registered = dict(app.registry.translators)
    for name, klass in registered.items():
        app.set_translator(name, with_mixin(klass), override=True)

    # Only fall back to the builder's default translator when no translator
    # was registered for this builder; otherwise the class wrapped in the loop
    # above would be replaced and the other extension's changes lost.
    if app.builder.name not in registered:
        app.set_translator(
            app.builder.name,
            with_mixin(app.builder.default_translator_class),
            override=True,
        )
68
69
class PETScHTMLTranslatorMixin:
    """
    A mixin for HTML translators which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml

    The replacement is applied to protected literal text (``visit_Text``) and
    to highlighted literal blocks (``visit_literal_block``).  The class is
    meant to be combined with a translator class which provides the
    attributes used below (``body``, ``encode``, ``highlighter``, ...).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Set up the link cache, then defer to the next class in the MRO.

        All positional and keyword arguments are forwarded unchanged.
        """
        # Filled lazily by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        # Candidate manual page names are maximal runs of word characters
        self._word_pattern = re.compile(r'\w+')
        super().__init__(*args, **kwargs)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a manual page name to a complete ``<a>`` tag.  It is
        built from the file ``manualpages/htmlmap`` (relative to the current
        working directory) on first use and cached afterwards.

        Raises an Exception if that file does not exist.
        """
        # Compare against None so that an empty (but loaded) map is not
        # rebuilt from disk on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # This is a URL prefix, not a file system path: always use '/'
            manpage_prefix = PETSC_DOC_OUT_ROOT_PLACEHOLDER + '/'
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map


    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every run of word characters which is a key of the manpage map is
        replaced by the corresponding link tag; everything else is returned
        unchanged.
        """
        manpage_map = self._get_manpage_map()

        def replace(matchobj):
            word = matchobj.group(0)
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)


    # This method consists mostly of code duplicated from Sphinx; it replaces
    # the base translator's visit_Text.
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, linking manual page names found in literal text. """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx; it replaces
    # the base translator's visit_literal_block.
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manual page names linked.

        Always leaves through ``nodes.SkipNode`` after appending the block,
        except for blocks whose raw source differs from their text, which are
        handed to the next class in the MRO.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
169
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a name -> URL dict from the 'man:' lines of an htmlmap file.

    URLs are stored exactly as they appear in the file.  Lines which do not
    start with 'man:' are ignored; 'man:' lines which do not have the expected
    layout are reported on standard output and skipped.
    """
    entry = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')

    with open(htmlmap_filename, 'r') as htmlmap:
        man_lines = [raw for raw in htmlmap if raw.startswith('man:')]

    name_to_url = {}
    for raw in man_lines:
        found = entry.match(raw)
        if found is None:
            print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
            print(raw)
            continue
        # group 1 is the manual page name, group 3 its URL
        name_to_url[found.group(1)] = found.group(3)
    return name_to_url
184
185
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn a name -> link dict into a name -> HTML anchor tag dict.

    Links not starting with 'http' get the prefix prepended, so Sphinx will
    recognize them as URLs; links starting with 'http' are used unchanged.
    The anchor text is the name itself.
    """
    return {
        name: '<a href="' + (link if link.startswith('http') else prefix + link) + '">' + name + '</a>'
        for name, link in string_to_link.items()
    }
194