xref: /petsc/doc/ext/html5_petsc.py (revision 362febeeeb69b91ebadcb4b2dc0a22cb6dfc4097)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: when the `re` module does not expose the public name
# `Pattern`, alias it to the private `re._pattern_type` so that the
# `re.Pattern` annotations used in this file can be evaluated.
if not hasattr(re,'Pattern'): re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Entry point called by Sphinx when this extension is loaded

    Checks the Sphinx version, then registers the hook which installs the
    PETSc HTML translator classes.
    """
    _check_version(app)

    # The hook inspects app.builder, so it is attached to the event which
    # Sphinx emits once the builder object has been created.
    event_name = 'builder-inited'
    app.connect(event_name, _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Require a minimum Sphinx version and warn about a version mismatch

    This extension duplicates code from one particular Sphinx release.  The
    major.minor version of that release is required as a minimum, and a
    warning is printed when the running Sphinx has a different major.minor
    version.
    """
    copied_from = (3, 5, 4, 'final', 0)
    major, minor = copied_from[:2]
    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info[:2] == copied_from[:2]:
        # Same major.minor as the duplicated code: nothing to report.
        return

    print('Warning: A custom extension duplicates code from Sphinx %s ' % (copied_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
32
33
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Nothing is done unless the active builder produces HTML.  Otherwise,
    every translator class already registered with the application is
    replaced by a new subclass which lists PETScHTMLTranslatorMixin first.
    The builder's default translator class is used as the base only when no
    translator is registered under the builder's name, so that a translator
    installed for this builder by another extension is extended rather than
    thrown away.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base):
        """ Create a translator class deriving from the mixin and from base """
        return types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                base,
            ),
            {},
        )

    # Work from a snapshot, since the registrations are replaced while looping.
    registered = list(app.registry.translators.items())
    for name, klass in registered:
        app.set_translator(name, with_mixin(klass), override=True)

    # Fall back to the builder's default translator only if the loop above
    # did not already handle a translator registered for this builder.
    if all(name != app.builder.name for name, _ in registered):
        app.set_translator(
            app.builder.name,
            with_mixin(app.builder.default_translator_class),
            override=True,
        )
65
66
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    This class is meant to be listed before an HTML translator class in the
    bases of a new class (see _setup_translators), so that the super() calls
    made here reach that translator.

    The custom processing turns names listed in the classic docs htmlmap
    file into HTML links, wherever they appear as whole words in literal
    text and in highlighted code blocks.
    """

    def __init__(self, *args: Any) -> None:
        # Both are populated lazily; None means "not computed yet".
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each man page name to a complete HTML link tag. It is
        read from the htmlmap file (at a path relative to the current working
        directory) on first use and cached afterwards, even if it turns
        out to be empty.

        Raises an Exception if the htmlmap file does not exist.
        """
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the string placed at the start of relative man page links

        NOTE(review): the returned value looks like a placeholder which is
        substituted by a later step of the docs build -- confirm.
        """
        return 'PETSC_DOC_ROOT_PLACEHOLDER'

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Each whole-word occurrence of a man page name is replaced by the
        link tag for that name.  The string is returned unchanged when
        there are no man page names to link.
        """
        manpage_map = self._get_manpage_map()
        if not manpage_map:
            # No names to link: skip building and applying the pattern.
            return string
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the HTML for a text node to the output body

        In literal text, each non-blank token gets man page links added and
        is wrapped in a "pre" span.  Other text is appended as encoded,
        after optional e-mail address cloaking.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Append a highlighted code block, with man page links, to the body

        A block whose raw source differs from its text is handed to the
        next class's visit_literal_block instead.  Otherwise the highlighted
        HTML is emitted inside a div and nodes.SkipNode is raised, as the
        whole node has been handled here.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
167
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read an htmlmap file into a dict from name to URL, leaving URLs as they are.

    Lines which do not start with the expected
    ``man:+NAME++TEXT++++man+URL`` layout are reported on standard output
    and left out of the result.
    """
    entry = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    links = {}
    with open(htmlmap_filename, 'r') as htmlmap:
        for raw_line in htmlmap:
            found = entry.match(raw_line)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(raw_line)
                continue
            # The middle group (the displayed text) is not used.
            name, _, url = found.groups()
            links[name] = url
    return links
182
183
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a dict from name to URL into a dict from name to HTML link tag

    A URL starting with 'http' is used as it is; any other URL gets the
    prefix prepended.  The name itself is used as the text of the link.
    """
    anchors = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        anchors[name] = '<a href=\"' + url + '\">' + name + '</a>'
    return anchors
194
195
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Each key is escaped and enclosed in word boundaries, and the keys are
    joined as alternatives.

    An empty source_dict gives a pattern which never matches.  Joining zero
    alternatives would instead give the empty pattern, which matches the
    empty string at every position of any input.
    """
    if not source_dict:
        # An empty negative lookahead fails everywhere.
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(r'\b' + re.escape(word) + r'\b' for word in source_dict))
202
203
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Replace each match of pattern in string by the value stored in replacements for the matched text

    The pattern is expected to match only keys of replacements.
    """
    def lookup(found):
        """ Return the replacement for the text of one match """
        return replacements[found.group(0)]

    return pattern.sub(lookup, string)
207