xref: /petsc/doc/ext/html5_petsc.py (revision 4e278199b78715991f5c71ebbd945c1489263e6c)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: when the re module has no 'Pattern' attribute, alias it
# to re._pattern_type so that the re.Pattern annotations used below resolve.
if not hasattr(re,'Pattern'): re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Sphinx extension entry point

    Checks the running Sphinx version against the one this extension was
    written for, then arranges for the HTML translators to be wrapped once
    the builder has been initialized.
    """
    # Version check first, so a warning/requirement failure surfaces early
    _check_version(app)

    # The builder is not available yet; defer translator setup to the event
    app.connect('builder-inited', _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Require, and warn about deviations from, the Sphinx version copied from

    The extension duplicates code from a specific Sphinx release. The
    major.minor of that release is required as a minimum, and a warning is
    printed if the running major.minor differs from it.
    """
    copied_from = (3, 5, 4, 'final', 0)
    major, minor = copied_from[0], copied_from[1]
    app.require_sphinx('%s.%s' % (major, minor))

    # Same major.minor as the duplicated code: nothing to report
    if sphinx_version_info[:2] == copied_from[:2]:
        return

    print('Warning: A custom extension duplicates code from Sphinx %s ' % (copied_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
32
33
def _setup_translators(app: Sphinx) -> None:
    """ Mix the PETSc processing into the HTML translators, without replacing them

    Each translator class is subclassed together with the mixin, rather than
    overridden outright, so that other extensions which modify the
    translator keep working.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    # Only HTML output is post-processed
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base_class):
        """ Create a new translator class with the mixin placed ahead of base_class """
        return types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base_class),
            {},
        )

    # Wrap every translator which has been explicitly registered ...
    for translator_name, registered_class in app.registry.translators.items():
        wrapped = with_petsc_mixin(registered_class)
        app.set_translator(translator_name, wrapped, override=True)

    # ... and then the current builder's default translator
    wrapped_default = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, wrapped_default, override=True)
65
66
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    This is a mixin: it is meant to be combined with (placed ahead of) an
    HTML translator class, and relies on that class for attributes such as
    ``body``, ``builder``, ``config``, ``highlighter`` and ``encode``.
    The custom processing wraps known PETSc manual page names, found in
    literal text and highlighted code blocks, in HTML links.
    """

    def __init__(self, *args: Any) -> None:
        # Both are populated lazily on first use; None means "not built yet"
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each manual page name to a complete HTML link tag.
        It is built once, from the htmlmap file produced by the classic
        docs build, and cached.

        Raises an Exception if the htmlmap file is not found.
        """
        # Compare against None (not truthiness), so that an empty map is
        # also cached instead of being re-read from disk on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        Under GitLab CI (GITLAB_CI set in the environment), the base is
        CI_ENVIRONMENT_URL with any trailing '/index.html' and trailing
        slashes removed or, if that variable is unset, a petsc.org URL
        derived from CI_COMMIT_REF_NAME. Otherwise it is the builder's
        output directory.

        Raises an Exception if, under GitLab CI, neither variable yields
        a location.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if ci_environment_url is not None:
                # Note: str.rstrip('/index.html') must not be used here, as
                # it strips any trailing run of those *characters* (turning
                # '.../main/index.html' into '.../ma'), not the suffix.
                index_suffix = '/index.html'
                manpage_prefix_base = ci_environment_url
                if manpage_prefix_base.endswith(index_suffix):
                    manpage_prefix_base = manpage_prefix_base[:-len(index_suffix)]
                manpage_prefix_base = manpage_prefix_base.rstrip('/')
            else:
                # This is a brittle stopgap measure
                ci_commit_ref_name = os.getenv('CI_COMMIT_REF_NAME')
                if not ci_commit_ref_name:
                    raise Exception('Could not determine version name from GitLab CI environment variables')
                version_name = ci_commit_ref_name.replace('release-', '')
                manpage_prefix_base = 'https://petsc.org/' + version_name
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every whole-word occurrence of a known manual page name in string
        is replaced by its link tag; the resulting string is returned.
        """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the HTML for a text node to the output body

        In protected literal text, each non-whitespace token additionally
        has links to PETSc man pages added, before being wrapped in a
        'pre' span. Other text is appended encoded, with email cloaking
        applied when inside a mailto reference and the setting is enabled.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Append the highlighted HTML for a literal block, with man page links

        Blocks whose raw source differs from their text are handed to the
        next class's implementation unchanged. Otherwise the block is
        highlighted, links to PETSc man pages are added to the highlighted
        HTML, and the result is appended wrapped in a 'div'.

        Raises nodes.SkipNode once the block has been fully emitted here.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
181
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read an htmlmap file into a dict of name -> URL, with URLs left untouched.

    Each line is expected to start with an entry of the form
    'man:+NAME++TEXT++++man+URL'. Lines which do not are reported on
    standard output and skipped.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    name_to_url = {}
    with open(htmlmap_filename, 'r') as htmlmap_file:
        for entry in htmlmap_file:
            found = entry_pattern.match(entry)
            if not found:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(entry)
                continue
            # group 1 is the name, group 3 the URL (group 2 is not used)
            name_to_url[found.group(1)] = found.group(3)
    return name_to_url
196
197
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a dict of name -> link into a dict of name -> HTML link tag

    Links which do not start with 'http' have prefix prepended; each
    value becomes an '<a>' tag pointing at the resulting URL, with the
    name as its text.
    """
    return {
        name: '<a href="' + (link if link.startswith('http') else prefix + link) + '">' + name + '</a>'
        for name, link in string_to_link.items()
    }
208
209
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    If source_dict is empty, the returned pattern never matches, so that
    substituting with it leaves any string unchanged.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    if not source_dict:
        # Joining no alternatives would give the empty pattern, which matches
        # the empty string at every position; use an always-failing lookahead
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, source_dict)))
216
217
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute each match of pattern in string with the value stored in replacements under the matched text

    pattern is expected to match only keys of replacements.
    """
    def lookup(found):
        """ map a match to the replacement for its full matched text """
        return replacements[found.group(0)]

    return pattern.sub(lookup, string)
221