xref: /petsc/doc/ext/html5_petsc.py (revision c7a4214aa78cb41fbd20979609c6a6680968e7d6)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: when this interpreter's ``re`` module has no public
# ``Pattern`` attribute, alias it to the private ``re._pattern_type`` so that
# the ``re.Pattern`` annotations used below can still be evaluated.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Sphinx extension entry point.

    Checks the running Sphinx version, then arranges for the PETSc HTML
    translators to be installed once the builder has been initialized.
    """
    # Version check comes first: require_sphinx() may abort the build.
    _check_version(app)

    # The translators depend on the builder, so defer until it exists.
    app.connect('builder-inited', _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Require a minimum Sphinx version and warn on a major.minor mismatch.

    This extension duplicates code from a specific Sphinx release, so a
    warning is printed when the running Sphinx has a different major.minor
    version than the one the code was copied from.
    """
    # Version of Sphinx that the duplicated translator code was taken from.
    sphinx_version_info_source = (3, 5, 4, 'final', 0)
    major, minor = sphinx_version_info_source[:2]

    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info[:2] == (major, minor):
        return

    print('Warning: A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
32
33
def _setup_translators(app: Sphinx) -> None:
    """ Install PETSc-aware HTML translators using a mixin strategy

    Rather than replacing the translator outright, each translator class is
    subclassed together with PETScHTMLTranslatorMixin.  This allows use of
    other extensions which also modify the translator.

    Duplicates the approach used in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    # Only HTML output is post-processed.
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base_class):
        """ Build a new class deriving from the mixin and base_class """
        return types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base_class),
            {},
        )

    # Wrap every translator already present in the registry ...
    for builder_name, registered_class in app.registry.translators.items():
        app.set_translator(builder_name, with_petsc_mixin(registered_class), override=True)

    # ... and then the default translator of the active builder.
    default_translator = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, default_translator, override=True)
65
66
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add
    PETSc-specific custom processing to the generated HTML.

    Literal text and highlighted literal blocks are post-processed so that
    names found in the classic docs ``htmlmap`` file are wrapped in links to
    their manual pages.  The class is intended to be combined with a Sphinx
    HTML translator class, whose attributes (``body``, ``encode``,
    ``highlighter``, ``builder``, ...) it relies on.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Initialize the lazy caches, then defer to the next class in the MRO.

        All positional and keyword arguments are forwarded unchanged.
        """
        # Both are populated on first use; see _get_manpage_map() and
        # _get_manpage_pattern().
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each name to a complete ``<a href=...>`` tag.  It is
        built from the htmlmap file on first use and cached afterwards.

        Raises:
            Exception: if the htmlmap file does not exist.
        """
        # Compare against None (not truthiness) so that an empty map is
        # cached too, instead of re-reading the file on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes os.path.join() end the prefix with a separator.
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        * GitLab CI: ``CI_ENVIRONMENT_URL`` without a trailing ``/index.html``
          or trailing slashes; if that variable is unset, a petsc.org URL
          derived from ``CI_COMMIT_REF_NAME``.
        * ReadTheDocs: a fixed URL.
        * Otherwise: the builder's output directory.

        Raises:
            Exception: on GitLab CI when neither environment variable
                provides a usable value.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if ci_environment_url is not None:
                # Note: str.rstrip('/index.html') would strip any trailing
                # run of the *characters* '/', 'i', 'n', 'd', ... and so
                # mangle URLs such as '.../release' into '.../releas'.
                # Remove the exact suffix instead.
                manpage_prefix_base = ci_environment_url
                index_suffix = '/index.html'
                if manpage_prefix_base.endswith(index_suffix):
                    manpage_prefix_base = manpage_prefix_base[:-len(index_suffix)]
                manpage_prefix_base = manpage_prefix_base.rstrip('/')
            else:
                # This is a brittle stopgap measure
                ci_commit_ref_name = os.getenv('CI_COMMIT_REF_NAME')
                if not ci_commit_ref_name:
                    raise Exception('Could not determine version name from GitLab CI environment variables')
                version_name = ci_commit_ref_name.replace('release-', '')
                manpage_prefix_base = 'https://petsc.org/' + version_name
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: 'Text') -> None:
        """ Emit a text node, adding manpage links inside protected literal text. """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: 'Element') -> None:
        """ Emit a highlighted literal block with manpage links added.

        Blocks whose raw source differs from their text are handed to the
        next class in the MRO unchanged.  Otherwise the block is highlighted,
        linked, appended to the body, and ``nodes.SkipNode`` is raised so the
        node's children are not visited again.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
183
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a name-to-URL dict from an htmlmap file, leaving URLs as they are.

    Each line is matched against the expected ``man:+NAME++...++++man+URL``
    layout.  Lines which do not match are reported on stdout and skipped.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    links_by_name = {}
    with open(htmlmap_filename, 'r') as htmlmap_file:
        for raw_line in htmlmap_file:
            found = entry_pattern.match(raw_line)
            if not found:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(raw_line)
                continue
            # Group 1 is the name to link, group 3 is its URL.
            links_by_name[found.group(1)] = found.group(3)
    return links_by_name
198
199
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a name-to-link dict into a name-to-HTML-anchor dict

    Links which do not start with 'http' get the prefix prepended; every
    link is then wrapped in an ``<a href=...>`` tag whose text is the name.
    """
    completed = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        completed[name] = '<a href=\"' + url + '\">' + name + '</a>'
    return completed
210
211
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Each non-empty key is escaped and wrapped in word boundaries, and the
    alternatives are ordered longest first, so that a key is never shadowed
    by a shorter key which happens to match at the same position.

    If there are no non-empty keys, a pattern which never matches is
    returned.  (Joining zero alternatives would give the empty pattern,
    which matches at every position of any string.)
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    # Empty keys can never be a "full word"; drop them.
    words = sorted((word for word in source_dict if word), key=len, reverse=True)
    if not words:
        # An empty negative lookahead always fails, so nothing is ever matched.
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, words)))
218
219
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute every match of pattern in string with its entry in replacements

    The pattern is expected to match only keys of replacements.
    """
    def lookup(match):
        """ map the matched text to its replacement """
        matched_text = match.group(0)
        return replacements[matched_text]

    return pattern.sub(lookup, string)
223