xref: /petsc/doc/ext/html5_petsc.py (revision ee12ae39415b2e672d944cdca066227dadbf8b14)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: `re.Pattern` is used in type annotations further down.
# When the running interpreter's `re` module does not expose it, alias it to
# the private `re._pattern_type` name instead.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Extension entry point: verify the Sphinx version, then register the hook

    The version check runs first so that an incompatible Sphinx fails
    immediately, before any handler is connected.  The translator setup itself
    is deferred to the 'builder-inited' event.
    """
    _check_version(app)

    event_name = 'builder-inited'
    app.connect(event_name, _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Refuse to run on any Sphinx version other than the one we copied from

    Raises NotImplementedError when the full version tuple of the running
    Sphinx differs from the pinned one.
    """
    # Version of Sphinx whose translator code is duplicated in this file
    pinned = (2, 4, 4, 'final', 0)

    # Let Sphinx enforce the major.minor requirement first
    app.require_sphinx('%s.%s' % pinned[:2])

    if sphinx_version_info == pinned:
        return

    message_parts = [
        'This extension duplicates code from Sphinx %s ' % (pinned,),
        'which is incompatible with the current version %s' % (sphinx_version_info,),
    ]
    raise NotImplementedError(' '.join(message_parts))
34
35
def _setup_translators(app: Sphinx) -> None:
    """ Layer the PETSc mixin on top of every HTML translator in use

    Rather than replacing the translator outright, a new class is derived
    from the mixin and each existing translator class, so that other
    extensions which modify the translator keep working.

    The approach duplicates the one used in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    builder = app.builder
    if builder.format != 'html':
        return

    def with_petsc_mixin(base_class):
        """ Create a translator class with the mixin placed ahead of base_class """
        bases = (PETScHTMLTranslatorMixin, base_class)
        return types.new_class('PETScHTMLTranslator', bases, {})

    # Translators already registered (e.g. by other extensions)
    for registered_name, registered_class in app.registry.translators.items():
        wrapped = with_petsc_mixin(registered_class)
        app.set_translator(registered_name, wrapped, override=True)

    # The builder's own default translator
    wrapped_default = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, wrapped_default, override=True)
67
68
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    This is a mixin: it is meant to be placed ahead of an HTML translator
    class in the bases of a derived class, and relies on that class for
    attributes such as ``body``, ``builder``, ``encode`` and ``highlighter``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Initialize the lazy caches, then defer to the next class in the MRO """
        # Both are populated on first use by the _get_manpage_* accessors
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a manpage name to a complete ``<a href=...>`` tag.  It is
        built once from the classic docs' htmlmap file and then cached.

        Raises Exception if the htmlmap file does not exist.
        """
        # Compare against None (not truthiness) so that an empty map is also
        # cached instead of re-reading the file on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        * GitLab CI: the value of CI_ENVIRONMENT_URL, without a trailing
          '/index.html' and without trailing slashes.
        * ReadTheDocs: a fixed URL.
        * otherwise: the builder's output directory.

        Raises Exception if GitLab CI is detected but CI_ENVIRONMENT_URL is
        unset or empty.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if not ci_environment_url:
                raise Exception('GitLab CI detected but expected environment variable not found')
            # str.rstrip() takes a *set of characters*, not a suffix, so
            # rstrip('/index.html') would also eat trailing path components
            # made of those characters (e.g. '.../html').  Remove the exact
            # suffix instead, then drop any trailing slashes.
            manpage_prefix_base = ci_environment_url
            index_page = 'index.html'
            if manpage_prefix_base.endswith('/' + index_page):
                manpage_prefix_base = manpage_prefix_base[:-len(index_page)]
            manpage_prefix_base = manpage_prefix_base.rstrip('/')
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, linking manpage names found in literal text """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manpage names turned into links

        Always ends by raising nodes.SkipNode, except for blocks whose raw
        source differs from their text, which are handed to the next class in
        the MRO unchanged.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        # NOTE(review): identity comparison (`is`) on strings is kept exactly
        # as in the duplicated code; `==` may be what is intended -- confirm
        # against upstream before changing.
        if lang is self.builder.config.highlight_language:
            # only pass highlighter options for original language
            opts = self.builder.config.highlight_options
        else:
            opts = {}

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=(self.builder.current_docname, node.line), **highlight_args
        )
        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
180
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read an htmlmap file into a dict of name -> link, links left untouched

    Each line is expected to start with an entry of the form
    ``man:+NAME++TEXT++++man+LINK``.  Lines which do not are printed to
    stdout with a warning and otherwise ignored.  If a name occurs more than
    once, the last occurrence wins.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    name_to_link = {}
    with open(htmlmap_filename, 'r') as handle:
        for raw_line in handle:
            found = entry_pattern.match(raw_line)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(raw_line)
                continue
            # group 1 is the name, group 3 the link; group 2 is not used
            name_to_link[found.group(1)] = found.group(3)
    return name_to_link
195
196
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a name -> link dict into a name -> HTML anchor tag dict

    Links which do not start with 'http' get the prefix prepended; links
    which do are used as they are.  The anchor text is the name itself.
    """
    anchors = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        anchors[name] = ''.join(['<a href=\"', url, '\">', name, '</a>'])
    return anchors
207
208
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored.  If there are no (non-empty) keys, the returned
    pattern never matches anything; joining zero alternatives would otherwise
    give the empty regex, which matches the empty string at every position.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    alternatives = [process_word(word) for word in source_dict if word]
    if not alternatives:
        # Empty negative lookahead: fails at every position
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(alternatives))
215
216
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute every match of pattern in string with replacements[matched text]

    The pattern is expected to match only keys of replacements; a match which
    is not a key raises KeyError.
    """
    def lookup(found):
        """ Map a match object to its replacement string """
        matched_text = found.group(0)
        return replacements[matched_text]

    return pattern.sub(lookup, string)
220