xref: /petsc/doc/ext/html5_petsc.py (revision f97672e55eacc8688507b9471cd7ec2664d7f203) !
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: make sure `re.Pattern` exists so it can be used in the
# annotations below; when it is missing, alias the private `re._pattern_type`.
try:
    re.Pattern
except AttributeError:
    re.Pattern = re._pattern_type


# Base of the prefix prepended to relative man page links (see
# PETScHTMLTranslatorMixin._get_manpage_map).
# NOTE(review): presumably replaced with the real output root by a later
# processing step which is not part of this file -- confirm.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
20
21
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Checks the running Sphinx version against the one this extension was
    written for (see _check_version), then registers _setup_translators
    to be called on the 'builder-inited' event.

    Returns the extension metadata dict, which declares the extension
    safe for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
27
28
def _check_version(app: Sphinx) -> None:
    """ Check the running Sphinx against the version this extension copies code from.

    Requires at least that major.minor version through the application,
    and prints a warning if the major.minor version in use is a different one.
    """
    sphinx_version_info_source = (4, 2, 0, 'final', 0)
    major_minor_source = sphinx_version_info_source[:2]

    app.require_sphinx('%s.%s' % major_minor_source)

    if sphinx_version_info[:2] == major_minor_source:
        # Same major.minor as the duplicated code: nothing to report
        return

    print('Warning: A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
36
37
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Does nothing unless the builder's format is 'html'. Otherwise, every
    translator class registered with the application is replaced by a new
    class which lists PETScHTMLTranslatorMixin first among its bases and
    the registered class second.

    The builder's default translator class is used as a base only when no
    translator is registered under the builder's name. Installing it
    unconditionally would replace the (already wrapped) translator
    registered by another extension and drop that extension's changes.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base: type) -> type:
        """ Create a translator class which puts the PETSc mixin in front of base """
        return types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                base,
            ),
            {},
        )

    # Work on a snapshot: set_translator() is expected to write to the same
    # registry mapping which is being iterated over.
    registered = list(app.registry.translators.items())
    for name, klass in registered:
        app.set_translator(name, with_mixin(klass), override=True)

    # Fall back to the builder's default translator only if nothing is
    # registered for this builder (anything registered was wrapped above).
    if app.builder.name not in app.registry.translators:
        app.set_translator(
            app.builder.name,
            with_mixin(app.builder.default_translator_class),
            override=True,
        )
69
70
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    This is a mixin: it is meant to be listed before an HTML translator
    class in the bases of a new class. That translator class supplies the
    attributes used below (body, encode, highlighter, config, ...).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Lazily populated; always access these through _get_manpage_map()
        # and _get_manpage_pattern()
        self._manpage_map = None
        self._manpage_pattern = None
        # Pass keyword arguments on as well, so that the mixin does not get
        # in the way of base classes which are constructed with them
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps man page names to complete HTML link tags. It is built
        from the htmlmap file on first use and cached afterwards.

        Raises FileNotFoundError if the htmlmap file does not exist.
        """
        # Compare with None, so that a map which is legitimately empty is
        # cached too, instead of being rebuilt from the file on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap_modified')
            if not os.path.isfile(htmlmap_filename):
                raise FileNotFoundError("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = PETSC_DOC_OUT_ROOT_PLACEHOLDER
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every man page name found in string (as matched by the manpage
        pattern) is replaced with the corresponding link tag.
        """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the encoded text of node to the body.

        Within protected literal text, each non-blank token additionally
        has links to PETSc man pages added before it is wrapped in a
        <span class="pre"> element.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Append the highlighted literal block to the body, with man page links added.

        Blocks whose raw source differs from their text are handed to the
        next class in the method resolution order unchanged. For all other
        blocks the node is fully handled here, which is signalled by
        raising nodes.SkipNode.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
168
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read an htmlmap file into a dict from man page name to URL.

    URLs are stored exactly as found in the file. Lines which do not have
    the expected form are printed with a warning and skipped. If a name
    occurs more than once, the last occurrence wins.
    """
    entry_regex = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    name_to_url = {}
    with open(htmlmap_filename, 'r') as htmlmap_file:
        for entry in htmlmap_file:
            found = entry_regex.match(entry)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(entry)
                continue
            # The second group is matched but not used
            name, _, url = found.groups()
            name_to_url[name] = url
    return name_to_url
183
184
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a dict of names and links into a dict of names and HTML link tags

    Links which do not start with 'http' get the prefix prepended.
    Each value of the returned dict is an <a> tag which points to the
    resulting URL and has the name (the key) as its text.
    """
    completed = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        completed[name] = ''.join(('<a href="', url, '">', name, '</a>'))
    return completed
195
196
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored. If there is no key left to match, the returned
    pattern never matches. Longer keys are tried before shorter ones, so
    that a key is not shadowed by a shorter key which it starts with.
    """
    def process_word(word: str) -> str:
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    # An empty key would turn into a pattern which matches the empty string
    # at every word boundary
    words = [word for word in source_dict if word]
    if not words:
        # Joining nothing would give the empty pattern, which matches
        # everywhere; '(?!)' is a lookahead which can never succeed
        return re.compile(r'(?!)')

    # The first alternative which matches wins, not the longest one
    words.sort(key=len, reverse=True)
    return re.compile(r'|'.join(map(process_word, words)))
203
204
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Replace every match of pattern in string with the value stored for it in replacements

    The pattern is expected to match only text which is a key of replacements.
    """
    def lookup(found):
        """ Look up the replacement for the matched text """
        return replacements[found.group(0)]

    return pattern.sub(lookup, string)
208