xref: /petsc/doc/ext/html5_petsc.py (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc) !
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Interpreters whose ``re`` module lacks ``Pattern`` get it aliased to the
# private pattern type, so the ``re.Pattern`` annotations below resolve.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Literal prefix placed at the start of every generated man page link URL.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
20
21
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Verifies the running Sphinx version, then registers a ``builder-inited``
    handler which installs the PETSc-specific HTML translators.

    Returns the extension metadata dict, which declares the extension safe
    for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
27
28
def _check_version(app: Sphinx) -> None:
    """ Require the Sphinx release this extension copies code from, and warn on drift.

    The translator methods in this file duplicate Sphinx code, so a
    major.minor version other than the one copied from triggers a printed
    warning (the build still proceeds).
    """
    copied_from = (4, 2, 0, 'final', 0)
    major_minor = copied_from[:2]
    app.require_sphinx('%s.%s' % major_minor)
    if sphinx_version_info[:2] == major_minor:
        return
    print('Warning: A custom extension duplicates code from Sphinx %s ' % (copied_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
36
37
def _setup_translators(app: Sphinx) -> None:
    """ Layer the PETSc mixin on top of the HTML translators, rather than replacing them

    Every translator already registered with the application, and then the
    builder's default translator, is re-registered as a new class deriving
    from both the mixin and the existing translator class. This keeps other
    extensions which modify the translator working.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base):
        """ Create a translator class combining the PETSc mixin with base """
        return types.new_class('PETScHTMLTranslator', (PETScHTMLTranslatorMixin, base), {})

    for builder_name, registered in app.registry.translators.items():
        app.set_translator(builder_name, with_petsc_mixin(registered), override=True)

    app.set_translator(
        app.builder.name,
        with_petsc_mixin(app.builder.default_translator_class),
        override=True,
    )
69
70
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add
    PETSc-specific custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/docs/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/docs/manualpages/YY/XXX">XXX</a>
    depending on whether the Sphinx build is html or dirhtml.

    The replacement is applied to literal (inline code) text and to
    highlighted literal blocks.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both caches are filled lazily; None means "not computed yet".
        self._manpage_map = None
        self._manpage_pattern = None
        # Forward everything so the mixin cooperates with any translator
        # signature that follows it in the MRO.
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a man page name to a complete ``<a>`` tag. It is built
        from the htmlmap file on first use and cached afterwards.

        Raises Exception if the htmlmap file is missing.
        """
        # Compare against None: a legitimately empty map must also count as
        # cached, otherwise the file would be re-read on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap_modified')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # This prefix ends up inside an href, so it must use '/' on every
            # platform (os.path.join would produce backslashes on Windows).
            manpage_prefix = PETSC_DOC_OUT_ROOT_PLACEHOLDER + '/docs/'
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        if not manpage_map:
            # Nothing to link; skip building and applying a pattern.
            return string
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method overrides the base translator's visit_Text and consists
    # mostly of code duplicated from Sphinx; only the marked section is custom.
    def visit_Text(self, node: Text) -> None:
        """ Emit the HTML for a text node, linking man page names in literal text """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method overrides the base translator's visit_literal_block and
    # consists mostly of code duplicated from Sphinx; only the marked section
    # is custom.
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with man page names turned into links

        Blocks whose raw source differs from their text are delegated to the
        base class unchanged. Otherwise the block is highlighted, linked,
        appended to the body, and the node's children are skipped.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        # The block has been fully rendered here, so do not descend further.
        raise nodes.SkipNode
174
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a name -> URL dict from an htmlmap file; URLs are kept verbatim.

    Each line is expected to start with ``man:+NAME++TEXT++++man+URL``.
    Lines which do not are reported on stdout and skipped.
    """
    entry = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    links = {}
    with open(htmlmap_filename, 'r') as htmlmap:
        for raw_line in htmlmap:
            found = entry.match(raw_line)
            if not found:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(raw_line)
                continue
            # Only the name (group 1) and the URL (group 3) are kept.
            name, _, url = found.groups()
            links[name] = url
    return links
189
190
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn each (name, link) entry into a complete ``<a>`` tag, keyed by name.

    Links starting with 'http' are used as they are; any other link gets
    prefix prepended first.
    """
    completed = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        completed[name] = '<a href="' + url + '">' + name + '</a>'
    return completed
199
200
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Keys are escaped, so they are matched literally. For an empty
    source_dict the returned pattern never matches anything.
    """
    words = [re.escape(word) for word in source_dict]
    if not words:
        # Joining nothing would give the empty pattern, which matches at every
        # position and makes dict-based replacement fail with KeyError('').
        # An empty negative lookahead can never match.
        return re.compile(r'(?!)')
    # One shared pair of word boundaries around the alternation is equivalent
    # to wrapping every key separately, and gives a much shorter pattern.
    return re.compile(r'\b(?:' + r'|'.join(words) + r')\b')
207
208
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute every match of pattern in string with the value replacements holds for the matched text

    The pattern must only match strings which are keys of replacements.
    """
    def lookup(found):
        return replacements[found.group(0)]

    return pattern.sub(lookup, string)
212