xref: /petsc/doc/ext/html5_petsc.py (revision 07f25ad1711774a9442d064cc5f042f486b5a62e)
1ee12ae39SPatrick Sanan""" Sphinx extension for custom HTML processing for PETSc docs """
2ee12ae39SPatrick Sanan
3ee12ae39SPatrick Sananfrom typing import Any, Dict
4ee12ae39SPatrick Sananimport re
5ee12ae39SPatrick Sananimport os
6ee12ae39SPatrick Sananimport types
7ee12ae39SPatrick Sanan
8ee12ae39SPatrick Sananfrom docutils import nodes
9ee12ae39SPatrick Sananfrom docutils.nodes import Element, Text
10ee12ae39SPatrick Sanan
11ee12ae39SPatrick Sananfrom sphinx import version_info as sphinx_version_info
12ee12ae39SPatrick Sananfrom sphinx.writers.html5 import HTML5Translator
13ee12ae39SPatrick Sananfrom sphinx.application import Sphinx
14ee12ae39SPatrick Sanan
# Compatibility shim: when the ``re`` module does not expose ``Pattern``,
# alias it to the private ``re._pattern_type`` so ``re.Pattern`` always exists.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Placeholder prepended (followed by a path separator) to relative manual page
# links; presumably substituted with the real output root by a later build
# step -- confirm against the doc build scripts.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
191540e0edSPatrick Sanan
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Checks the running Sphinx version against the one this extension was
    written for, then registers a 'builder-inited' handler which wraps the
    HTML translators once the builder exists.

    Returns the extension metadata dict, which declares the extension safe
    for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
25ee12ae39SPatrick Sanan
26ee12ae39SPatrick Sanan
def _check_version(app: Sphinx) -> None:
    """ Compare the running Sphinx with the version whose code is duplicated here.

    Requires at least the major.minor series of the reference version via
    app.require_sphinx(), and prints a notice when the running major.minor
    differs from the reference one.
    """
    # Sphinx version the duplicated translator code was taken from
    reference = (4, 2, 0, 'final', 0)
    major, minor = reference[0], reference[1]
    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info[:2] == reference[:2]:
        return

    print('A custom extension duplicates code from Sphinx %s ' % (reference,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
34ee12ae39SPatrick Sanan
35ee12ae39SPatrick Sanan
def _setup_translators(app: Sphinx) -> None:
    """ Layer the PETSc mixin on top of the HTML translators instead of replacing them

    Each registered translator class, and the builder's default translator
    class, is subclassed together with PETScHTMLTranslatorMixin, so other
    extensions which modify the translator keep working.

    Does nothing unless the builder's format is 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base):
        # The mixin is listed first so its methods take precedence over those of ``base``
        return types.new_class('PETScHTMLTranslator', (PETScHTMLTranslatorMixin, base), {})

    for name, klass in app.registry.translators.items():
        app.set_translator(name, with_petsc_mixin(klass), override=True)

    default_wrapped = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, default_wrapped, override=True)
67ee12ae39SPatrick Sanan
68ee12ae39SPatrick Sanan
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Set up the link cache and word pattern, then defer to the next class in the MRO.

        All positional and keyword arguments are forwarded unchanged.
        """
        # Filled in lazily by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        self._word_pattern = re.compile(r'\w+')
        super().__init__(*args, **kwargs)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a manual page name to a complete <a ...> tag. It is
        built from the file manualpages/htmlmap (relative to the current
        working directory) on first use and cached on this instance.

        Raises an Exception if that file does not exist.
        """
        # Compare against None so that a legitimately empty map is cached too,
        # rather than re-reading the file on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. " %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # This is a URL prefix, not a filesystem path: always use '/'
            manpage_prefix = PETSC_DOC_OUT_ROOT_PLACEHOLDER + '/'
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map


    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every maximal run of word characters which is a key of the manpage
        map is replaced by the corresponding link tag; everything else is
        returned unchanged.
        """
        manpage_map = self._get_manpage_map()
        if not manpage_map:
            # Nothing can match, so skip the substitution pass
            return string

        def replace(matchobj):
            word = matchobj.group(0)
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)


    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the encoded text of node to the body.

        While literal text is being protected, every non-blank token is run
        through _add_manpage_links() before being wrapped in a "pre" span.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Highlight a literal block and add manual page links to the result.

        Blocks whose raw source differs from their text are handed to the
        next class in the MRO untouched. Otherwise the highlighted markup is
        passed through _add_manpage_links(), appended to the body, and
        nodes.SkipNode is raised.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten to remove from CLASS literal that caused an ugly extra pre and post space
    # in the manual pages for all PETSc links
    def visit_literal(self, node: Element) -> None:
        """ Open an inline literal without the "literal" CSS class.

        Nodes with the 'kbd' class get a <kbd> tag. Nodes without the 'code'
        class or without a language get a <code> tag and turn on literal text
        protection. Remaining nodes are highlighted inline, appended to the
        body, and nodes.SkipNode is raised.
        """
        if 'kbd' in node['classes']:
            self.body.append(self.starttag(node, 'kbd', '',
                                           CLASS='docutils notranslate'))
            return
        lang = node.get("language", None)
        if 'code' not in node['classes'] or not lang:
            self.body.append(self.starttag(node, 'code', '',
                                           CLASS='docutils notranslate'))
            self.protect_literal_text += 1
            return

        opts = self.config.highlight_options.get(lang, {})
        highlighted = self.highlighter.highlight_block(
            node.astext(), lang, opts=opts, location=node, nowrap=True)
        starttag = self.starttag(
            node,
            "code",
            suffix="",
            CLASS="docutils literal highlight highlight-%s" % lang,
        )
        self.body.append(starttag + highlighted.strip() + "</code>")
        raise nodes.SkipNode

195b0b385f4SBarry Smith
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Only lines starting with 'man:' are considered. For each such line of the
    form man:+NAME++TEXT++++man+LINK the entry NAME -> LINK is recorded; a
    later line with the same NAME overrides an earlier one. 'man:' lines not
    of that form are reported on standard output and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = {}
    with open(htmlmap_filename, 'r') as f:
        # Stream the file rather than holding every line in memory
        for line in f:
            if not line.startswith('man:'):
                continue
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                # Drop the line's own newline so the warning is not followed by a blank line
                print(line.rstrip('\n'))
    return string_to_link
210ee12ae39SPatrick Sanan
211ee12ae39SPatrick Sanan
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn a name -> link mapping into a name -> HTML anchor tag mapping.

    Links starting with 'http' are used as they are; any other link gets
    prefix prepended, so Sphinx will recognize them as URLs.
    """
    anchors = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        anchors[name] = '<a href=\"' + url + '\">' + name + '</a>'
    return anchors
219ee12ae39SPatrick Sanan    return dict((k, link_string(k, v, prefix)) for (k, v) in string_to_link.items())
220