""" Sphinx extension for custom HTML processing for PETSc docs """
from typing import Any, Dict
import re
import os
import subprocess
import types
from docutils import nodes
from docutils.nodes import Element, Text
from sphinx import version_info as sphinx_version_info
from sphinx.writers.html5 import HTML5Translator
from sphinx.application import Sphinx
if not hasattr(re,'Pattern'): re.Pattern = re._pattern_type
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
def setup(app: Sphinx) -> None:
_check_version(app)
app.connect('builder-inited', _setup_translators)
return {'parallel_read_safe': True}
def _check_version(app: Sphinx) -> None:
sphinx_version_info_source = (4, 2, 0, 'final', 0)
app.require_sphinx('%s.%s' % (sphinx_version_info_source[0], sphinx_version_info_source[1]))
if sphinx_version_info[:2] != sphinx_version_info_source[:2]:
print('A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
'which differs from the current version %s' % (sphinx_version_info,),
'so unexpected behavior may be observed.')
def _setup_translators(app: Sphinx) -> None:
""" Use a mixin strategy to add to the Sphinx HTML translator without overriding
This allows use of other extensions which modify the translator.
Duplicates the approach used here in sphinx-hoverref:
https://github.com/readthedocs/sphinx-hoverxref/pull/42
"""
if app.builder.format != 'html':
return
for name, klass in app.registry.translators.items():
translator = types.new_class(
'PETScHTMLTranslator',
(
PETScHTMLTranslatorMixin,
klass,
),
{},
)
app.set_translator(name, translator, override=True)
translator = types.new_class(
'PETScHTMLTranslator',
(
PETScHTMLTranslatorMixin,
app.builder.default_translator_class,
),
{},
)
app.set_translator(app.builder.name, translator, override=True)
class PETScHTMLTranslatorMixin:
"""
A custom HTML translator which overrides methods to add PETSc-specific
custom processing to the generated HTML.
Replaces any string XXX that matches a manual page name with
XXX
or
XXX
depending on if the Sphinx build is html or dirhtml
"""
def __init__(self, *args: Any) -> None:
self._manpage_map = None
self._manpage_pattern = None
super().__init__(*args)
def _get_manpage_map(self) -> Dict[str,str]:
""" Return the manpage strings to link, as a dict. """
if not self._manpage_map:
htmlmap_filename = os.path.join('_build_classic', 'manualpages', 'htmlmap_modified')
if not os.path.isfile(htmlmap_filename):
raise Exception("Expected file %s not found. Run script to build classic docs subset." % htmlmap_filename)
manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
manpage_prefix_base = PETSC_DOC_OUT_ROOT_PLACEHOLDER
manpage_prefix = os.path.join(manpage_prefix_base, '')
self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
return self._manpage_map
def _get_manpage_pattern(self) -> re.Pattern:
""" Return the manpage links pattern.
This is done lazily, so this function should always be used,
instead of the direct data member, which may not be populated yet
"""
if not self._manpage_pattern:
self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
return self._manpage_pattern
def _add_manpage_links(self, string: str) -> str:
""" Add plain HTML link tags to a string """
manpage_map = self._get_manpage_map()
manpage_pattern = self._get_manpage_pattern()
return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)
# This method consists mostly of code duplicated from Sphinx:
# overwritten
def visit_Text(self, node: Text) -> None:
text = node.astext()
encoded = self.encode(text)
if self.protect_literal_text:
# moved here from base class's visit_literal to support
# more formatting in literal nodes
for token in self.words_and_spaces.findall(encoded):
if token.strip():
# Custom processing to add links to PETSc man pages ########
token_processed = self._add_manpage_links(token)
# protect literal text from line wrapping
self.body.append('%s' % token_processed)
# (end of custom processing) ###############################
elif token in ' \n':
# allow breaks at whitespace
self.body.append(token)
else:
# protect runs of multiple spaces; the last one can wrap
self.body.append(' ' * (len(token) - 1) + ' ')
else:
if self.in_mailto and self.settings.cloak_email_addresses:
encoded = self.cloak_email(encoded)
self.body.append(encoded)
# This method consists mostly of code duplicated from Sphinx:
# overwritten
def visit_literal_block(self, node: Element) -> None:
if node.rawsource != node.astext():
# most probably a parsed-literal block -- don't highlight
return super().visit_literal_block(node)
lang = node.get('language', 'default')
linenos = node.get('linenos', False)
highlight_args = node.get('highlight_args', {})
highlight_args['force'] = node.get('force', False)
opts = self.config.highlight_options.get(lang, {})
if linenos and self.config.html_codeblock_linenos_style:
linenos = self.config.html_codeblock_linenos_style
highlighted = self.highlighter.highlight_block(
node.rawsource, lang, opts=opts, linenos=linenos,
location=node, **highlight_args
)
# Custom processing to add links to PETSc man pages ####################
highlighted = self._add_manpage_links(highlighted)
# (end of custom processing) ###########################################
starttag = self.starttag(node, 'div', suffix='',
CLASS='highlight-%s notranslate' % lang)
self.body.append(starttag + highlighted + '\n')
raise nodes.SkipNode
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
""" Extract a dict from an htmlmap file, leaving URLs as they are."""
pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
string_to_link = dict()
with open(htmlmap_filename, 'r') as f:
for line in f.readlines():
m = re.match(pattern, line)
if m:
string = m.group(1)
string_to_link[string] = m.group(3)
else:
print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
print(line)
return string_to_link
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
"""
Prepend a prefix to any links not starting with 'http' so Sphinx will recognize them as URLs
"""
def link_string(name: str, link: str, prefix: str) -> str:
url = link if link.startswith('http') else prefix + link
return '' + name + ''
return dict((k, link_string(k, v, prefix)) for (k, v) in string_to_link.items())
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
""" Generate a regex to match any of the keys in source_dict, as full words """
def process_word(word):
""" add escape characters and word boundaries """
return r'\b' + re.escape(word) + r'\b'
return re.compile(r'|'.join(map(process_word, source_dict)))
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
""" Given a pattern which matches keys in replacements, replace keys found in string with their values"""
return pattern.sub(lambda match: replacements[match.group(0)], string)