""" Sphinx extension for custom HTML processing for PETSc docs """

from typing import Any, Dict
import re
import os
import subprocess
import types

from docutils import nodes
from docutils.nodes import Element, Text

from sphinx import version_info as sphinx_version_info
from sphinx.writers.html5 import HTML5Translator
from sphinx.application import Sphinx

# Fall back to the private type name when this Python's re module
# does not expose re.Pattern, so the annotations below can be evaluated.
if not hasattr(re,'Pattern'): re.Pattern = re._pattern_type


def setup(app: Sphinx) -> None:
    """ Extension entry point: check the Sphinx version and hook translator setup """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)


def _check_version(app: Sphinx) -> None:
    """ Require (and warn on deviation from) the Sphinx version this code was copied from

    The translator mixin below duplicates code from a specific Sphinx release,
    so a warning is printed when the running major.minor version differs.
    """
    sphinx_version_info_source = (3, 5, 4, 'final', 0)
    app.require_sphinx('%s.%s' % (sphinx_version_info_source[0], sphinx_version_info_source[1]))
    if sphinx_version_info[:2] != sphinx_version_info_source[:2]:
        print('Warning: A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
              'which differs from the current version %s' % (sphinx_version_info,),
              'so unexpected behavior may be observed.')


def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base: type) -> type:
        """ Build a translator class deriving from the PETSc mixin and ``base`` """
        return types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                base,
            ),
            {},
        )

    # Iterate over a snapshot, so that re-registering translators inside the
    # loop can never disturb the iteration over the registry itself.
    for name, klass in list(app.registry.translators.items()):
        app.set_translator(name, with_mixin(klass), override=True)

    app.set_translator(
        app.builder.name,
        with_mixin(app.builder.default_translator_class),
        override=True,
    )


class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Intended to be combined (first in the MRO) with an HTML translator class;
    see _setup_translators.
    """

    def __init__(self, *args: Any) -> None:
        # Both are populated lazily; use the _get_* accessors, not the attributes
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each man page name to a complete ``<a href=...>`` tag.
        It is built on first use from the classic docs' htmlmap file.

        Raises an Exception if the htmlmap file does not exist.
        """
        # Compare against None (not truthiness) so that an empty map is cached too
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." % htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        Under GitLab CI the base is derived from CI_ENVIRONMENT_URL or, failing
        that, from CI_COMMIT_REF_NAME; under ReadTheDocs a fixed URL is used;
        otherwise the builder's output directory is used.

        Raises an Exception under GitLab CI if neither variable is usable.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if ci_environment_url is not None:
                # Remove the literal suffix only. str.rstrip() would strip any
                # trailing run of those *characters* and eat into the URL path.
                index_suffix = '/index.html'
                if ci_environment_url.endswith(index_suffix):
                    manpage_prefix_base = ci_environment_url[:-len(index_suffix)]
                else:
                    manpage_prefix_base = ci_environment_url
            else:
                # This is a brittle stopgap measure
                ci_commit_ref_name = os.getenv('CI_COMMIT_REF_NAME')
                if not ci_commit_ref_name:
                    raise Exception('Could not determine version name from GitLab CI environment variables')
                version_name = ci_commit_ref_name.replace('release-', '')
                manpage_prefix_base = 'https://petsc.org/' + version_name
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, linking man page names inside literal text """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block, with man page names linked

        Always ends by raising nodes.SkipNode (the block is emitted in full
        here), except for parsed-literal blocks, which are delegated to the
        base class.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode


def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Each matching line contributes one entry, mapping the first captured name
    to the captured link. Lines which do not match are reported with a printed
    warning and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = dict()
    with open(htmlmap_filename, 'r') as f:
        for line in f:
            m = pattern.match(line)
            if m:
                string = m.group(1)
                string_to_link[string] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(line)
    return string_to_link


def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Complete HTML links

    Prepend a prefix to any links not starting with 'http',
    and add HTML tags
    """
    def link_string(name: str, link: str, prefix: str) -> str:
        url = link if link.startswith('http') else prefix + link
        return '<a href=\"' + url + '\">' + name + '</a>'
    return dict((k, link_string(k, v, prefix)) for (k, v) in string_to_link.items())


def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored, since they would match at every word boundary.
    If no usable keys remain, the returned pattern never matches anything.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    words = [word for word in source_dict if word]
    if not words:
        # An empty alternation would match the empty string everywhere;
        # an empty negative lookahead can never succeed.
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, words)))


def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Given a pattern which matches keys in replacements, replace keys found in string with their values"""
    return pattern.sub(lambda match: replacements[match.group(0)], string)