""" Sphinx extension for custom HTML processing for PETSc docs """

from typing import Any, Dict
import re
import os
import types

from docutils import nodes
from docutils.nodes import Element, Text

from sphinx import version_info as sphinx_version_info
from sphinx.writers.html5 import HTML5Translator
from sphinx.application import Sphinx

# Compatibility shim: provide re.Pattern where only the private
# re._pattern_type name exists.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Prefix inserted in front of every relative manual page link; the literal
# placeholder text is written into the generated HTML as-is.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'


def setup(app: Sphinx) -> Dict[str, Any]:
    """
    Sphinx extension entry point.

    Checks the Sphinx version, registers the hook which installs the custom
    translators once the builder is initialized, and returns the extension
    metadata dict.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}


def _check_version(app: Sphinx) -> None:
    """
    Require at least the Sphinx version the duplicated code was taken from,
    and print a notice if the running major.minor version differs from it.
    """
    sphinx_version_info_source = (4, 2, 0, 'final', 0)
    app.require_sphinx('%s.%s' % (sphinx_version_info_source[0], sphinx_version_info_source[1]))
    if sphinx_version_info[:2] != sphinx_version_info_source[:2]:
        print('A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
              'which differs from the current version %s' % (sphinx_version_info,),
              'so unexpected behavior may be observed.')


def _make_translator(base_class: type) -> type:
    """ Create a translator class placing the PETSc mixin in front of base_class. """
    return types.new_class(
        'PETScHTMLTranslator',
        (
            PETScHTMLTranslatorMixin,
            base_class,
        ),
        {},
    )


def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the Sphinx HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    # Iterate over a snapshot: set_translator() is called for each entry of
    # the very registry being walked.
    for name, klass in list(app.registry.translators.items()):
        app.set_translator(name, _make_translator(klass), override=True)

    # Finally register a mixed-in version of the builder's default translator
    # under the name of the active builder.
    translator = _make_translator(app.builder.default_translator_class)
    app.set_translator(app.builder.name, translator, override=True)


class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Initialize the link cache and word pattern, then the next class in the MRO. """
        # Filled lazily by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        self._word_pattern = re.compile(r'\w+')
        # Forward keyword arguments too, so the mixin does not restrict how
        # the real translator class can be constructed.
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """
        Return the manpage strings to link, as a dict.

        The map is read from 'manualpages/htmlmap' (relative to the current
        working directory) on first use and cached on the instance.

        Raises FileNotFoundError if that file does not exist.
        """
        # Compare against None (not truthiness) so that an empty map is also
        # cached instead of re-reading the file for every piece of text.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise FileNotFoundError("Expected file %s not found. " % htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = PETSC_DOC_OUT_ROOT_PLACEHOLDER
            # Joining with '' appends a trailing path separator to the prefix
            manpage_prefix = os.path.join(manpage_prefix_base, '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()

        def replace(matchobj):
            # Substitute the whole word with its link, or leave it untouched
            word = matchobj.group(0)
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, adding manual page links inside literal text. """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    # (non-breaking space entities, as in the Sphinx original;
                    # plain spaces would be collapsed by the browser)
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manual page links added. """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten to remove from CLASS literal that caused an ugly extra pre and post space
    # in the manual pages for all PETSc links
    def visit_literal(self, node: Element) -> None:
        """ Open an inline literal, or emit it fully highlighted when it has a language. """
        if 'kbd' in node['classes']:
            self.body.append(self.starttag(node, 'kbd', '',
                                           CLASS='docutils notranslate'))
            return
        lang = node.get("language", None)
        if 'code' not in node['classes'] or not lang:
            self.body.append(self.starttag(node, 'code', '',
                                           CLASS='docutils notranslate'))
            # visit_Text() checks this counter to decide whether to wrap
            # tokens in <span class="pre"> and add manual page links
            self.protect_literal_text += 1
            return

        opts = self.config.highlight_options.get(lang, {})
        highlighted = self.highlighter.highlight_block(
            node.astext(), lang, opts=opts, location=node, nowrap=True)
        starttag = self.starttag(
            node,
            "code",
            suffix="",
            CLASS="docutils literal highlight highlight-%s" % lang,
        )
        self.body.append(starttag + highlighted.strip() + "</code>")
        raise nodes.SkipNode


def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """
    Extract a dict from an htmlmap file, leaving URLs as they are.

    Only lines starting with 'man:' are considered. For each such line which
    matches the expected layout, the first captured field becomes the key and
    the third captured field (the link) the value. Lines starting with 'man:'
    which do not match are reported on stdout and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = {}
    with open(htmlmap_filename, 'r') as f:
        for line in f:
            if not line.startswith('man:'):
                continue
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(line)
    return string_to_link


def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn a name -> link dict into a name -> '<a href="...">name</a>' dict.

    Prepend a prefix to any links not starting with 'http' so Sphinx will recognize them as URLs
    """
    def link_string(name: str, link: str, prefix: str) -> str:
        url = link if link.startswith('http') else prefix + link
        return '<a href="' + url + '">' + name + '</a>'

    return {k: link_string(k, v, prefix) for k, v in string_to_link.items()}