blob: 3cc4ec9bda17df7c7b62bff2ca19237b456a7eda [file] [log] [blame]
#!/usr/bin/env python
from __future__ import print_function
import re
import subprocess
import sys
def _simplify_html(html):
    """Rewrite HTML constructs that would otherwise confuse the ReST output.

    Takes the HTML document as text and returns the modified text.
    """
    # Treat <div class="declaration">...</div> as <pre>...</pre>
    html = re.sub(
        r'<div\s+class="declaration">(.*?)</div>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Strip all attributes from <pre>...</pre> containing class="..."
    # The resulting classes confound ReST
    html = re.sub(
        r'<pre\s[^>]*class=[^>]*>(.*?)</pre>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Remove links from <code>...</code>, which doesn't have a rendering in
    # ReST.  Each <code> element is matched on its own and the anchors are
    # stripped inside it; a single pattern spanning "<code>...<a>...</code>"
    # could run past a </code> and unlink prose between two code elements,
    # and would only remove the first link of an element.
    def strip_anchors(code_element):
        return re.sub(
            r'<a\b[^>]*>(.*?)</a>',
            r'\1',
            code_element.group(0), flags=re.DOTALL)

    return re.sub(r'<code>.*?</code>', strip_anchors, html, flags=re.DOTALL)


def _pandoc_html_to_rst(html):
    """Pipe *html* through pandoc and return the ReST text it prints.

    Exits the process with pandoc's status if pandoc reports failure.
    """
    pandoc = subprocess.Popen(
        args=['pandoc', '--reference-links', '-f', 'html', '-t', 'rst'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text-mode pipes: without this, Python 3 requires bytes for
        # communicate() and hands bytes back, which breaks the str regexes
        # applied to the result.
        universal_newlines=True)
    rst, _ = pandoc.communicate(html)

    # pandoc's stderr is inherited, so its diagnostics have already been
    # shown; just avoid emitting partial output as if it were a success.
    if pandoc.returncode != 0:
        sys.exit(pandoc.returncode)
    return rst


def _fix_heading_underlines(rst):
    """Replace every line consisting only of ' characters with ^ characters.

    The replacement line has the same length as the original one.
    """
    return re.sub(
        r"^'+$",
        lambda m: '^' * len(m.group(0)),
        rst, flags=re.MULTILINE)


def run():
    """Convert the HTML document on stdin to ReST on stdout using pandoc.

    If any command-line argument is given, print a usage message and exit
    with status 0 instead.  If pandoc fails, exit with pandoc's status
    without writing any output.
    """
    if len(sys.argv) > 1:
        print("""
ns-html2rst - Convert Cocoa HTML documentation into ReST
usage: nshtml2rst < NSString.html > NSString.rst
""")
        sys.exit(0)

    html = _simplify_html(sys.stdin.read())

    # Let pandoc do most of the hard work
    rst = _pandoc_html_to_rst(html)

    # HACKETY HACK HACK: Our html documents apparently contain some
    # bogus heading level nesting. Just fix up the one we know about
    # so that ReST doesn't complain later.
    rst = _fix_heading_underlines(rst)

    sys.stdout.write(rst)
# Only run the conversion when this file is executed as a script.
if __name__ == "__main__":
    run()