| #!/usr/bin/env python |
| from __future__ import print_function |
| |
| import re |
| import subprocess |
| import sys |
| |
| |
def _unlink_code(match):
    """Return a matched <code> element with its <a> tags removed.

    Only the opening and closing anchor tags are dropped; the link text
    stays in place.
    """
    inner = re.sub(r'</?a(?:\s[^>]*)?>', '', match.group(1))
    return '<code>' + inner + '</code>'


def _preprocess_html(html):
    """Rewrite the HTML constructs that pandoc would render badly as ReST."""
    # Treat <div class="declaration">...</div> as <pre>...</pre>
    html = re.sub(
        r'<div\s+class="declaration">(.*?)</div>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Strip all attributes from <pre>...</pre> containing class="..."
    # The resulting classes confound ReST
    html = re.sub(
        r'<pre\s[^>]*class=[^>]*>(.*?)</pre>',
        r'<pre>\1</pre>',
        html, flags=re.DOTALL)

    # Remove links from <code>...</code>, which doesn't have a rendering in
    # ReST.  Each <code> element is matched on its own so that a link sitting
    # *between* two <code> elements is left alone, and every link inside a
    # single element is removed (not just the first one).
    return re.sub(r'<code>(.*?)</code>', _unlink_code, html, flags=re.DOTALL)


def _run_pandoc(html):
    """Pipe *html* through pandoc and return the generated ReST.

    The result has the same string type as *html*.  Text input is encoded
    as UTF-8 for the pipe and the output decoded again; byte strings are
    passed through untouched.  Exits with pandoc's status if pandoc fails.
    """
    is_text = not isinstance(html, bytes)
    p = subprocess.Popen(
        args=['pandoc', '--reference-links', '-f', 'html', '-t', 'rst'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # The pipes are binary, so text must be encoded before it is sent.
    out, _ = p.communicate(html.encode('utf-8') if is_text else html)
    if p.returncode != 0:
        # pandoc's stderr is inherited, so its diagnostics were already shown.
        sys.exit(p.returncode)
    return out.decode('utf-8') if is_text else out


def run():
    """Convert the HTML document on stdin into ReST on stdout.

    Any command-line argument prints the usage message and exits with
    status 0.  Otherwise the HTML is cleaned up, converted by pandoc, and
    the known-bad heading underline is patched before the result is written.
    """
    if len(sys.argv) > 1:
        print("""
ns-html2rst - Convert Cocoa HTML documentation into ReST

usage: nshtml2rst < NSString.html > NSString.rst
""")
        sys.exit(0)

    html = _preprocess_html(sys.stdin.read())

    # Let pandoc do most of the hard work
    rst = _run_pandoc(html)

    # HACKETY HACK HACK: Our html documents apparently contain some
    # bogus heading level nesting. Just fix up the one we know about
    # so that ReST doesn't complain later: a line made up only of
    # apostrophes becomes a line of carets of the same length.
    rst = re.sub("(^|\n)('+)($|\n)",
                 lambda m: m.group(1) + len(m.group(2)) * '^' + m.group(3),
                 rst, flags=re.MULTILINE)

    sys.stdout.write(rst)
| |
| |
# Script entry point: do the conversion only when executed directly,
# not when this file is imported as a module.
if __name__ == "__main__":
    run()