| """xmlWriter.py -- Simple XML authoring class""" |
| |
| from fontTools.misc.py23 import byteord, strjoin, tobytes, tostr |
| import sys |
| import os |
| import string |
| |
| INDENT = " " |
| |
| |
| class XMLWriter(object): |
| |
| def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf_8", |
| newlinestr=None): |
| if encoding.lower().replace('-','').replace('_','') != 'utf8': |
| raise Exception('Only UTF-8 encoding is supported.') |
| if fileOrPath == '-': |
| fileOrPath = sys.stdout |
| if not hasattr(fileOrPath, "write"): |
| self.filename = fileOrPath |
| self.file = open(fileOrPath, "wb") |
| self._closeStream = True |
| else: |
| self.filename = None |
| # assume writable file object |
| self.file = fileOrPath |
| self._closeStream = False |
| |
| # Figure out if writer expects bytes or unicodes |
| try: |
| # The bytes check should be first. See: |
| # https://github.com/fonttools/fonttools/pull/233 |
| self.file.write(b'') |
| self.totype = tobytes |
| except TypeError: |
| # This better not fail. |
| self.file.write('') |
| self.totype = tostr |
| self.indentwhite = self.totype(indentwhite) |
| if newlinestr is None: |
| self.newlinestr = self.totype(os.linesep) |
| else: |
| self.newlinestr = self.totype(newlinestr) |
| self.indentlevel = 0 |
| self.stack = [] |
| self.needindent = 1 |
| self.idlefunc = idlefunc |
| self.idlecounter = 0 |
| self._writeraw('<?xml version="1.0" encoding="UTF-8"?>') |
| self.newline() |
| |
| def __enter__(self): |
| return self |
| |
| def __exit__(self, exception_type, exception_value, traceback): |
| self.close() |
| |
| def close(self): |
| if self._closeStream: |
| self.file.close() |
| |
| def write(self, string, indent=True): |
| """Writes text.""" |
| self._writeraw(escape(string), indent=indent) |
| |
| def writecdata(self, string): |
| """Writes text in a CDATA section.""" |
| self._writeraw("<![CDATA[" + string + "]]>") |
| |
| def write8bit(self, data, strip=False): |
| """Writes a bytes() sequence into the XML, escaping |
| non-ASCII bytes. When this is read in xmlReader, |
| the original bytes can be recovered by encoding to |
| 'latin-1'.""" |
| self._writeraw(escape8bit(data), strip=strip) |
| |
| def write_noindent(self, string): |
| """Writes text without indentation.""" |
| self._writeraw(escape(string), indent=False) |
| |
| def _writeraw(self, data, indent=True, strip=False): |
| """Writes bytes, possibly indented.""" |
| if indent and self.needindent: |
| self.file.write(self.indentlevel * self.indentwhite) |
| self.needindent = 0 |
| s = self.totype(data, encoding="utf_8") |
| if (strip): |
| s = s.strip() |
| self.file.write(s) |
| |
| def newline(self): |
| self.file.write(self.newlinestr) |
| self.needindent = 1 |
| idlecounter = self.idlecounter |
| if not idlecounter % 100 and self.idlefunc is not None: |
| self.idlefunc() |
| self.idlecounter = idlecounter + 1 |
| |
| def comment(self, data): |
| data = escape(data) |
| lines = data.split("\n") |
| self._writeraw("<!-- " + lines[0]) |
| for line in lines[1:]: |
| self.newline() |
| self._writeraw(" " + line) |
| self._writeraw(" -->") |
| |
| def simpletag(self, _TAG_, *args, **kwargs): |
| attrdata = self.stringifyattrs(*args, **kwargs) |
| data = "<%s%s/>" % (_TAG_, attrdata) |
| self._writeraw(data) |
| |
| def begintag(self, _TAG_, *args, **kwargs): |
| attrdata = self.stringifyattrs(*args, **kwargs) |
| data = "<%s%s>" % (_TAG_, attrdata) |
| self._writeraw(data) |
| self.stack.append(_TAG_) |
| self.indent() |
| |
| def endtag(self, _TAG_): |
| assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag" |
| del self.stack[-1] |
| self.dedent() |
| data = "</%s>" % _TAG_ |
| self._writeraw(data) |
| |
| def dumphex(self, data): |
| linelength = 16 |
| hexlinelength = linelength * 2 |
| chunksize = 8 |
| for i in range(0, len(data), linelength): |
| hexline = hexStr(data[i:i+linelength]) |
| line = "" |
| white = "" |
| for j in range(0, hexlinelength, chunksize): |
| line = line + white + hexline[j:j+chunksize] |
| white = " " |
| self._writeraw(line) |
| self.newline() |
| |
| def indent(self): |
| self.indentlevel = self.indentlevel + 1 |
| |
| def dedent(self): |
| assert self.indentlevel > 0 |
| self.indentlevel = self.indentlevel - 1 |
| |
| def stringifyattrs(self, *args, **kwargs): |
| if kwargs: |
| assert not args |
| attributes = sorted(kwargs.items()) |
| elif args: |
| assert len(args) == 1 |
| attributes = args[0] |
| else: |
| return "" |
| data = "" |
| for attr, value in attributes: |
| if not isinstance(value, (bytes, str)): |
| value = str(value) |
| data = data + ' %s="%s"' % (attr, escapeattr(value)) |
| return data |
| |
| |
| def escape(data): |
| data = tostr(data, 'utf_8') |
| data = data.replace("&", "&") |
| data = data.replace("<", "<") |
| data = data.replace(">", ">") |
| data = data.replace("\r", " ") |
| return data |
| |
| def escapeattr(data): |
| data = escape(data) |
| data = data.replace('"', """) |
| return data |
| |
| def escape8bit(data): |
| """Input is Unicode string.""" |
| def escapechar(c): |
| n = ord(c) |
| if 32 <= n <= 127 and c not in "<&>": |
| return c |
| else: |
| return "&#" + repr(n) + ";" |
| return strjoin(map(escapechar, data.decode('latin-1'))) |
| |
| def hexStr(s): |
| h = string.hexdigits |
| r = '' |
| for c in s: |
| i = byteord(c) |
| r = r + h[(i >> 4) & 0xF] + h[i & 0xF] |
| return r |