# mako/filters.py - third_party/mako - Git at Google

 # mako/filters.py
 # Copyright 2006-2019 the Mako authors and contributors <see AUTHORS file>
 #
 # This module is part of Mako and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php


 import codecs
 import re

 from mako import compat
 from mako.compat import codepoint2name
 from mako.compat import name2codepoint
 from mako.compat import quote_plus
 from mako.compat import unquote_plus

 # Replacement text for every character that xml_escape() rewrites.  Both
 # quote characters are mapped to numeric character references.
 xml_escapes = dict(
     [
         ("&", "&amp;"),
         (">", "&gt;"),
         ("<", "&lt;"),
         ('"', "&#34;"),
         ("'", "&#39;"),
     ]
 )

 # XXX: &quot; is valid in HTML and XML
 #      &apos; is not valid HTML, but is valid XML


 def legacy_html_escape(s):
     """Escape HTML-significant characters using plain ``str.replace`` calls.

     This is the legacy escape used for non-unicode mode.  ``&`` is handled
     first so that the ampersands introduced by the later replacements are
     not escaped a second time.
     """
     substitutions = (
         ("&", "&amp;"),
         (">", "&gt;"),
         ("<", "&lt;"),
         ('"', "&#34;"),
         ("'", "&#39;"),
     )
     for raw, entity in substitutions:
         s = s.replace(raw, entity)
     return s


 # Use markupsafe's escape() for html_escape when the package can be
 # imported; otherwise fall back to legacy_html_escape from this module.
 try:
     import markupsafe

     html_escape = markupsafe.escape
 except ImportError:
     html_escape = legacy_html_escape


 def xml_escape(string):
     """Return ``string`` with ``&``, ``<``, ``>``, ``"`` and ``'`` escaped.

     Every matched character is replaced by its entry in ``xml_escapes``.
     """

     def _entity_for(match):
         return xml_escapes[match.group()]

     return re.sub(r'([&<"\'>])', _entity_for, string)


 def url_escape(string):
     """Encode ``string`` as UTF-8 and return the ``quote_plus`` form of it."""
     octets = string.encode("utf8")
     return quote_plus(octets)


 def legacy_url_escape(string):
     """Return the ``quote_plus`` form of ``string``.

     Unlike ``url_escape``, the argument is passed to ``quote_plus`` as-is,
     with no prior encoding step.
     """
     quoted = quote_plus(string)
     return quoted


 def url_unescape(string):
     """Reverse ``quote_plus`` quoting of ``string``.

     :param string: the quoted value.
     :return: the value produced by ``unquote_plus``; when that value is a
      byte string containing non-ASCII octets it is decoded as UTF-8.
     """
     text = unquote_plus(string)
     # Only byte strings need, or support, an explicit UTF-8 decode.  A text
     # string has no ``decode`` method on Python 3, so the previous
     # unconditional call raised AttributeError for any non-ASCII result.
     if isinstance(text, bytes) and not is_ascii_str(text):
         text = text.decode("utf8")
     return text


 def trim(string):
     """Return ``string`` with leading and trailing whitespace stripped."""
     stripped = string.strip()
     return stripped


 class Decode(object):
     """Factory whose attributes are decoding functions.

     Looking up an attribute that is not otherwise defined, for example
     ``decode.utf8``, returns a function that converts its argument to
     text, using the attribute name as the encoding for byte strings.
     """

     def __getattr__(self, key):
         def decode(x):
             # Text is already in its final form.
             if isinstance(x, compat.text_type):
                 return x
             # Byte strings are decoded with the codec named by ``key``.
             if isinstance(x, compat.binary_type):
                 return compat.text_type(x, encoding=key)
             # Anything else is stringified and run through the decoder again.
             return decode(str(x))

         return decode


 decode = Decode()


 # Matches strings made up solely of code points 0x00-0x7F (or nothing).
 _ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z")


 def is_ascii_str(text):
     """Return True when ``text`` is a ``str`` containing only ASCII.

     :param text: any object.
     :return: ``True`` for a ``str`` whose characters all lie in
      0x00-0x7F (the empty string included), ``False`` otherwise.  A real
      boolean is returned rather than the regex match object.
     """
     return isinstance(text, str) and _ASCII_re.match(text) is not None


 ################################################################


 class XMLEntityEscaper(object):
     """Convert between characters and XML/HTML character references.

     :param codepoint2name: mapping of integer code point to entity name.
     :param name2codepoint: mapping of entity name to integer code point.
     """

     def __init__(self, codepoint2name, name2codepoint):
         # Keyed by code point so the table can be handed to ``translate``
         # and also looked up by ``ord()`` value in ``__escape``.
         self.codepoint2entity = {
             c: compat.text_type("&%s;" % n)
             for c, n in codepoint2name.items()
         }
         self.name2codepoint = name2codepoint

     def escape_entities(self, text):
         """Replace characters with their character entity references.

         Only characters corresponding to a named entity are replaced.
         """
         return compat.text_type(text).translate(self.codepoint2entity)

     def __escape(self, m):
         # Prefer the named entity; fall back to a hexadecimal reference.
         codepoint = ord(m.group())
         try:
             return self.codepoint2entity[codepoint]
         except (KeyError, IndexError):
             return "&#x%X;" % codepoint

     # The four markup-significant ASCII characters plus anything non-ASCII.
     __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

     def escape(self, text):
         """Replace characters with their character references.

         Replace characters by their named entity references.
         Non-ASCII characters, if they do not have a named entity reference,
         are replaced by numerical character references.

         The return value is guaranteed to be ASCII.
         """
         return self.__escapable.sub(
             self.__escape, compat.text_type(text)
         ).encode("ascii")

     # XXX: This regexp will not match all valid XML entity names__.
     # (It punts on details involving CombiningChars and Extenders.)
     #
     # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
     #
     # Hex digits are accepted in either case: ``__escape`` emits upper-case
     # hex, so a lower-case-only pattern could not undo this class's output.
     __characterrefs = re.compile(
         r"""& (?:
                 \#(\d+)
                 | \#x([\da-fA-F]+)
                 | ( (?!\d) [:\w] [-.:\w]+ )
             ) ;""",
         re.X | re.UNICODE,
     )

     def __unescape(self, m):
         # Exactly one of the three groups is set: decimal, hex, or name.
         dval, hval, name = m.groups()
         if dval:
             codepoint = int(dval)
         elif hval:
             codepoint = int(hval, 16)
         else:
             codepoint = self.name2codepoint.get(name, 0xFFFD)
             # U+FFFD = "REPLACEMENT CHARACTER"
         try:
             return chr(codepoint)
         except (ValueError, OverflowError):
             # A numeric reference outside the valid code point range is
             # replaced rather than aborting the whole substitution.
             return u"\ufffd"

     def unescape(self, text):
         """Unescape character references.

         All character references (both entity references and numerical
         character references) are unescaped.  Unknown entity names and
         out-of-range numeric references become U+FFFD.
         """
         return self.__characterrefs.sub(self.__unescape, text)


 # Shared escaper built from the codepoint2name / name2codepoint tables
 # imported from mako.compat.
 _html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)

 # Module-level aliases for two bound methods of the shared escaper.
 html_entities_escape = _html_entities_escaper.escape_entities
 html_entities_unescape = _html_entities_escaper.unescape


 def htmlentityreplace_errors(ex):
     """An encoding error handler.

     This python codecs error handler replaces unencodable
     characters with HTML entities, or, if no HTML entity exists for
     the character, XML character references::

         >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
         'The cost was &euro;12.'

     :param ex: the exception passed in by the codec machinery.
     :return: a ``(replacement_text, resume_position)`` tuple when ``ex``
      is a ``UnicodeEncodeError``.
     :raises: ``ex`` itself for any other exception type.
     """
     if isinstance(ex, UnicodeEncodeError):
         # Handle encoding errors
         bad_text = ex.object[ex.start : ex.end]
         text = _html_entities_escaper.escape(bad_text)
         if isinstance(text, bytes):
             # escape() returns ASCII-encoded bytes.  Converting bytes with
             # the text type directly yields their repr ("b'...'") on
             # Python 3, so decode them explicitly first.
             text = text.decode("ascii")
         return (compat.text_type(text), ex.end)
     raise ex


 codecs.register_error("htmlentityreplace", htmlentityreplace_errors)


 # TODO: options to make this dynamic per-compilation will be added in a later
 # release
 # Maps each filter name to the expression string stored for it.
 DEFAULT_ESCAPES = dict(
     x="filters.xml_escape",
     h="filters.html_escape",
     u="filters.url_escape",
     trim="filters.trim",
     entity="filters.html_entities_escape",
     unicode="unicode",
     decode="decode",
     str="str",
     n="n",
 )

 if compat.py3k:
     DEFAULT_ESCAPES["unicode"] = "str"

 # Same table, with the "h" and "u" filters pointed at the legacy functions.
 NON_UNICODE_ESCAPES = dict(
     DEFAULT_ESCAPES,
     h="filters.legacy_html_escape",
     u="filters.legacy_url_escape",
 )
	# mako/filters.py
	# Copyright 2006-2019 the Mako authors and contributors <see AUTHORS file>
	#
	# This module is part of Mako and is released under
	# the MIT License: http://www.opensource.org/licenses/mit-license.php


	import codecs
	import re

	from mako import compat
	from mako.compat import codepoint2name
	from mako.compat import name2codepoint
	from mako.compat import quote_plus
	from mako.compat import unquote_plus

	# Replacement text for every character that xml_escape() rewrites.  Both
	# quote characters are mapped to numeric character references.
	xml_escapes = {
	    "&": "&amp;",
	    ">": "&gt;",
	    "<": "&lt;",
	    '"': "&#34;",
	    "'": "&#39;",
	}

	# XXX: &quot; is valid in HTML and XML
	#      &apos; is not valid HTML, but is valid XML


	def legacy_html_escape(s):
	    """legacy HTML escape for non-unicode mode.

	    :param s: the string to escape.
	    :return: ``s`` with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by
	     character references.
	    """
	    # "&" goes first so the ampersands introduced by the later
	    # replacements are not escaped a second time.
	    s = s.replace("&", "&amp;")
	    s = s.replace(">", "&gt;")
	    s = s.replace("<", "&lt;")
	    s = s.replace('"', "&#34;")
	    s = s.replace("'", "&#39;")
	    return s


	# Use markupsafe's escape() for html_escape when the package can be
	# imported; otherwise fall back to legacy_html_escape from this module.
	try:
	    import markupsafe

	    html_escape = markupsafe.escape
	except ImportError:
	    html_escape = legacy_html_escape


	def xml_escape(string):
	    """Return ``string`` with ``&``, ``<``, ``>``, ``"`` and ``'`` escaped.

	    Every matched character is replaced by its entry in ``xml_escapes``.
	    """
	    return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string)


	def url_escape(string):
	    """Encode ``string`` as UTF-8 and return the ``quote_plus`` form of it."""
	    # convert into a list of octets
	    string = string.encode("utf8")
	    return quote_plus(string)


	def legacy_url_escape(string):
	    """Return the ``quote_plus`` form of ``string``.

	    The argument is passed to ``quote_plus`` as-is, with no prior
	    encoding step.
	    """
	    return quote_plus(string)


	def url_unescape(string):
	    """Reverse ``quote_plus`` quoting of ``string``.

	    :param string: the quoted value.
	    :return: the value produced by ``unquote_plus``; when that value is a
	     byte string containing non-ASCII octets it is decoded as UTF-8.
	    """
	    text = unquote_plus(string)
	    # Only byte strings need, or support, an explicit UTF-8 decode.  A text
	    # string has no ``decode`` method on Python 3, so an unconditional
	    # call would raise AttributeError for any non-ASCII result.
	    if isinstance(text, bytes) and not is_ascii_str(text):
	        text = text.decode("utf8")
	    return text


	def trim(string):
	    """Return ``string`` with leading and trailing whitespace stripped."""
	    return string.strip()


	class Decode(object):
	    """Factory whose attributes are decoding functions.

	    Looking up an attribute that is not otherwise defined, for example
	    ``decode.utf8``, returns a function that converts its argument to
	    text, using the attribute name as the encoding for byte strings.
	    """

	    def __getattr__(self, key):
	        def decode(x):
	            if isinstance(x, compat.text_type):
	                # Text is already in its final form.
	                return x
	            elif not isinstance(x, compat.binary_type):
	                # Stringify other objects, then run them through again.
	                return decode(str(x))
	            else:
	                # Byte strings are decoded with the codec named by ``key``.
	                return compat.text_type(x, encoding=key)

	        return decode


	decode = Decode()


	# Matches strings made up solely of code points 0x00-0x7F (or nothing).
	_ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z")


	def is_ascii_str(text):
	    """Return True when ``text`` is a ``str`` containing only ASCII.

	    :param text: any object.
	    :return: ``True`` for a ``str`` whose characters all lie in
	     0x00-0x7F (the empty string included), ``False`` otherwise.  A real
	     boolean is returned rather than the regex match object.
	    """
	    return isinstance(text, str) and _ASCII_re.match(text) is not None


	################################################################


	class XMLEntityEscaper(object):
	    """Convert between characters and XML/HTML character references.

	    :param codepoint2name: mapping of integer code point to entity name.
	    :param name2codepoint: mapping of entity name to integer code point.
	    """

	    def __init__(self, codepoint2name, name2codepoint):
	        # Keyed by code point so the table can be handed to ``translate``
	        # and also looked up by ``ord()`` value in ``__escape``.
	        self.codepoint2entity = {
	            c: compat.text_type("&%s;" % n)
	            for c, n in codepoint2name.items()
	        }
	        self.name2codepoint = name2codepoint

	    def escape_entities(self, text):
	        """Replace characters with their character entity references.

	        Only characters corresponding to a named entity are replaced.
	        """
	        return compat.text_type(text).translate(self.codepoint2entity)

	    def __escape(self, m):
	        # Prefer the named entity; fall back to a hexadecimal reference.
	        codepoint = ord(m.group())
	        try:
	            return self.codepoint2entity[codepoint]
	        except (KeyError, IndexError):
	            return "&#x%X;" % codepoint

	    # The four markup-significant ASCII characters plus anything non-ASCII.
	    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

	    def escape(self, text):
	        """Replace characters with their character references.

	        Replace characters by their named entity references.
	        Non-ASCII characters, if they do not have a named entity reference,
	        are replaced by numerical character references.

	        The return value is guaranteed to be ASCII.
	        """
	        return self.__escapable.sub(
	            self.__escape, compat.text_type(text)
	        ).encode("ascii")

	    # XXX: This regexp will not match all valid XML entity names__.
	    # (It punts on details involving CombiningChars and Extenders.)
	    #
	    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
	    #
	    # Hex digits are accepted in either case: ``__escape`` emits upper-case
	    # hex, so a lower-case-only pattern could not undo this class's output.
	    __characterrefs = re.compile(
	        r"""& (?:
	                \#(\d+)
	                | \#x([\da-fA-F]+)
	                | ( (?!\d) [:\w] [-.:\w]+ )
	            ) ;""",
	        re.X | re.UNICODE,
	    )

	    def __unescape(self, m):
	        # Exactly one of the three groups is set: decimal, hex, or name.
	        dval, hval, name = m.groups()
	        if dval:
	            codepoint = int(dval)
	        elif hval:
	            codepoint = int(hval, 16)
	        else:
	            codepoint = self.name2codepoint.get(name, 0xFFFD)
	            # U+FFFD = "REPLACEMENT CHARACTER"
	        try:
	            return chr(codepoint)
	        except (ValueError, OverflowError):
	            # A numeric reference outside the valid code point range is
	            # replaced rather than aborting the whole substitution.
	            return u"\ufffd"

	    def unescape(self, text):
	        """Unescape character references.

	        All character references (both entity references and numerical
	        character references) are unescaped.  Unknown entity names and
	        out-of-range numeric references become U+FFFD.
	        """
	        return self.__characterrefs.sub(self.__unescape, text)


	# Shared escaper built from the codepoint2name / name2codepoint tables
	# imported from mako.compat.
	_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)

	# Module-level aliases for two bound methods of the shared escaper.
	html_entities_escape = _html_entities_escaper.escape_entities
	html_entities_unescape = _html_entities_escaper.unescape


	def htmlentityreplace_errors(ex):
	    """An encoding error handler.

	    This python codecs error handler replaces unencodable
	    characters with HTML entities, or, if no HTML entity exists for
	    the character, XML character references::

	        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
	        'The cost was &euro;12.'

	    :param ex: the exception passed in by the codec machinery.
	    :return: a ``(replacement_text, resume_position)`` tuple when ``ex``
	     is a ``UnicodeEncodeError``.
	    :raises: ``ex`` itself for any other exception type.
	    """
	    if isinstance(ex, UnicodeEncodeError):
	        # Handle encoding errors
	        bad_text = ex.object[ex.start : ex.end]
	        text = _html_entities_escaper.escape(bad_text)
	        if isinstance(text, bytes):
	            # escape() returns ASCII-encoded bytes.  Converting bytes with
	            # the text type directly yields their repr ("b'...'") on
	            # Python 3, so decode them explicitly first.
	            text = text.decode("ascii")
	        return (compat.text_type(text), ex.end)
	    raise ex


	codecs.register_error("htmlentityreplace", htmlentityreplace_errors)


	# TODO: options to make this dynamic per-compilation will be added in a later
	# release
	# Maps each filter name to the expression string stored for it.
	DEFAULT_ESCAPES = {
	    "x": "filters.xml_escape",
	    "h": "filters.html_escape",
	    "u": "filters.url_escape",
	    "trim": "filters.trim",
	    "entity": "filters.html_entities_escape",
	    "unicode": "unicode",
	    "decode": "decode",
	    "str": "str",
	    "n": "n",
	}

	if compat.py3k:
	    DEFAULT_ESCAPES.update({"unicode": "str"})

	# Same table, with the "h" and "u" filters pointed at the legacy functions.
	NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
	NON_UNICODE_ESCAPES["h"] = "filters.legacy_html_escape"
	NON_UNICODE_ESCAPES["u"] = "filters.legacy_url_escape"