test/Syntax/Inputs/invalid.sed - third_party/swift - Git at Google


 # Replaces the ASCII markers Z1..Z5 with byte sequences that the Swift lexer
 # must treat as invalid or unknown input.
 #
 # NOTE: the replacement bytes are written as \xHH escapes (a GNU sed
 # extension) instead of raw bytes. Raw bytes such as a lone 0xC2 are not valid
 # UTF-8 text and get silently mangled whenever this file is re-encoded.

 # [0xC2] is the lead byte of a two-byte UTF-8 sequence.
 # 0xC2 without a continuation byte is an invalid UTF-8 sequence.
 # It becomes garbage text trivia.
 # Marker(1) is replaced with this sequence.
 s/Z1/\xC2/g

 # [0xCC, 0x82] in UTF-8 is U+0302.
 # This character is invalid as an identifier start, but valid in an identifier body.
 # It becomes an unknown token.
 # If characters of this kind are contiguous, they are concatenated into one long unknown token.
 # Marker(2) is replaced with this sequence.
 s/Z2/\xCC\x82/g

 # [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
 # It becomes a single-character unknown token.
 # If a left quote and a right quote enclose text,
 # they become one long unknown token.
 # Marker(3) is replaced with this sequence.
 s/Z3/\xE2\x80\x9C/g

 # [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
 # It becomes a single-character unknown token.
 # Marker(4) is replaced with this sequence.
 s/Z4/\xE2\x80\x9D/g

 # [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
 # This character is invalid in Swift source.
 # It becomes garbage trivia.
 # Marker(5) is replaced with this sequence.
 s/Z5/\xE1\x9A\x80/g

	# Replaces the ASCII markers Z1..Z5 with byte sequences that the Swift lexer
	# must treat as invalid or unknown input.
	#
	# NOTE: the replacement bytes are written as \xHH escapes (a GNU sed
	# extension) instead of raw bytes. Raw bytes such as a lone 0xC2 are not valid
	# UTF-8 text and get silently mangled whenever this file is re-encoded.

	# [0xC2] is the lead byte of a two-byte UTF-8 sequence.
	# 0xC2 without a continuation byte is an invalid UTF-8 sequence.
	# It becomes garbage text trivia.
	# Marker(1) is replaced with this sequence.
	s/Z1/\xC2/g

	# [0xCC, 0x82] in UTF-8 is U+0302.
	# This character is invalid as an identifier start, but valid in an identifier body.
	# It becomes an unknown token.
	# If characters of this kind are contiguous, they are concatenated into one long unknown token.
	# Marker(2) is replaced with this sequence.
	s/Z2/\xCC\x82/g

	# [0xE2, 0x80, 0x9C] in UTF-8 is U+201C, left quote.
	# It becomes a single-character unknown token.
	# If a left quote and a right quote enclose text,
	# they become one long unknown token.
	# Marker(3) is replaced with this sequence.
	s/Z3/\xE2\x80\x9C/g

	# [0xE2, 0x80, 0x9D] in UTF-8 is U+201D, right quote.
	# It becomes a single-character unknown token.
	# Marker(4) is replaced with this sequence.
	s/Z4/\xE2\x80\x9D/g

	# [0xE1, 0x9A, 0x80] in UTF-8 is U+1680.
	# This character is invalid in Swift source.
	# It becomes garbage trivia.
	# Marker(5) is replaced with this sequence.
	s/Z5/\xE1\x9A\x80/g