| // RUN: %target-run-simple-swiftgyb |
| // REQUIRES: executable_test |
| |
| import SwiftPrivate |
| import StdlibUnittest |
| import StdlibUnicodeUnittest |
| import StdlibCollectionUnittest |
| |
| var UTF16APIs = TestSuite("UTF16APIs") |
| |
| UTF16APIs.test("width") { |
| expectEqual(1, UTF16.width("x")) |
| expectEqual(2, UTF16.width("\u{101010}")) |
| expectEqual(2, UTF16.width("𝄞")) |
| } |
| |
| UTF16APIs.test("leadSurrogate,trailSurrogate") { |
| do { |
| let us: UnicodeScalar = "𝄞" |
| expectEqual(0xD834, UTF16.leadSurrogate(us)) |
| expectEqual(0xDD1E, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{10000}" |
| expectEqual(0xD800, UTF16.leadSurrogate(us)) |
| expectEqual(0xDC00, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{20000}" |
| expectEqual(0xD840, UTF16.leadSurrogate(us)) |
| expectEqual(0xDC00, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{80000}" |
| expectEqual(0xD9C0, UTF16.leadSurrogate(us)) |
| expectEqual(0xDC00, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{F0000}" |
| expectEqual(0xDB80, UTF16.leadSurrogate(us)) |
| expectEqual(0xDC00, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{100000}" |
| expectEqual(0xDBC0, UTF16.leadSurrogate(us)) |
| expectEqual(0xDC00, UTF16.trailSurrogate(us)) |
| } |
| do { |
| let us: UnicodeScalar = "\u{10FFFF}" |
| expectEqual(0xDBFF, UTF16.leadSurrogate(us)) |
| expectEqual(0xDFFF, UTF16.trailSurrogate(us)) |
| } |
| } |
| |
| UTF16APIs.test("leadSurrogate/trap/U+0000") { |
| let us: UnicodeScalar = "\u{00}" |
| expectCrashLater() |
| _ = UTF16.leadSurrogate(us) |
| } |
| |
| UTF16APIs.test("leadSurrogate/trap/U+005A") { |
| let us: UnicodeScalar = "\u{5A}" |
| expectCrashLater() |
| _ = UTF16.leadSurrogate(us) |
| } |
| |
| UTF16APIs.test("leadSurrogate/trap/U+FFFF") { |
| let us: UnicodeScalar = "\u{FFFF}" |
| expectCrashLater() |
| _ = UTF16.leadSurrogate(us) |
| } |
| |
| UTF16APIs.test("trailSurrogate/trap/U+0000") { |
| let us: UnicodeScalar = "\u{00}" |
| expectCrashLater() |
| _ = UTF16.trailSurrogate(us) |
| } |
| |
| UTF16APIs.test("trailSurrogate/trap/U+005A") { |
| let us: UnicodeScalar = "\u{5A}" |
| expectCrashLater() |
| _ = UTF16.trailSurrogate(us) |
| } |
| |
| UTF16APIs.test("trailSurrogate/trap/U+FFFF") { |
| let us: UnicodeScalar = "\u{FFFF}" |
| expectCrashLater() |
| _ = UTF16.trailSurrogate(us) |
| } |
| |
| func checkDecodeUTF<Codec : UnicodeCodec>( |
| _ codec: Codec.Type, _ expectedHead: [UInt32], |
| _ expectedRepairedTail: [UInt32], _ utfStr: [Codec.CodeUnit] |
| ) -> AssertionResult { |
| do { |
| var decoded = [UInt32]() |
| let output: (UInt32) -> Void = { decoded.append($0) } |
| let iterator = utfStr.makeIterator() |
| transcode( |
| iterator, |
| from: codec, |
| to: UTF32.self, |
| stoppingOnError: true, |
| into: output) |
| if expectedHead != decoded { |
| return assertionFailure() |
| .withDescription("\n") |
| .withDescription("expectedHead: \(asHex(expectedHead))\n") |
| .withDescription("actual: \(asHex(decoded))") |
| } |
| } |
| |
| do { |
| var expected = expectedHead |
| expected += expectedRepairedTail |
| |
| var decoded = [UInt32]() |
| let output: (UInt32) -> Void = { decoded.append($0) } |
| let iterator = utfStr.makeIterator() |
| transcode( |
| iterator, |
| from: codec, |
| to: UTF32.self, |
| stoppingOnError: false, |
| into: output) |
| if expected != decoded { |
| return assertionFailure() |
| .withDescription("\n") |
| .withDescription("expected: \(asHex(expected))\n") |
| .withDescription("actual: \(asHex(decoded))") |
| } |
| } |
| |
| return assertionSuccess() |
| } |
| |
| func checkDecodeUTF8( |
| _ expectedHead: [UInt32], |
| _ expectedRepairedTail: [UInt32], _ utf8Str: [UInt8] |
| ) -> AssertionResult { |
| return checkDecodeUTF(UTF8.self, expectedHead, expectedRepairedTail, utf8Str) |
| } |
| |
| func checkDecodeUTF16( |
| _ expectedHead: [UInt32], |
| _ expectedRepairedTail: [UInt32], _ utf16Str: [UInt16] |
| ) -> AssertionResult { |
| return checkDecodeUTF(UTF16.self, expectedHead, expectedRepairedTail, |
| utf16Str) |
| } |
| |
| func checkDecodeUTF32( |
| _ expectedHead: [UInt32], |
| _ expectedRepairedTail: [UInt32], _ utf32Str: [UInt32] |
| ) -> AssertionResult { |
| return checkDecodeUTF(UTF32.self, expectedHead, expectedRepairedTail, |
| utf32Str) |
| } |
| |
| func checkEncodeUTF8(_ expected: [UInt8], |
| _ scalars: [UInt32]) -> AssertionResult { |
| var encoded = [UInt8]() |
| let output: (UInt8) -> Void = { encoded.append($0) } |
| let iterator = scalars.makeIterator() |
| let hadError = transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF8.self, |
| stoppingOnError: true, |
| into: output) |
| expectFalse(hadError) |
| if expected != encoded { |
| return assertionFailure() |
| .withDescription("\n") |
| .withDescription("expected: \(asHex(expected))\n") |
| .withDescription("actual: \(asHex(encoded))") |
| } |
| |
| return assertionSuccess() |
| } |
| |
| var UnicodeScalarTests = TestSuite("UnicodeScalarTests") |
| |
| UnicodeScalarTests.test("literal") { |
| do { |
| // U+0041 LATIN CAPITAL LETTER A |
| let us: UnicodeScalar = "A" |
| expectEqual(0x0041, us.value) |
| } |
| |
| do { |
| // U+3042 HIRAGANA LETTER A |
| let us: UnicodeScalar = "あ" |
| expectEqual(0x3042, us.value) |
| } |
| |
| do { |
| // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B |
| let us: UnicodeScalar = "例" |
| expectEqual(0x4F8B, us.value) |
| } |
| } |
| |
| UnicodeScalarTests.test("init") { |
| expectEqual("f", UnicodeScalar(102)) |
| expectEqual("g", UnicodeScalar(UInt32(103))) |
| expectEqual("h", UnicodeScalar(UInt16(104))) |
| expectEqual("i", UnicodeScalar(UInt8(105))) |
| expectEqual(nil, UnicodeScalar(UInt32(0xD800))) |
| } |
| |
| var UTF8Decoder = TestSuite("UTF8Decoder") |
| |
| UTF8Decoder.test("SmokeTest").forEach(in: utfTests) { |
| test in |
| |
| expectTrue( |
| checkDecodeUTF8(test.utf32, [], test.utf8), |
| stackTrace: test.loc.withCurrentLoc()) |
| return () |
| } |
| |
| UTF8Decoder.test("FirstPossibleSequence") { |
| // |
| // First possible sequence of a certain length |
| // |
| |
| // U+0000 NULL |
| expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ])) |
| |
| // U+0080 PADDING CHARACTER |
| expectTrue(checkDecodeUTF8([ 0x0080 ], [], [ 0xc2, 0x80 ])) |
| |
| // U+0800 SAMARITAN LETTER ALAF |
| expectTrue(checkDecodeUTF8( |
| [ 0x0800 ], [], |
| [ 0xe0, 0xa0, 0x80 ])) |
| |
| // U+10000 LINEAR B SYLLABLE B008 A |
| expectTrue(checkDecodeUTF8( |
| [ 0x10000 ], [], |
| [ 0xf0, 0x90, 0x80, 0x80 ])) |
| |
| // U+200000 (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x88, 0x80, 0x80, 0x80 ])) |
| |
| // U+4000000 (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 ])) |
| } |
| |
| UTF8Decoder.test("LastPossibleSequence") { |
| // |
| // Last possible sequence of a certain length |
| // |
| |
| // U+007F DELETE |
| expectTrue(checkDecodeUTF8([ 0x007f ], [], [ 0x7f ])) |
| |
| // U+07FF (unassigned) |
| expectTrue(checkDecodeUTF8([ 0x07ff ], [], [ 0xdf, 0xbf ])) |
| |
| // U+FFFF (noncharacter) |
| expectTrue(checkDecodeUTF8( |
| [ 0xffff ], [], |
| [ 0xef, 0xbf, 0xbf ])) |
| |
| // U+1FFFFF (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf7, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+3FFFFFF (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+7FFFFFFF (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("CodeSpaceBoundaryConditions") { |
| // |
| // Other boundary conditions |
| // |
| |
| // U+D7FF (unassigned) |
| expectTrue(checkDecodeUTF8([ 0xd7ff ], [], [ 0xed, 0x9f, 0xbf ])) |
| |
| // U+E000 (private use) |
| expectTrue(checkDecodeUTF8([ 0xe000 ], [], [ 0xee, 0x80, 0x80 ])) |
| |
| // U+FFFD REPLACEMENT CHARACTER |
| expectTrue(checkDecodeUTF8([ 0xfffd ], [], [ 0xef, 0xbf, 0xbd ])) |
| |
| // U+10FFFF (noncharacter) |
| expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ])) |
| |
| // U+110000 (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf4, 0x90, 0x80, 0x80 ])) |
| } |
| |
| UTF8Decoder.test("UnexpectedContinuationBytes") { |
| // |
| // Unexpected continuation bytes |
| // |
| |
| // A sequence of unexpected continuation bytes that don't follow a first |
| // byte, every byte is a maximal subpart. |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xbf, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0x80, 0xbf, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0x80, 0xbf, 0x80, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0x80, 0xbf, 0x82, 0xbf, 0xaa ])) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0 ])) |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0, 0x8f ])) |
| |
| // All continuation bytes (0x80--0xbf). |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, |
| 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, |
| 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, |
| 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, |
| 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("LonelyStartBytes") { |
| // |
| // Lonely start bytes |
| // |
| |
| // Start bytes of 2-byte sequences (0xc0--0xdf). |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, |
| 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, |
| 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, |
| 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20, |
| 0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20, |
| 0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20, |
| 0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20, |
| 0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20, |
| 0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20, |
| 0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20, |
| 0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 ])) |
| |
| // Start bytes of 3-byte sequences (0xe0--0xef). |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, |
| 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20, |
| 0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20, |
| 0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20, |
| 0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 ])) |
| |
| // Start bytes of 4-byte sequences (0xf0--0xf7). |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, |
| 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20, |
| 0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 ])) |
| |
| // Start bytes of 5-byte sequences (0xf8--0xfb). |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0xf9, 0xfa, 0xfb ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 ])) |
| |
| // Start bytes of 6-byte sequences (0xfc--0xfd). |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xfd ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], [ 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xfc, 0x20, 0xfd, 0x20 ])) |
| } |
| |
| UTF8Decoder.test("InvalidStartBytes") { |
| // |
| // Other bytes (0xc0--0xc1, 0xfe--0xff). |
| // |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfe ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xff ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xc0, 0xc1, 0xfe, 0xff ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfe, 0xfe, 0xff, 0xff ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfe, 0x80, 0x80, 0x80, 0x80, 0x80 ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xff, 0x80, 0x80, 0x80, 0x80, 0x80 ])) |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], |
| [ 0xc0, 0x20, 0xc1, 0x20, 0xfe, 0x20, 0xff, 0x20 ])) |
| } |
| |
| UTF8Decoder.test("MissingContinuationBytes") { |
| // |
| // Sequences with one continuation byte missing |
| // |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc2 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xdf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xc2, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xdf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xa0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xa0, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xbf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe1, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xec, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe1, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xec, 0xbf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x9f ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x9f, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xee, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xef, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xee, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xef, 0xbf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0x90, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0xbf, 0xbf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf1, 0x80, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf3, 0xbf, 0xbf, 0x41 ])) |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x80, 0x80, 0x41 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x8f, 0xbf, 0x41 ])) |
| |
| // Overlong sequences with one trailing byte missing. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x9f ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x8f, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x80, 0x80, 0x80, 0x80 ])) |
| |
| // Sequences that represent surrogates with one trailing byte missing. |
| // High-surrogates |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xa0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xac ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xaf ])) |
| // Low-surrogates |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb4 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xbf ])) |
| |
| // Ill-formed 4-byte sequences. |
| // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+1100xx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf4, 0x90, 0x80 ])) |
| // U+13FBxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf4, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf5, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf6, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf7, 0x80, 0x80 ])) |
| // U+1FFBxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf7, 0xbf, 0xbf ])) |
| |
| // Ill-formed 5-byte sequences. |
| // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+2000xx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x88, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0xbf, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf9, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfa, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfb, 0x80, 0x80, 0x80 ])) |
| // U+3FFFFxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfb, 0xbf, 0xbf, 0xbf ])) |
| |
| // Ill-formed 6-byte sequences. |
| // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx |
| // U+40000xx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x84, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0x80, 0x80, 0x80, 0x80 ])) |
| // U+7FFFFFxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| |
| // |
| // Sequences with two continuation bytes missing |
| // |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f ])) |
| |
| // Overlong sequences with two trailing byte missing. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x8f ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x80, 0x80, 0x80 ])) |
| |
| // Sequences that represent surrogates with two trailing bytes missing. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed ])) |
| |
| // Ill-formed 4-byte sequences. |
| // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+110yxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0x90 ])) |
| // U+13Fyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf5, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf6, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0x80 ])) |
| // U+1FFyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0xbf ])) |
| |
| // Ill-formed 5-byte sequences. |
| // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+200yxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x88, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf9, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfa, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfb, 0x80, 0x80 ])) |
| // U+3FFFyxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfb, 0xbf, 0xbf ])) |
| |
| // Ill-formed 6-byte sequences. |
| // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+4000yxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x84, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0xbf, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0x80, 0x80, 0x80 ])) |
| // U+7FFFFyxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0xbf, 0xbf, 0xbf ])) |
| |
| // |
| // Sequences with three continuation bytes missing |
| // |
| |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf2 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4 ])) |
| |
| // Broken overlong sequences. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x80, 0x80 ])) |
| |
| // Ill-formed 4-byte sequences. |
| // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+14yyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf5 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf6 ])) |
| // U+1Cyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf7 ])) |
| |
| // Ill-formed 5-byte sequences. |
| // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+20yyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x88 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf9, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfa, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0x80 ])) |
| // U+3FCyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0xbf ])) |
| |
| // Ill-formed 6-byte sequences. |
| // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+400yyxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x84, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0x80, 0x80 ])) |
| // U+7FFCyyxx (invalid) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfd, 0xbf, 0xbf ])) |
| |
| // |
| // Sequences with four continuation bytes missing |
| // |
| |
| // Ill-formed 5-byte sequences. |
| // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+uzyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf9 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfa ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ])) |
| // U+3zyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ])) |
| |
| // Broken overlong sequences. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x80 ])) |
| |
| // Ill-formed 6-byte sequences. |
| // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+uzzyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x84 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0x80 ])) |
| // U+7Fzzyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0xbf ])) |
| |
| // |
| // Sequences with five continuation bytes missing |
| // |
| |
| // Ill-formed 6-byte sequences. |
| // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| // U+uzzyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfc ])) |
| // U+uuzzyyxx (invalid) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfd ])) |
| |
| // |
| // Consecutive sequences with trailing bytes missing |
| // |
| |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xc0, /**/ 0xe0, 0x80, /**/ 0xf0, 0x80, 0x80, |
| 0xf8, 0x80, 0x80, 0x80, |
| 0xfc, 0x80, 0x80, 0x80, 0x80, |
| 0xdf, /**/ 0xef, 0xbf, /**/ 0xf7, 0xbf, 0xbf, |
| 0xfb, 0xbf, 0xbf, 0xbf, |
| 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("OverlongSequences") { |
| // |
| // Overlong UTF-8 sequences |
| // |
| |
| // U+002F SOLIDUS |
| expectTrue(checkDecodeUTF8([ 0x002f ], [], [ 0x2f ])) |
| |
| // Overlong sequences of the above. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xaf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xe0, 0x80, 0xaf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x80, 0x80, 0xaf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x80, 0x80, 0x80, 0xaf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf ])) |
| |
| // U+0000 NULL |
| expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ])) |
| |
| // Overlong sequences of the above. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xe0, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x80, 0x80, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 ])) |
| |
| // Other overlong and ill-formed sequences. |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xbf ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0x80 ])) |
| expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xe0, 0x9f, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xa0, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x8f, 0x80, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf0, 0x8f, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xf8, 0x87, 0xbf, 0xbf, 0xbf ])) |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("IsolatedSurrogates") { |
| // Unicode 6.3.0: |
| // |
| // D71. High-surrogate code point: A Unicode code point in the range |
| // U+D800 to U+DBFF. |
| // |
| // D73. Low-surrogate code point: A Unicode code point in the range |
| // U+DC00 to U+DFFF. |
| |
| // Note: U+E0100 is <DB40 DD00> in UTF-16. |
| |
| // High-surrogates |
| |
| // U+D800 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xa0, 0x80 ])) |
| expectTrue(checkDecodeUTF8( |
| [ 0x0041 ], |
| [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ], |
| [ 0x41, 0xed, 0xa0, 0x80, 0x41 ])) |
| |
| // U+DB40 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xac, 0xa0 ])) |
| |
| // U+DBFF |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xaf, 0xbf ])) |
| |
| // Low-surrogates |
| |
| // U+DC00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xb0, 0x80 ])) |
| |
| // U+DD00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xb4, 0x80 ])) |
| |
| // U+DFFF |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xbf, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("SurrogatePairs") { |
| // Surrogate pairs |
| |
| // U+D800 U+DC00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 ])) |
| |
| // U+D800 U+DD00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xa0, 0x80, 0xed, 0xb4, 0x80 ])) |
| |
| // U+D800 U+DFFF |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf ])) |
| |
| // U+DB40 U+DC00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xac, 0xa0, 0xed, 0xb0, 0x80 ])) |
| |
| // U+DB40 U+DD00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xac, 0xa0, 0xed, 0xb4, 0x80 ])) |
| |
| // U+DB40 U+DFFF |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xac, 0xa0, 0xed, 0xbf, 0xbf ])) |
| |
| // U+DBFF U+DC00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 ])) |
| |
| // U+DBFF U+DD00 |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xaf, 0xbf, 0xed, 0xb4, 0x80 ])) |
| |
| // U+DBFF U+DFFF |
| expectTrue(checkDecodeUTF8( |
| [], |
| [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], |
| [ 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf ])) |
| } |
| |
| UTF8Decoder.test("Noncharacters") { |
| // |
| // Noncharacters |
| // |
| |
| // Unicode 6.3.0: |
| // |
| // D14. Noncharacter: A code point that is permanently reserved for |
| // internal use and that should never be interchanged. Noncharacters |
| // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 1016) |
| // and the values U+FDD0..U+FDEF. |
| |
| // U+FFFE |
| expectTrue(checkDecodeUTF8([ 0xfffe ], [], [ 0xef, 0xbf, 0xbe ])) |
| |
| // U+FFFF |
| expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ])) |
| |
| // U+1FFFE |
| expectTrue(checkDecodeUTF8([ 0x1fffe ], [], [ 0xf0, 0x9f, 0xbf, 0xbe ])) |
| |
| // U+1FFFF |
| expectTrue(checkDecodeUTF8([ 0x1ffff ], [], [ 0xf0, 0x9f, 0xbf, 0xbf ])) |
| |
| // U+2FFFE |
| expectTrue(checkDecodeUTF8([ 0x2fffe ], [], [ 0xf0, 0xaf, 0xbf, 0xbe ])) |
| |
| // U+2FFFF |
| expectTrue(checkDecodeUTF8([ 0x2ffff ], [], [ 0xf0, 0xaf, 0xbf, 0xbf ])) |
| |
| // U+3FFFE |
| expectTrue(checkDecodeUTF8([ 0x3fffe ], [], [ 0xf0, 0xbf, 0xbf, 0xbe ])) |
| |
| // U+3FFFF |
| expectTrue(checkDecodeUTF8([ 0x3ffff ], [], [ 0xf0, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+4FFFE |
| expectTrue(checkDecodeUTF8([ 0x4fffe ], [], [ 0xf1, 0x8f, 0xbf, 0xbe ])) |
| |
| // U+4FFFF |
| expectTrue(checkDecodeUTF8([ 0x4ffff ], [], [ 0xf1, 0x8f, 0xbf, 0xbf ])) |
| |
| // U+5FFFE |
| expectTrue(checkDecodeUTF8([ 0x5fffe ], [], [ 0xf1, 0x9f, 0xbf, 0xbe ])) |
| |
| // U+5FFFF |
| expectTrue(checkDecodeUTF8([ 0x5ffff ], [], [ 0xf1, 0x9f, 0xbf, 0xbf ])) |
| |
| // U+6FFFE |
| expectTrue(checkDecodeUTF8([ 0x6fffe ], [], [ 0xf1, 0xaf, 0xbf, 0xbe ])) |
| |
| // U+6FFFF |
| expectTrue(checkDecodeUTF8([ 0x6ffff ], [], [ 0xf1, 0xaf, 0xbf, 0xbf ])) |
| |
| // U+7FFFE |
| expectTrue(checkDecodeUTF8([ 0x7fffe ], [], [ 0xf1, 0xbf, 0xbf, 0xbe ])) |
| |
| // U+7FFFF |
| expectTrue(checkDecodeUTF8([ 0x7ffff ], [], [ 0xf1, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+8FFFE |
| expectTrue(checkDecodeUTF8([ 0x8fffe ], [], [ 0xf2, 0x8f, 0xbf, 0xbe ])) |
| |
| // U+8FFFF |
| expectTrue(checkDecodeUTF8([ 0x8ffff ], [], [ 0xf2, 0x8f, 0xbf, 0xbf ])) |
| |
| // U+9FFFE |
| expectTrue(checkDecodeUTF8([ 0x9fffe ], [], [ 0xf2, 0x9f, 0xbf, 0xbe ])) |
| |
| // U+9FFFF |
| expectTrue(checkDecodeUTF8([ 0x9ffff ], [], [ 0xf2, 0x9f, 0xbf, 0xbf ])) |
| |
| // U+AFFFE |
| expectTrue(checkDecodeUTF8([ 0xafffe ], [], [ 0xf2, 0xaf, 0xbf, 0xbe ])) |
| |
| // U+AFFFF |
| expectTrue(checkDecodeUTF8([ 0xaffff ], [], [ 0xf2, 0xaf, 0xbf, 0xbf ])) |
| |
| // U+BFFFE |
| expectTrue(checkDecodeUTF8([ 0xbfffe ], [], [ 0xf2, 0xbf, 0xbf, 0xbe ])) |
| |
| // U+BFFFF |
| expectTrue(checkDecodeUTF8([ 0xbffff ], [], [ 0xf2, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+CFFFE |
| expectTrue(checkDecodeUTF8([ 0xcfffe ], [], [ 0xf3, 0x8f, 0xbf, 0xbe ])) |
| |
| // U+CFFFF |
| expectTrue(checkDecodeUTF8([ 0xcfffF ], [], [ 0xf3, 0x8f, 0xbf, 0xbf ])) |
| |
| // U+DFFFE |
| expectTrue(checkDecodeUTF8([ 0xdfffe ], [], [ 0xf3, 0x9f, 0xbf, 0xbe ])) |
| |
| // U+DFFFF |
| expectTrue(checkDecodeUTF8([ 0xdffff ], [], [ 0xf3, 0x9f, 0xbf, 0xbf ])) |
| |
| // U+EFFFE |
| expectTrue(checkDecodeUTF8([ 0xefffe ], [], [ 0xf3, 0xaf, 0xbf, 0xbe ])) |
| |
| // U+EFFFF |
| expectTrue(checkDecodeUTF8([ 0xeffff ], [], [ 0xf3, 0xaf, 0xbf, 0xbf ])) |
| |
| // U+FFFFE |
| expectTrue(checkDecodeUTF8([ 0xffffe ], [], [ 0xf3, 0xbf, 0xbf, 0xbe ])) |
| |
| // U+FFFFF |
| expectTrue(checkDecodeUTF8([ 0xfffff ], [], [ 0xf3, 0xbf, 0xbf, 0xbf ])) |
| |
| // U+10FFFE |
| expectTrue(checkDecodeUTF8([ 0x10fffe ], [], [ 0xf4, 0x8f, 0xbf, 0xbe ])) |
| |
| // U+10FFFF |
| expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ])) |
| |
| // U+FDD0 |
| expectTrue(checkDecodeUTF8([ 0xfdd0 ], [], [ 0xef, 0xb7, 0x90 ])) |
| |
| // U+FDD1 |
| expectTrue(checkDecodeUTF8([ 0xfdd1 ], [], [ 0xef, 0xb7, 0x91 ])) |
| |
| // U+FDD2 |
| expectTrue(checkDecodeUTF8([ 0xfdd2 ], [], [ 0xef, 0xb7, 0x92 ])) |
| |
| // U+FDD3 |
| expectTrue(checkDecodeUTF8([ 0xfdd3 ], [], [ 0xef, 0xb7, 0x93 ])) |
| |
| // U+FDD4 |
| expectTrue(checkDecodeUTF8([ 0xfdd4 ], [], [ 0xef, 0xb7, 0x94 ])) |
| |
| // U+FDD5 |
| expectTrue(checkDecodeUTF8([ 0xfdd5 ], [], [ 0xef, 0xb7, 0x95 ])) |
| |
| // U+FDD6 |
| expectTrue(checkDecodeUTF8([ 0xfdd6 ], [], [ 0xef, 0xb7, 0x96 ])) |
| |
| // U+FDD7 |
| expectTrue(checkDecodeUTF8([ 0xfdd7 ], [], [ 0xef, 0xb7, 0x97 ])) |
| |
| // U+FDD8 |
| expectTrue(checkDecodeUTF8([ 0xfdd8 ], [], [ 0xef, 0xb7, 0x98 ])) |
| |
| // U+FDD9 |
| expectTrue(checkDecodeUTF8([ 0xfdd9 ], [], [ 0xef, 0xb7, 0x99 ])) |
| |
| // U+FDDA |
| expectTrue(checkDecodeUTF8([ 0xfdda ], [], [ 0xef, 0xb7, 0x9a ])) |
| |
| // U+FDDB |
| expectTrue(checkDecodeUTF8([ 0xfddb ], [], [ 0xef, 0xb7, 0x9b ])) |
| |
| // U+FDDC |
| expectTrue(checkDecodeUTF8([ 0xfddc ], [], [ 0xef, 0xb7, 0x9c ])) |
| |
| // U+FDDD |
| expectTrue(checkDecodeUTF8([ 0xfddd ], [], [ 0xef, 0xb7, 0x9d ])) |
| |
| // U+FDDE |
| expectTrue(checkDecodeUTF8([ 0xfdde ], [], [ 0xef, 0xb7, 0x9e ])) |
| |
| // U+FDDF |
| expectTrue(checkDecodeUTF8([ 0xfddf ], [], [ 0xef, 0xb7, 0x9f ])) |
| |
| // U+FDE0 |
| expectTrue(checkDecodeUTF8([ 0xfde0 ], [], [ 0xef, 0xb7, 0xa0 ])) |
| |
| // U+FDE1 |
| expectTrue(checkDecodeUTF8([ 0xfde1 ], [], [ 0xef, 0xb7, 0xa1 ])) |
| |
| // U+FDE2 |
| expectTrue(checkDecodeUTF8([ 0xfde2 ], [], [ 0xef, 0xb7, 0xa2 ])) |
| |
| // U+FDE3 |
| expectTrue(checkDecodeUTF8([ 0xfde3 ], [], [ 0xef, 0xb7, 0xa3 ])) |
| |
| // U+FDE4 |
| expectTrue(checkDecodeUTF8([ 0xfde4 ], [], [ 0xef, 0xb7, 0xa4 ])) |
| |
| // U+FDE5 |
| expectTrue(checkDecodeUTF8([ 0xfde5 ], [], [ 0xef, 0xb7, 0xa5 ])) |
| |
| // U+FDE6 |
| expectTrue(checkDecodeUTF8([ 0xfde6 ], [], [ 0xef, 0xb7, 0xa6 ])) |
| |
| // U+FDE7 |
| expectTrue(checkDecodeUTF8([ 0xfde7 ], [], [ 0xef, 0xb7, 0xa7 ])) |
| |
| // U+FDE8 |
| expectTrue(checkDecodeUTF8([ 0xfde8 ], [], [ 0xef, 0xb7, 0xa8 ])) |
| |
| // U+FDE9 |
| expectTrue(checkDecodeUTF8([ 0xfde9 ], [], [ 0xef, 0xb7, 0xa9 ])) |
| |
| // U+FDEA |
| expectTrue(checkDecodeUTF8([ 0xfdea ], [], [ 0xef, 0xb7, 0xaa ])) |
| |
| // U+FDEB |
| expectTrue(checkDecodeUTF8([ 0xfdeb ], [], [ 0xef, 0xb7, 0xab ])) |
| |
| // U+FDEC |
| expectTrue(checkDecodeUTF8([ 0xfdec ], [], [ 0xef, 0xb7, 0xac ])) |
| |
| // U+FDED |
| expectTrue(checkDecodeUTF8([ 0xfded ], [], [ 0xef, 0xb7, 0xad ])) |
| |
| // U+FDEE |
| expectTrue(checkDecodeUTF8([ 0xfdee ], [], [ 0xef, 0xb7, 0xae ])) |
| |
| // U+FDEF |
| expectTrue(checkDecodeUTF8([ 0xfdef ], [], [ 0xef, 0xb7, 0xaf ])) |
| |
| // U+FDF0 |
| expectTrue(checkDecodeUTF8([ 0xfdf0 ], [], [ 0xef, 0xb7, 0xb0 ])) |
| |
| // U+FDF1 |
| expectTrue(checkDecodeUTF8([ 0xfdf1 ], [], [ 0xef, 0xb7, 0xb1 ])) |
| |
| // U+FDF2 |
| expectTrue(checkDecodeUTF8([ 0xfdf2 ], [], [ 0xef, 0xb7, 0xb2 ])) |
| |
| // U+FDF3 |
| expectTrue(checkDecodeUTF8([ 0xfdf3 ], [], [ 0xef, 0xb7, 0xb3 ])) |
| |
| // U+FDF4 |
| expectTrue(checkDecodeUTF8([ 0xfdf4 ], [], [ 0xef, 0xb7, 0xb4 ])) |
| |
| // U+FDF5 |
| expectTrue(checkDecodeUTF8([ 0xfdf5 ], [], [ 0xef, 0xb7, 0xb5 ])) |
| |
| // U+FDF6 |
| expectTrue(checkDecodeUTF8([ 0xfdf6 ], [], [ 0xef, 0xb7, 0xb6 ])) |
| |
| // U+FDF7 |
| expectTrue(checkDecodeUTF8([ 0xfdf7 ], [], [ 0xef, 0xb7, 0xb7 ])) |
| |
| // U+FDF8 |
| expectTrue(checkDecodeUTF8([ 0xfdf8 ], [], [ 0xef, 0xb7, 0xb8 ])) |
| |
| // U+FDF9 |
| expectTrue(checkDecodeUTF8([ 0xfdf9 ], [], [ 0xef, 0xb7, 0xb9 ])) |
| |
| // U+FDFA |
| expectTrue(checkDecodeUTF8([ 0xfdfa ], [], [ 0xef, 0xb7, 0xba ])) |
| |
| // U+FDFB |
| expectTrue(checkDecodeUTF8([ 0xfdfb ], [], [ 0xef, 0xb7, 0xbb ])) |
| |
| // U+FDFC |
| expectTrue(checkDecodeUTF8([ 0xfdfc ], [], [ 0xef, 0xb7, 0xbc ])) |
| |
| // U+FDFD |
| expectTrue(checkDecodeUTF8([ 0xfdfd ], [], [ 0xef, 0xb7, 0xbd ])) |
| |
| // U+FDFE |
| expectTrue(checkDecodeUTF8([ 0xfdfe ], [], [ 0xef, 0xb7, 0xbe ])) |
| |
| // U+FDFF |
| expectTrue(checkDecodeUTF8([ 0xfdff ], [], [ 0xef, 0xb7, 0xbf ])) |
| } |
| |
| var UTF16Decoder = TestSuite("UTF16Decoder") |
| |
| UTF16Decoder.test("UTF16.transcodedLength") { |
| do { |
| var u8: [UTF8.CodeUnit] = [ 0, 1, 2, 3, 4, 5 ] |
| let (count, isASCII) = UTF16.transcodedLength( |
| of: u8.makeIterator(), |
| decodedAs: UTF8.self, |
| repairingIllFormedSequences: false)! |
| expectEqual(6, count) |
| expectTrue(isASCII) |
| } |
| |
| do { |
| // "€" == U+20AC. |
| var u8: [UTF8.CodeUnit] = [ 0xF0, 0xA4, 0xAD, 0xA2 ] |
| let (count, isASCII) = UTF16.transcodedLength( |
| of: u8.makeIterator(), |
| decodedAs: UTF8.self, |
| repairingIllFormedSequences: false)! |
| expectEqual(2, count) |
| expectFalse(isASCII) |
| } |
| |
| do { |
| let u16: [UTF16.CodeUnit] = [ 6, 7, 8, 9, 10, 11 ] |
| let (count, isASCII) = UTF16.transcodedLength( |
| of: u16.makeIterator(), |
| decodedAs: UTF16.self, |
| repairingIllFormedSequences: false)! |
| expectEqual(6, count) |
| expectTrue(isASCII) |
| } |
| } |
| |
| UTF16Decoder.test("Decoding1").forEach(in: utfTests) { |
| test in |
| |
| expectTrue( |
| checkDecodeUTF16( |
| test.utf32, test.utf32RepairedTail, test.utf16), |
| stackTrace: test.loc.withCurrentLoc()) |
| return () |
| } |
| |
| UTF16Decoder.test("Decoding2") { |
| for (name, batch) in utf16Tests { |
| print("Batch: \(name)") |
| for test in batch { |
| expectTrue(checkDecodeUTF16(test.scalarsHead, test.scalarsRepairedTail, |
| test.encoded), stackTrace: test.loc.withCurrentLoc()) |
| } |
| } |
| } |
| |
| var UTF32Decoder = TestSuite("UTF32Decoder") |
| |
| UTF32Decoder.test("Empty") { |
| expectTrue(checkDecodeUTF32([], [], [])) |
| } |
| |
| UTF32Decoder.test("SmokeTest") { |
| // U+0041 LATIN CAPITAL LETTER A |
| expectTrue(checkDecodeUTF32([ 0x0041 ], [], [ 0x0000_0041 ])) |
| |
| // U+0041 LATIN CAPITAL LETTER A |
| // U+0042 LATIN CAPITAL LETTER B |
| expectTrue(checkDecodeUTF32( |
| [ 0x0041, 0x0042 ], [], |
| [ 0x0000_0041, 0x0000_0042 ])) |
| |
| // U+0000 NULL |
| // U+0041 LATIN CAPITAL LETTER A |
| // U+0042 LATIN CAPITAL LETTER B |
| // U+0000 NULL |
| expectTrue(checkDecodeUTF32( |
| [ 0x0000, 0x0041, 0x0042, 0x0000 ], [], |
| [ 0x0000_0000, 0x0000_0041, 0x0000_0042, 0x0000_0000 ])) |
| |
| // U+0283 LATIN SMALL LETTER ESH |
| expectTrue(checkDecodeUTF32([ 0x0283 ], [], [ 0x0000_0283 ])) |
| |
| // U+03BA GREEK SMALL LETTER KAPPA |
| // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA |
| // U+03C3 GREEK SMALL LETTER SIGMA |
| // U+03BC GREEK SMALL LETTER MU |
| // U+03B5 GREEK SMALL LETTER EPSILON |
| expectTrue(checkDecodeUTF32( |
| [ 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5 ], [], |
| [ 0x0000_03ba, 0x0000_1f79, 0x0000_03c3, 0x0000_03bc, 0x0000_03b5 ])) |
| |
| // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B |
| // U+6587 CJK UNIFIED IDEOGRAPH-6587 |
| expectTrue(checkDecodeUTF32( |
| [ 0x4f8b, 0x6587 ], [], |
| [ 0x0000_4f8b, 0x0000_6587 ])) |
| |
| // U+D55C HANGUL SYLLABLE HAN |
| // U+AE00 HANGUL SYLLABLE GEUL |
| expectTrue(checkDecodeUTF32( |
| [ 0xd55c, 0xae00 ], [], |
| [ 0x0000_d55c, 0x0000_ae00 ])) |
| |
| // U+1112 HANGUL CHOSEONG HIEUH |
| // U+1161 HANGUL JUNGSEONG A |
| // U+11AB HANGUL JONGSEONG NIEUN |
| // U+1100 HANGUL CHOSEONG KIYEOK |
| // U+1173 HANGUL JUNGSEONG EU |
| // U+11AF HANGUL JONGSEONG RIEUL |
| expectTrue(checkDecodeUTF32( |
| [ 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af ], [], |
| [ 0x0000_1112, 0x0000_1161, 0x0000_11ab, 0x0000_1100, 0x0000_1173, |
| 0x0000_11af ])) |
| |
| // U+D7FF (unassigned) |
| expectTrue(checkDecodeUTF16([ 0xd7ff ], [], [ 0x0000_d7ff ])) |
| |
| // U+E000 (private use) |
| expectTrue(checkDecodeUTF16([ 0xe000 ], [], [ 0x0000_e000 ])) |
| |
| // U+FFFD REPLACEMENT CHARACTER |
| expectTrue(checkDecodeUTF16([ 0xfffd ], [], [ 0x0000_fffd ])) |
| |
| // U+FFFF (noncharacter) |
| expectTrue(checkDecodeUTF16([ 0xffff ], [], [ 0x0000_ffff ])) |
| |
| // U+10000 LINEAR B SYLLABLE B008 A |
| expectTrue(checkDecodeUTF32([ 0x00010000 ], [], [ 0x0001_0000 ])) |
| |
| // U+10100 AEGEAN WORD SEPARATOR LINE |
| expectTrue(checkDecodeUTF32([ 0x00010100 ], [], [ 0x0001_0100 ])) |
| |
| // U+103FF (unassigned) |
| expectTrue(checkDecodeUTF32([ 0x000103ff ], [], [ 0x0001_03ff ])) |
| |
| // U+1D800 (unassigned) |
| expectTrue(checkDecodeUTF32([ 0x0001d800 ], [], [ 0x0001_d800 ])) |
| |
| |
| // U+E0000 (unassigned) |
| expectTrue(checkDecodeUTF32([ 0x000e0000 ], [], [ 0x000e_0000 ])) |
| |
| // U+E0100 VARIATION SELECTOR-17 |
| expectTrue(checkDecodeUTF32([ 0x000e0100 ], [], [ 0x000e_0100 ])) |
| |
| // U+E03FF (unassigned) |
| expectTrue(checkDecodeUTF32([ 0x000e03ff ], [], [ 0x000e_03ff ])) |
| |
| |
| // U+10FC00 (private use) |
| expectTrue(checkDecodeUTF32([ 0x0010fc00 ], [], [ 0x0010_fc00 ])) |
| |
| // U+10FD00 (private use) |
| expectTrue(checkDecodeUTF32([ 0x0010fd00 ], [], [ 0x0010_fd00 ])) |
| |
| // U+10FFFF (private use, noncharacter) |
| expectTrue(checkDecodeUTF32([ 0x0010ffff ], [], [ 0x0010_ffff ])) |
| } |
| |
| UTF32Decoder.test("IllFormed") { |
| // U+D800 (high-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_d800 ])) |
| |
| // U+DB40 (high-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_db40 ])) |
| |
| // U+DBFF (high-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dbff ])) |
| |
| // U+DC00 (low-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dc00 ])) |
| |
| // U+DD00 (low-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dd00 ])) |
| |
| // U+DFFF (low-surrogate) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0000_dfff ])) |
| |
| // U+110000 (invalid) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0011_0000 ])) |
| |
| // U+1000000 (invalid) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x0100_0000 ])) |
| |
| // U+80000000 (invalid) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0x8000_0000 ])) |
| |
| // U+FFFF0000 (invalid) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_0000 ])) |
| |
| // U+FFFFFFFF (invalid) |
| expectTrue(checkDecodeUTF32([], [ 0xfffd ], [ 0xffff_ffff ])) |
| } |
| |
| var UTF8Encoder = TestSuite("UTF8Encoder") |
| |
| UTF8Encoder.test("SmokeTest").forEach(in: utfTests) { |
| test in |
| |
| expectTrue( |
| checkEncodeUTF8(test.utf8, test.utf32), |
| stackTrace: test.loc.withCurrentLoc()) |
| return () |
| } |
| |
| var UnicodeAPIs = TestSuite("UnicodeAPIs") |
| |
| UnicodeAPIs.test("transcode/MutableArray") { |
| var input: [UInt16] = [ 0x0041, 0x0042 ] |
| var transcoded = [UInt16]() |
| let output: (UInt16) -> Void = { transcoded.append($0) } |
| transcode( |
| input.makeIterator(), |
| from: UTF16.self, |
| to: UTF16.self, |
| stoppingOnError: true, |
| into: output) |
| expectEqual(input, transcoded) |
| } |
| |
| UnicodeAPIs.test("transcode/ReferenceTypedArray") { |
| var input: [UInt16] = [ 0x0041, 0x0042 ] |
| var transcoded = [UInt16]() |
| let output: (UInt16) -> Void = { transcoded.append($0) } |
| transcode( |
| input.makeIterator(), |
| from: UTF16.self, |
| to: UTF16.self, |
| stoppingOnError: true, |
| into: output) |
| expectEqual(input, transcoded) |
| } |
| |
| #if _runtime(_ObjC) |
| import Foundation |
| |
| // The most simple subclass of NSString that CoreFoundation does not know |
| // about. |
| class NonContiguousNSString : NSString { |
| override init() { |
| _value = [] |
| super.init() |
| } |
| |
| required init(coder aDecoder: NSCoder) { |
| fatalError("don't call this initializer") |
| } |
| required init(itemProviderData data: Data, typeIdentifier: String) throws { |
| fatalError("don't call this initializer") |
| } |
| |
| @nonobjc |
| convenience init(_ utf8: [UInt8]) { |
| var encoded = [UInt16]() |
| let output: (UInt16) -> Void = { encoded.append($0) } |
| let iterator = utf8.makeIterator() |
| let hadError = transcode( |
| iterator, |
| from: UTF8.self, |
| to: UTF16.self, |
| stoppingOnError: true, |
| into: output) |
| expectFalse(hadError) |
| self.init(encoded) |
| } |
| |
| @nonobjc |
| init(_ value: [UInt16]) { |
| _value = value |
| super.init() |
| } |
| |
| @nonobjc |
| convenience init(_ scalars: [UInt32]) { |
| var encoded = [UInt16]() |
| let output: (UInt16) -> Void = { encoded.append($0) } |
| let iterator = scalars.makeIterator() |
| let hadError = transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF16.self, |
| stoppingOnError: true, |
| into: output) |
| expectFalse(hadError) |
| self.init(encoded) |
| } |
| |
| @objc(copyWithZone:) |
| override func copy(with zone: NSZone?) -> Any { |
| // Ensure that copying this string produces a class that CoreFoundation |
| // does not know about. |
| return self |
| } |
| |
| @objc override var length: Int { |
| return _value.count |
| } |
| |
| @objc override func character(at index: Int) -> unichar { |
| return _value[index] |
| } |
| |
| var _value: [UInt16] |
| } |
| |
| func checkUTF8View(_ expected: [UInt8], _ subject: String, |
| _ stackTrace: SourceLocStack) { |
| checkForwardCollection(expected, subject.utf8) |
| } |
| |
| func checkUTF16View(_ expected: [UInt16], _ subject: String, |
| _ stackTrace: SourceLocStack) { |
| checkBidirectionalCollection(expected, subject.utf16) |
| } |
| |
| func forStringsWithUnpairedSurrogates(_ checkClosure: (UTF16Test, String) -> Void) { |
| for (name, batch) in utf16Tests { |
| print("Batch: \(name)") |
| for test in batch { |
| let subject = NonContiguousNSString(test.encoded) as String |
| checkClosure(test, subject) |
| } |
| } |
| |
| for test in utfTests { |
| let utf16Test = UTF16Test( |
| test.utf32, |
| test.utf32RepairedTail, |
| test.utf16, |
| file: test.loc.file, line: test.loc.line) |
| let subject = NonContiguousNSString(test.utf16) as String |
| checkClosure(utf16Test, subject) |
| } |
| } |
| |
| var StringCookedViews = TestSuite("StringCookedViews") |
| |
| StringCookedViews.test("UTF8ForContiguousUTF16").forEach(in: utfTests) { |
| test in |
| |
| // Add a non-ASCII character at the beginning to force Swift String and |
| // CoreFoundation off the ASCII fast path. |
| // |
| // U+0283 LATIN SMALL LETTER ESH |
| var backingStorage: [UInt16] = [ 0x0283 ] |
| let expected: [UInt8] = [ 0xca, 0x83 ] + test.utf8 |
| let output: (UInt16) -> Void = { backingStorage.append($0) } |
| |
| var iterator = test.utf32.makeIterator() |
| transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF16.self, |
| stoppingOnError: false, |
| into: output) |
| |
| backingStorage.withUnsafeBufferPointer { |
| (ptr) -> Void in |
| let cfstring = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, |
| ptr.baseAddress, backingStorage.count, kCFAllocatorNull)! |
| expectFalse(CFStringGetCStringPtr(cfstring, |
| CFStringBuiltInEncodings.ASCII.rawValue) != nil) |
| expectTrue(CFStringGetCharactersPtr(cfstring) != nil) |
| checkUTF8View( |
| expected, cfstring as NSString as String, test.loc.withCurrentLoc()) |
| return () |
| } |
| } |
| |
| StringCookedViews.test("UTF8ForContiguousUTF16/StringsWithUnpairedSurrogates") { |
| forStringsWithUnpairedSurrogates { |
| (test: UTF16Test, subject: String) -> Void in |
| var expected = [UInt8]() |
| let output: (UInt8) -> Void = { expected.append($0) } |
| var expectedScalars = test.scalarsHead + test.scalarsRepairedTail |
| var iterator = expectedScalars.makeIterator() |
| transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF8.self, |
| stoppingOnError: false, |
| into: output) |
| |
| checkUTF8View(expected, subject, test.loc.withCurrentLoc()) |
| } |
| } |
| |
| func verifyThatStringIsOpaqueForCoreFoundation(_ nss: NSString) { |
| // Sanity checks to make sure we are testing the code path that does UTF-8 |
| // encoding itself, instead of dispatching to CF. Both the original string |
| // itself and its copies should be resilient to CF's fast path functions, |
| // because Swift bridging may copy the string to ensure that it is not |
| // mutated. |
| let cfstring = unsafeBitCast(nss, to: CFString.self) |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.ASCII.rawValue) == nil) |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.UTF8.rawValue) == nil) |
| |
| assert(CFStringGetCharactersPtr(cfstring) == nil) |
| |
| let copy = CFStringCreateCopy(nil, cfstring) |
| assert( |
| CFStringGetCStringPtr( |
| copy, CFStringBuiltInEncodings.ASCII.rawValue) == nil) |
| assert( |
| CFStringGetCStringPtr( |
| copy, CFStringBuiltInEncodings.UTF8.rawValue) == nil) |
| assert(CFStringGetCharactersPtr(copy) == nil) |
| } |
| |
| StringCookedViews.test("UTF8ForNonContiguousUTF16/fromUTF32").forEach(in: utfTests) { |
| test in |
| |
| var nss = NonContiguousNSString(test.utf32) |
| verifyThatStringIsOpaqueForCoreFoundation(nss) |
| checkUTF8View(test.utf8, nss as String, test.loc.withCurrentLoc()) |
| } |
| |
| StringCookedViews.test("UTF8ForNonContiguousUTF16/fromUTF16").forEach(in: utfTests) { |
| test in |
| |
| var expected = [UInt8]() |
| var expectedScalars = test.utf32 + test.utf32RepairedTail |
| transcode( |
| expectedScalars.makeIterator(), |
| from: UTF32.self, |
| to: UTF8.self, |
| stoppingOnError: false, |
| into: { expected.append($0) }) |
| |
| var nss = NonContiguousNSString(test.utf16) |
| verifyThatStringIsOpaqueForCoreFoundation(nss) |
| checkUTF8View(expected, nss as String, test.loc.withCurrentLoc()) |
| } |
| |
| StringCookedViews.test("UTF8ForNonContiguousUTF16/utf16Tests"){ |
| for (name, batch) in utf16Tests { |
| print("Batch: \(name)") |
| for test in batch { |
| var expected = [UInt8]() |
| let output: (UInt8) -> Void = { expected.append($0) } |
| var expectedScalars = test.scalarsHead + test.scalarsRepairedTail |
| var iterator = expectedScalars.makeIterator() |
| transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF8.self, |
| stoppingOnError: false, |
| into: output) |
| |
| var nss = NonContiguousNSString(test.encoded) |
| verifyThatStringIsOpaqueForCoreFoundation(nss) |
| checkUTF8View(expected, nss as String, test.loc.withCurrentLoc()) |
| } |
| } |
| } |
| |
| StringCookedViews.test("UTF8ForNonContiguousUTF16Extra") { |
| // These tests don't add much additional value as long as tests above |
| // actually test the code path we care about. |
| do { |
| var bytes: [UInt8] = [ 97, 98, 99 ] |
| var cfstring: CFString = CFStringCreateWithBytesNoCopy( |
| kCFAllocatorDefault, bytes, bytes.count, |
| CFStringBuiltInEncodings.macRoman.rawValue, false, kCFAllocatorNull) |
| |
| // Sanity checks to make sure we are testing the code path that does UTF-8 |
| // encoding itself, instead of dispatching to CF. |
| // GetCStringPtr fails because our un-copied bytes aren't zero-terminated. |
| // GetCharactersPtr fails because our un-copied bytes aren't UTF-16. |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.ASCII.rawValue) == nil) |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.UTF8.rawValue) == nil) |
| assert(CFStringGetCharactersPtr(cfstring) == nil) |
| |
| checkUTF8View( |
| bytes, cfstring as NSString as String, |
| SourceLocStack().withCurrentLoc()) |
| _fixLifetime(bytes) |
| } |
| |
| do { |
| var bytes: [UInt8] = [ 97, 98, 99 ] |
| var cfstring: CFString = CFStringCreateWithBytes(kCFAllocatorDefault, |
| bytes, bytes.count, CFStringBuiltInEncodings.macRoman.rawValue, false) |
| |
| // Sanity checks to make sure we are testing the code path that does UTF-8 |
| // encoding itself, instead of dispatching to CF. |
| // CFStringCreateWithBytes() usually allocates zero-terminated ASCII |
| // or UTF-16, in which case one of the fast paths will succeed. |
| // This test operates only when CF creates a tagged pointer string object. |
| if strcmp(object_getClassName(cfstring), "NSTaggedPointerString") == 0 { |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.ASCII.rawValue) == nil) |
| assert( |
| CFStringGetCStringPtr( |
| cfstring, CFStringBuiltInEncodings.UTF8.rawValue) == nil) |
| assert(CFStringGetCharactersPtr(cfstring) == nil) |
| |
| checkUTF8View( |
| bytes, cfstring as NSString as String, |
| SourceLocStack().withCurrentLoc()) |
| } |
| } |
| } |
| |
| StringCookedViews.test("UTF16").forEach(in: utfTests) { |
| test in |
| |
| var expected = [UInt16]() |
| let output: (UInt16) -> Void = { expected.append($0) } |
| var expectedScalars = test.utf32 |
| var iterator = expectedScalars.makeIterator() |
| transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF16.self, |
| stoppingOnError: false, |
| into: output) |
| |
| var nss = NonContiguousNSString(test.utf32) |
| checkUTF16View(expected, nss as String, test.loc.withCurrentLoc()) |
| } |
| |
| StringCookedViews.test("UTF16/StringsWithUnpairedSurrogates") { |
| forStringsWithUnpairedSurrogates { |
| (test: UTF16Test, subject: String) -> Void in |
| var expected = [UInt16]() |
| let output: (UInt16) -> Void = { expected.append($0) } |
| var expectedScalars = test.scalarsHead + test.scalarsRepairedTail |
| var iterator = expectedScalars.makeIterator() |
| transcode( |
| iterator, |
| from: UTF32.self, |
| to: UTF16.self, |
| stoppingOnError: false, |
| into: output) |
| |
| checkUTF16View(expected, subject, test.loc.withCurrentLoc()) |
| } |
| } |
| |
| StringCookedViews.test("UnicodeScalars").forEach(in: utfTests) { |
| test in |
| |
| let subject = NonContiguousNSString(test.utf32) as String |
| checkBidirectionalCollection( |
| test.unicodeScalars, subject.unicodeScalars) |
| } |
| |
| StringCookedViews.test("UnicodeScalars/StringsWithUnpairedSurrogates") { |
| forStringsWithUnpairedSurrogates { |
| (test: UTF16Test, subject: String) -> Void in |
| let expectedScalars = (test.scalarsHead + test.scalarsRepairedTail).map { |
| UnicodeScalar($0)! |
| } |
| checkBidirectionalCollection( |
| expectedScalars, subject.unicodeScalars) |
| } |
| } |
| |
| var StringTests = TestSuite("StringTests") |
| |
| StringTests.test("StreamableConformance") { |
| forStringsWithUnpairedSurrogates { |
| (test: UTF16Test, subject: String) -> Void in |
| let expected = test.scalarsHead + test.scalarsRepairedTail |
| var printedSubject = "" |
| subject.write(to: &printedSubject) |
| let actual = printedSubject.unicodeScalars.map { $0.value } |
| expectEqual(expected, actual) |
| } |
| } |
| |
| #endif // _runtime(_ObjC) |
| |
| let nullOffsetTests = [ |
| (input: "\0", expected: 0), |
| (input: "a\0", expected: 1), |
| (input: "foobar\0", expected: 6) |
| ] |
| |
| let NullCodeUnitOffsetTests = TestSuite("NullCodeUnitOffsetTests") |
| |
| % for (Encoding, View) in [ |
| % ('UTF8', 'utf8'), |
| % ('UTF16', 'utf16'), |
| % ('UTF32', 'unicodeScalars.map{ $0.value }') |
| % ]: |
| |
| NullCodeUnitOffsetTests.test("${Encoding}._nullCodeUnitOffset(in:)") { |
| for test in nullOffsetTests { |
| let actual = Array(test.input.${View}) |
| .withUnsafeBufferPointer { p in |
| ${Encoding}._nullCodeUnitOffset(in: p.baseAddress!) |
| } |
| expectEqual(test.expected, actual) |
| } |
| } |
| |
| % end |
| |
| runAllTests() |
| |