| //===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See http://swift.org/LICENSE.txt for license information |
| // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // RUN: rm -rf %t && mkdir -p %t && %gyb -DunicodeGraphemeBreakPropertyFile=%utils/UnicodeData/GraphemeBreakProperty.txt -DunicodeGraphemeBreakTestFile=%utils/UnicodeData/GraphemeBreakTest.txt %s -o %t/UnicodeTrie.swift |
| // RUN: %line-directive %t/UnicodeTrie.swift -- %target-build-swift %t/UnicodeTrie.swift -o %t/a.out -g -Xfrontend -disable-access-control |
| // RUN: %line-directive %t/UnicodeTrie.swift -- %target-run %t/a.out |
| // REQUIRES: executable_test |
| |
| // FIXME: rdar://problem/19648117 Needs splitting objc parts out |
| // XFAIL: linux |
| |
%{

from GYBUnicodeDataUtils import *

# Build the property table from the data file supplied to gyb via
# -DunicodeGraphemeBreakPropertyFile=...; the Swift code below is generated
# from its `property_value_ranges`.
grapheme_cluster_break_property_table = GraphemeClusterBreakPropertyTable(
    unicodeGraphemeBreakPropertyFile)

}%
| |
| import SwiftPrivate |
| import StdlibUnittest |
| import StdlibCollectionUnittest |
| import Darwin |
| import Foundation |
| |
// Reference data for the trie test: one `(first, last, value)` tuple per
// entry of `property_value_ranges`, where `first...last` is a closed range of
// code points.  Generated by gyb.
var graphemeBreakPropertyTable = [
  // Every integer literal carries an explicit `as Int`; without it the
  // type-checker can blow the stack on this literal.
  // <rdar://problem/17539704>
% for first_cp, last_cp, prop in grapheme_cluster_break_property_table.property_value_ranges:
  (${first_cp} as Int, ${last_cp} as Int, _GraphemeClusterBreakPropertyValue.${prop}),
% end
]
| |
var UnicodeTrie = TestSuite("UnicodeTrie")

UnicodeTrie.test("_UnicodeGraphemeClusterBreakPropertyTrie") {
  // Compare the trie against the reference table for every code point in
  // 0...0x10ffff.

  var trie = _UnicodeGraphemeClusterBreakPropertyTrie()

  // Flatten the range table into one expected value per code point.  Code
  // points that no range covers keep the initial value, `.Other`.
  var expected = [_GraphemeClusterBreakPropertyValue](
    repeating: .Other, count: 0x110000)
  for (first, last, value) in graphemeBreakPropertyTable {
    for codePoint in first...last {
      expected[codePoint] = value
    }
  }

  let lastCodePoint: UInt32 = 0x10ffff
  for codePoint in 0...lastCodePoint {
    // Print a progress marker every 0x10000 code points.
    if codePoint % 0x10000 == 0 {
      print("\(codePoint)...")
    }
    expectEqual(
      expected[Int(codePoint)],
      trie.getPropertyValue(codePoint),
      "code point \(codePoint)")
  }
}
| |
%{

# Load the segmentation test cases from the data file supplied to gyb via
# -DunicodeGraphemeBreakTestFile=...; each entry is consumed below as a
# (code_points, expected_boundaries) pair.
grapheme_cluster_break_tests = get_grapheme_cluster_break_tests_as_unicode_scalars(
    unicodeGraphemeBreakTestFile)

}%
| |
// The simplest possible NSString subclass that CoreFoundation does not know
// about.  Its contents live in a Swift array of UTF-16 code units.
class NonContiguousNSString : NSString {
  // Backing storage: the string's UTF-16 code units.
  var _value: [UInt16]

  // Creates an empty string.
  override init() {
    _value = []
    super.init()
  }

  // Creates a string from the given UTF-16 code units, stored as-is.
  init(_ value: [UInt16]) {
    _value = value
    super.init()
  }

  // Decoding is not supported by this test helper.
  required init(coder aDecoder: NSCoder) {
    fatalError("don't call this initializer")
  }

  // Creates a string from UTF-32 code units by transcoding them to UTF-16.
  // A transcoding error is reported as a test expectation failure.
  convenience init(_ scalars: [UInt32]) {
    var utf16: [UInt16] = []
    let hadError = transcode(
      scalars.makeIterator(),
      from: UTF32.self,
      to: UTF16.self,
      stoppingOnError: true,
      into: { utf16.append($0) })
    expectFalse(hadError)
    self.init(utf16)
  }

  @objc(copyWithZone:)
  override func copy(with zone: NSZone?) -> Any {
    // Return `self` so that a copy is still an instance of a class that
    // CoreFoundation does not know about.
    return self
  }

  // Number of UTF-16 code units in the string.
  @objc override var length: Int {
    return _value.count
  }

  // The UTF-16 code unit stored at `index`.
  @objc override func character(at index: Int) -> unichar {
    return _value[index]
  }
}
| |
/// Verify that extended grapheme cluster boundaries in `subject` occur at
/// the positions specified in `expectedBoundaries`.
///
/// - Parameters:
///   - expectedBoundaries: Cumulative Unicode scalar counts at which a
///     cluster boundary is expected, starting with 0 and ending with the
///     total number of Unicode scalars in `subject`.
///   - subject: The string to segment.
///   - stackTrace: Source locations of the callers.  It is forwarded to the
///     expectations below so that a failure is attributed to the call site
///     in the test, not just to this helper.
func checkGraphemeClusterSegmentation(
  _ expectedBoundaries: [Int], _ subject: String, _ stackTrace: SourceLocStack
) {
  // Walk the characters and record the running Unicode scalar count after
  // each one; these are the boundaries the implementation actually produced.
  var actualBoundaries: [Int] = [ 0 ]
  var unicodeScalarCount = 0
  for c in subject.characters {
    unicodeScalarCount += String(c).unicodeScalars.count
    actualBoundaries.append(unicodeScalarCount)
  }
  expectEqual(
    expectedBoundaries, actualBoundaries,
    "scalars: \(asHex(Array(subject.unicodeScalars.lazy.map { $0.value })))",
    stackTrace: stackTrace.withCurrentLoc()
  )

  // Also run the generic slicing / bidirectional-index checks over the
  // character view.
  let expectedCharacters: [Character] = Array(subject.characters)
  checkSliceableWithBidirectionalIndex(
    expectedCharacters, subject.characters,
    stackTrace: stackTrace.withCurrentLoc())
}
| |
/// Variant of the check that takes the subject as UTF-32 code units.
///
/// The scalars are wrapped in a `NonContiguousNSString`, bridged to `String`,
/// and handed to the `String`-based overload.
func checkGraphemeClusterSegmentation(
  _ expectedBoundaries: [Int], scalars: [UInt32], _ stackTrace: SourceLocStack
) {
  let bridged: String = NonContiguousNSString(scalars) as String
  checkGraphemeClusterSegmentation(
    expectedBoundaries, bridged, stackTrace.withCurrentLoc())
}
| |
/// Variant of the check that takes the subject as raw UTF-16 code units.
///
/// The code units are stored unchanged in a `NonContiguousNSString`, bridged
/// to `String`, and handed to the `String`-based overload.
func checkGraphemeClusterSegmentation(
  _ expectedBoundaries: [Int], codeUnits: [UInt16], _ stackTrace: SourceLocStack
) {
  let bridged: String = NonContiguousNSString(codeUnits) as String
  checkGraphemeClusterSegmentation(
    expectedBoundaries, bridged, stackTrace.withCurrentLoc())
}
| |
UnicodeTrie.test("GraphemeClusterSegmentation/UnicodeSpec") {
  // Run the segmentation algorithm over the test data published with the
  // Unicode specification.  gyb emits one `do` block per test case.

% for code_points, expected_boundaries in grapheme_cluster_break_tests:
  do {
    let expectedBoundaries: [Int] =
      [ ${", ".join(map(str, expected_boundaries))} ]
    let scalars: [UInt32] =
      [ ${", ".join(map(str, code_points))} ]
    checkGraphemeClusterSegmentation(
      expectedBoundaries, scalars: scalars,
      SourceLocStack().withCurrentLoc())
  }

% end
}
| |
UnicodeTrie.test("GraphemeClusterSegmentation/Extra") {
  // Additional cases for input Strings that contain ill-formed code unit
  // sequences.

  let highSurrogate: UInt16 = 0xd800        // U+D800 (high-surrogate)
  let latinCapitalA: UInt16 = 0x0041        // U+0041 LATIN CAPITAL LETTER A
  let combiningAcuteAccent: UInt16 = 0x0301 // U+0301 COMBINING ACUTE ACCENT

  // <high-surrogate>
  checkGraphemeClusterSegmentation(
    [ 0, 1 ],
    codeUnits: [ highSurrogate ],
    SourceLocStack().withCurrentLoc())

  // <high-surrogate> <high-surrogate>
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ highSurrogate, highSurrogate ],
    SourceLocStack().withCurrentLoc())

  // A <high-surrogate>
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ latinCapitalA, highSurrogate ],
    SourceLocStack().withCurrentLoc())

  // <high-surrogate> A
  checkGraphemeClusterSegmentation(
    [ 0, 1, 2 ],
    codeUnits: [ highSurrogate, latinCapitalA ],
    SourceLocStack().withCurrentLoc())

  // A <combining acute accent> <high-surrogate>
  checkGraphemeClusterSegmentation(
    [ 0, 2, 3 ],
    codeUnits: [ latinCapitalA, combiningAcuteAccent, highSurrogate ],
    SourceLocStack().withCurrentLoc())

  // <high-surrogate> A <combining acute accent>
  checkGraphemeClusterSegmentation(
    [ 0, 1, 3 ],
    codeUnits: [ highSurrogate, latinCapitalA, combiningAcuteAccent ],
    SourceLocStack().withCurrentLoc())
}
| |
UnicodeTrie.test("GraphemeClusterSegmentation/Unicode_7_0_0") {
  // Verify that the data tables are from Unicode 7.0.0 or later: in Unicode
  // 6.3.0 the sequence below was segmented into two grapheme clusters.

  let scalars: [UInt32] = [
    0x0041,   // U+0041 LATIN CAPITAL LETTER A
    0x1122c,  // U+1122C KHOJKI VOWEL SIGN AA
  ]
  let expectedBoundaries = [ 0, 2 ]
  checkGraphemeClusterSegmentation(
    expectedBoundaries, scalars: scalars,
    SourceLocStack().withCurrentLoc())
}
| |
UnicodeTrie.test("GraphemeClusterSegmentation/Unicode_8_0_0") {
  // Verify that the data tables are from Unicode 8.0.0 or later: in Unicode
  // 7.0.0 the sequence below was segmented into two grapheme clusters.

  let scalars: [UInt32] = [
    0x0041,   // U+0041 LATIN CAPITAL LETTER A
    0x11720,  // U+11720 AHOM VOWEL SIGN A
  ]
  let expectedBoundaries = [ 0, 2 ]
  checkGraphemeClusterSegmentation(
    expectedBoundaries, scalars: scalars,
    SourceLocStack().withCurrentLoc())
}


// Run every test registered on the suites above.
runAllTests()
| |