| //===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // A custom trie implementation to quickly retrieve Unicode property values. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| %{ |
| |
| # Note: keep these constants synchronized with the trie data that is actually |
| # generated. There is a runtime check for this (see `_checkParameters()` |
| # below), but it is only performed in builds with INTERNAL_CHECKS_ENABLED. |
| |
| # Number of code point bits consumed at each level of the trie. |
| BMPFirstLevelIndexBits = 8 |
| BMPDataOffsetBits = 8 |
| SuppFirstLevelIndexBits = 5 |
| SuppSecondLevelIndexBits = 8 |
| SuppDataOffsetBits = 8 |
| |
| # Width, in bytes, of a single entry in each table. The code below only |
| # generates table accessors for one-byte entries. |
| BMPLookupBytesPerEntry = 1 |
| BMPDataBytesPerEntry = 1 |
| SuppLookup1BytesPerEntry = 1 |
| SuppLookup2BytesPerEntry = 1 |
| SuppDataBytesPerEntry = 1 |
| |
| # Size of the whole trie (presumably in bytes, like the offsets below). |
| TrieSize = 18961 |
| |
| # Offset of each table from the start of the trie data. The accessors below |
| # add these to a `UInt8` pointer, so they are byte offsets. |
| BMPLookupBytesOffset = 0 |
| BMPDataBytesOffset = 256 |
| SuppLookup1BytesOffset = 12032 |
| SuppLookup2BytesOffset = 12049 |
| SuppDataBytesOffset = 12817 |
| |
| }% |
| |
| import SwiftShims |
| |
| // The values of the `Grapheme_Cluster_Break` property, with explicit raw |
| // values running from 0 through 12. |
| // |
| // The case names must stay in sync with the 'GraphemeClusterBreakProperty' |
| // enum in C++ and with the names in the GYBUnicodeDataUtils script. |
| public // @testable |
| enum _GraphemeClusterBreakPropertyValue : Int { |
|   case Other = 0 |
|   case CR = 1, LF = 2, Control = 3 |
|   case Extend = 4 |
|   case Regional_Indicator = 5 |
|   case Prepend = 6 |
|   case SpacingMark = 7 |
|   case L = 8, V = 9, T = 10, LV = 11, LVT = 12 |
| } |
| |
| // A type-safe wrapper around the raw `Grapheme_Cluster_Break` property value. |
| // |
| // Turning a raw value into the corresponding enum case is expensive, so hot |
| // code paths pass this wrapper around instead and skip the conversion. |
| struct _GraphemeClusterBreakPropertyRawValue { |
|   var rawValue: UInt${BMPDataBytesPerEntry * 8} |
| |
|   init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) { |
|     self.rawValue = rawValue |
|   } |
| |
|   // The enum case named by `rawValue`. |
|   // |
|   // Use with care: this operation is expensive (even with optimization |
|   // turned on the compiler generates code for a switch). The conversion is |
|   // force-unwrapped, so a raw value that names no case traps. |
|   var cookedValue: _GraphemeClusterBreakPropertyValue { |
|     let widened = Int(rawValue) |
|     return _GraphemeClusterBreakPropertyValue(rawValue: widened)! |
|   } |
| } |
| |
| // A trie that maps a Unicode code point to the raw value of its |
| // `Grapheme_Cluster_Break` property. |
| // |
| // Code points up to U+FFFF are resolved through one lookup table followed by |
| // a data table. Code points above U+FFFF go through two lookup tables |
| // followed by a data table. All tables live in one byte buffer and are found |
| // through the offsets that gyb substitutes into this file. |
| public // @testable |
| struct _UnicodeGraphemeClusterBreakPropertyTrie { |
|   // Checks that every constant substituted into this file by gyb matches the |
|   // metadata that accompanies the trie data. |
|   static func _checkParameters() { |
|     let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata |
| |
|     // Bit widths of the index fields. |
|     _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits}) |
|     _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits}) |
|     _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits}) |
|     _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits}) |
|     _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits}) |
| |
|     // Entry widths of the tables. |
|     _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry}) |
|     _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry}) |
|     _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry}) |
|     _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry}) |
|     _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry}) |
| |
|     _sanityCheck(metadata.TrieSize == ${TrieSize}) |
| |
|     // Offsets of the tables inside the trie data. |
|     _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset}) |
|     _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset}) |
|     _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset}) |
|     _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset}) |
|     _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset}) |
|   } |
| |
|   // Start of the buffer that holds every table of the trie. |
|   let _trieData: UnsafePointer<UInt8> |
| |
|   // Each accessor below is only generated when the corresponding table has |
|   // one-byte entries. |
| |
|   // Lookup table for code points up to U+FFFF. |
| % if BMPLookupBytesPerEntry == 1: |
|   @_transparent var _bmpLookup: UnsafePointer<UInt8> { |
|     return _trieData + ${BMPLookupBytesOffset} |
|   } |
| % end |
| |
|   // Data table for code points up to U+FFFF. |
| % if BMPDataBytesPerEntry == 1: |
|   @_transparent var _bmpData: UnsafePointer<UInt8> { |
|     return _trieData + ${BMPDataBytesOffset} |
|   } |
| % end |
| |
|   // First lookup table for code points above U+FFFF. |
| % if SuppLookup1BytesPerEntry == 1: |
|   @_transparent var _suppLookup1: UnsafePointer<UInt8> { |
|     return _trieData + ${SuppLookup1BytesOffset} |
|   } |
| % end |
| |
|   // Second lookup table for code points above U+FFFF. |
| % if SuppLookup2BytesPerEntry == 1: |
|   @_transparent var _suppLookup2: UnsafePointer<UInt8> { |
|     return _trieData + ${SuppLookup2BytesOffset} |
|   } |
| % end |
| |
|   // Data table for code points above U+FFFF. |
| % if SuppDataBytesPerEntry == 1: |
|   @_transparent var _suppData: UnsafePointer<UInt8> { |
|     return _trieData + ${SuppDataBytesOffset} |
|   } |
| % end |
| |
|   // Creates a trie backed by the grapheme cluster break property data from |
|   // SwiftShims, after checking the generated constants against its metadata. |
|   public // @testable |
|   init() { |
|     _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters() |
|     _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie |
|   } |
| |
|   // Returns the index into `_bmpLookup` for `cp`: the bits of the code point |
|   // that remain above the data offset field. |
|   @_transparent |
|   func _getBMPFirstLevelIndex(_ cp: UInt32) -> Int { |
|     // Shift by the width of the field *below* the index, matching the |
|     // `<< ${BMPDataOffsetBits}` used when the data index is rebuilt in |
|     // `getPropertyRawValue`. Shifting by the index's own width is only |
|     // correct while both widths happen to be equal. |
|     return Int(cp >> ${BMPDataOffsetBits}) |
|   } |
| |
|   // Returns the offset of `cp` inside its BMP data block (the low bits). |
|   @_transparent |
|   func _getBMPDataOffset(_ cp: UInt32) -> Int { |
|     return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1)) |
|   } |
| |
|   // Returns the index into `_suppLookup1` for `cp`: the bits above the |
|   // second-level index and data offset fields. |
|   @_transparent |
|   func _getSuppFirstLevelIndex(_ cp: UInt32) -> Int { |
|     return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits})) |
|   } |
| |
|   // Returns the position of `cp` inside its second-level lookup block: the |
|   // bits between the first-level index and the data offset. |
|   @_transparent |
|   func _getSuppSecondLevelIndex(_ cp: UInt32) -> Int { |
|     return Int((cp >> ${SuppDataOffsetBits}) & |
|       ((1 << ${SuppSecondLevelIndexBits}) - 1)) |
|   } |
| |
|   // Returns the offset of `cp` inside its supplementary data block (the low |
|   // bits). |
|   @_transparent |
|   func _getSuppDataOffset(_ cp: UInt32) -> Int { |
|     return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1)) |
|   } |
| |
|   // Returns the raw `Grapheme_Cluster_Break` property value of `codePoint`. |
|   // |
|   // `codePoint` must not exceed 0x10ffff; larger values fail the precondition |
|   // below. |
|   func getPropertyRawValue( |
|     _ codePoint: UInt32 |
|   ) -> _GraphemeClusterBreakPropertyRawValue { |
|     // Note: for optimization, the code below uses '&+' instead of '+' to avoid |
|     // a few branches. There is no possibility of overflow here. |
|     // |
|     // The optimizer could figure this out, but right now it keeps extra checks |
|     // if '+' is used. |
| |
|     if _fastPath(codePoint <= 0xffff) { |
|       // Lookup table entry selects a data block; low bits select the entry. |
|       let dataBlockIndex = Int(_bmpLookup[_getBMPFirstLevelIndex(codePoint)]) |
|       return _GraphemeClusterBreakPropertyRawValue( |
|         _bmpData[ |
|           (dataBlockIndex << ${BMPDataOffsetBits}) &+ |
|           _getBMPDataOffset(codePoint)]) |
|     } else { |
|       _precondition(codePoint <= 0x10ffff) |
|       // First table selects a second-level block, which in turn selects a |
|       // data block; low bits select the entry. |
|       let secondLookupIndex = Int(_suppLookup1[_getSuppFirstLevelIndex(codePoint)]) |
|       let dataBlockIndex = Int(_suppLookup2[ |
|         (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+ |
|         _getSuppSecondLevelIndex(codePoint)]) |
|       return _GraphemeClusterBreakPropertyRawValue( |
|         _suppData[ |
|           (dataBlockIndex << ${SuppDataOffsetBits}) &+ |
|           _getSuppDataOffset(codePoint)]) |
|     } |
|   } |
| |
|   // Returns the `Grapheme_Cluster_Break` property value of `codePoint` as an |
|   // enum case. This pays for the raw-value-to-enum conversion on every call; |
|   // use `getPropertyRawValue` where that cost matters. |
|   public // @testable |
|   func getPropertyValue( |
|     _ codePoint: UInt32 |
|   ) -> _GraphemeClusterBreakPropertyValue { |
|     return getPropertyRawValue(codePoint).cookedValue |
|   } |
| } |
| |
| // FIXME(ABI)#74 : don't mark this type versioned, or any of its APIs inlineable. |
| // Grapheme cluster segmentation uses a completely different algorithm in |
| // Unicode 9.0. |
| // |
| // Answers grapheme cluster boundary queries from a table of 16-bit rows. |
| // A row is selected by the property value of the first code point; within it, |
| // the bit numbered by the property value of the second code point is set when |
| // there is no boundary between the two. |
| internal struct _UnicodeExtendedGraphemeClusterSegmenter { |
|   let _noBoundaryRulesMatrix: UnsafePointer<UInt16> |
| |
|   init() { |
|     _noBoundaryRulesMatrix = |
|       _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix |
|   } |
| |
|   /// Returns `true` if there is always a grapheme cluster break after a code |
|   /// point with a given `Grapheme_Cluster_Break` property value. |
|   func isBoundaryAfter(_ gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool { |
|     // An all-zero row suppresses the boundary for no following value. |
|     let row = Int(gcb.rawValue) |
|     return _noBoundaryRulesMatrix[row] == 0 |
|   } |
| |
|   /// Returns `true` if there is a grapheme cluster break between code points |
|   /// with given `Grapheme_Cluster_Break` property values. |
|   func isBoundary( |
|     _ gcb1: _GraphemeClusterBreakPropertyRawValue, |
|     _ gcb2: _GraphemeClusterBreakPropertyRawValue |
|   ) -> Bool { |
|     let noBoundaryMask = _noBoundaryRulesMatrix[Int(gcb1.rawValue)] |
|     let followerBit: UInt16 = 1 << UInt16(gcb2.rawValue) |
|     return (noBoundaryMask & followerBit) == 0 |
|   } |
| } |
| |
| // ${'Local Variables'}: |
| // eval: (read-only-mode 1) |
| // End: |