blob: e38bb8c29cef4cda51a224f1d24393e50c737aab [file] [log] [blame]
//===--- UnicodeTrie.swift.gyb --------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// A custom trie implementation to quickly retrieve Unicode property values.
//
//===----------------------------------------------------------------------===//
%{
# Note: keep these constants synchronized with the data that is actually
# generated. There is a runtime check for this (see `_checkParameters()` in
# the template below), but it is only performed in builds with
# INTERNAL_CHECKS_ENABLED.
#
# Bit widths used by the template below as shift amounts and mask widths when
# a code point is split into table indices. Code points up to 0xffff use one
# lookup level plus a data offset; larger code points use two lookup levels
# plus a data offset.
BMPFirstLevelIndexBits = 8
BMPDataOffsetBits = 8
SuppFirstLevelIndexBits = 5
SuppSecondLevelIndexBits = 8
SuppDataOffsetBits = 8
# Size in bytes of one entry in each table. The template only emits a table
# accessor when the corresponding value is 1, and it derives the width of the
# raw property value type from BMPDataBytesPerEntry.
BMPLookupBytesPerEntry = 1
BMPDataBytesPerEntry = 1
SuppLookup1BytesPerEntry = 1
SuppLookup2BytesPerEntry = 1
SuppDataBytesPerEntry = 1
# Total size of the trie data (presumably in bytes -- confirm against the
# generator). The template only compares it against the runtime metadata.
TrieSize = 18961
# Offset of each table from the start of the trie data. The template adds
# these to a pointer to UInt8, so they are counted in bytes.
BMPLookupBytesOffset = 0
BMPDataBytesOffset = 256
SuppLookup1BytesOffset = 12032
SuppLookup2BytesOffset = 12049
SuppDataBytesOffset = 12817
}%
import SwiftShims
// These case names must be kept in sync with the 'GraphemeClusterBreakProperty'
// enum in C++ and with the names in the GYBUnicodeDataUtils script.
public // @testable
enum _GraphemeClusterBreakPropertyValue : Int {
  // Every raw value is written out explicitly: values read from the trie data
  // are converted to this enum through `init(rawValue:)`, so the numbering is
  // part of the contract and must not shift when cases are edited.

  /// The value numbered zero.
  case Other = 0

  // Values named after the `Grapheme_Cluster_Break` property values for line
  // terminators and control code points.
  case CR = 1, LF = 2, Control = 3

  // Values for code points that attach to, or combine with, their neighbors.
  case Extend = 4
  case Regional_Indicator = 5
  case Prepend = 6
  case SpacingMark = 7

  // Hangul syllable type values.
  case L = 8, V = 9, T = 10, LV = 11, LVT = 12
}
// It is expensive to convert a raw enum value to an enum, so we use this
// type-safe wrapper around the raw property value to avoid paying the
// conversion cost in hot code paths.
struct _GraphemeClusterBreakPropertyRawValue {
  /// The property value in its unconverted integer form.
  var rawValue: UInt${BMPDataBytesPerEntry * 8}

  /// Wraps `rawValue` as-is; no validation or enum conversion happens here.
  init(_ rawValue: UInt${BMPDataBytesPerEntry * 8}) {
    self.rawValue = rawValue
  }

  /// The enum case whose raw value equals `rawValue`.
  ///
  /// Use with care: this operation is expensive (even with optimization
  /// turned on the compiler generates code for a switch).  The conversion
  /// result is force-unwrapped, so a `rawValue` that matches no case traps.
  var cookedValue: _GraphemeClusterBreakPropertyValue {
    let widenedValue = Int(rawValue)
    return _GraphemeClusterBreakPropertyValue(rawValue: widenedValue)!
  }
}
public // @testable
struct _UnicodeGraphemeClusterBreakPropertyTrie {
  /// Checks that the constants baked into this file when it was generated
  /// match the metadata that accompanies the trie data.
  ///
  /// Every comparison is a `_sanityCheck`, so a mismatch is only detected in
  /// builds that have internal checks enabled.
  static func _checkParameters() {
    let metadata = _swift_stdlib_GraphemeClusterBreakPropertyTrieMetadata

    // Bit widths used to split a code point into table indices.
    _sanityCheck(metadata.BMPFirstLevelIndexBits == ${BMPFirstLevelIndexBits})
    _sanityCheck(metadata.BMPDataOffsetBits == ${BMPDataOffsetBits})
    _sanityCheck(metadata.SuppFirstLevelIndexBits == ${SuppFirstLevelIndexBits})
    _sanityCheck(metadata.SuppSecondLevelIndexBits == ${SuppSecondLevelIndexBits})
    _sanityCheck(metadata.SuppDataOffsetBits == ${SuppDataOffsetBits})

    // Entry sizes of the individual tables.
    _sanityCheck(metadata.BMPLookupBytesPerEntry == ${BMPLookupBytesPerEntry})
    _sanityCheck(metadata.BMPDataBytesPerEntry == ${BMPDataBytesPerEntry})
    _sanityCheck(metadata.SuppLookup1BytesPerEntry == ${SuppLookup1BytesPerEntry})
    _sanityCheck(metadata.SuppLookup2BytesPerEntry == ${SuppLookup2BytesPerEntry})
    _sanityCheck(metadata.SuppDataBytesPerEntry == ${SuppDataBytesPerEntry})

    // Overall size and the position of each table inside the trie data.
    _sanityCheck(metadata.TrieSize == ${TrieSize})
    _sanityCheck(metadata.BMPLookupBytesOffset == ${BMPLookupBytesOffset})
    _sanityCheck(metadata.BMPDataBytesOffset == ${BMPDataBytesOffset})
    _sanityCheck(metadata.SuppLookup1BytesOffset == ${SuppLookup1BytesOffset})
    _sanityCheck(metadata.SuppLookup2BytesOffset == ${SuppLookup2BytesOffset})
    _sanityCheck(metadata.SuppDataBytesOffset == ${SuppDataBytesOffset})
  }

  /// Start of the trie data; every table below is located at a fixed offset
  /// from this pointer.
  let _trieData: UnsafePointer<UInt8>

  // Note: each table accessor is only emitted when its entries are one byte
  // wide, because the accessors expose the tables as `UnsafePointer<UInt8>`.

% if BMPLookupBytesPerEntry == 1:
  /// Lookup table used for code points up to 0xffff.
  @_transparent var _bmpLookup: UnsafePointer<UInt8> {
    return _trieData + ${BMPLookupBytesOffset}
  }
% end

% if BMPDataBytesPerEntry == 1:
  /// Data table used for code points up to 0xffff.
  @_transparent var _bmpData: UnsafePointer<UInt8> {
    return _trieData + ${BMPDataBytesOffset}
  }
% end

% if SuppLookup1BytesPerEntry == 1:
  /// First lookup table used for code points above 0xffff.
  @_transparent var _suppLookup1: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup1BytesOffset}
  }
% end

% if SuppLookup2BytesPerEntry == 1:
  /// Second lookup table used for code points above 0xffff.
  @_transparent var _suppLookup2: UnsafePointer<UInt8> {
    return _trieData + ${SuppLookup2BytesOffset}
  }
% end

% if SuppDataBytesPerEntry == 1:
  /// Data table used for code points above 0xffff.
  @_transparent var _suppData: UnsafePointer<UInt8> {
    return _trieData + ${SuppDataBytesOffset}
  }
% end

  /// Creates a trie backed by the trie data exported by the runtime shims,
  /// after checking the generation-time parameters against its metadata.
  public // @testable
  init() {
    _UnicodeGraphemeClusterBreakPropertyTrie._checkParameters()
    _trieData = _swift_stdlib_GraphemeClusterBreakPropertyTrie
  }

  /// Returns the index into `_bmpLookup` for `cp`.
  @_transparent
  func _getBMPFirstLevelIndex(_ cp: UInt32) -> Int {
    // The first-level index is made of the bits above the data offset, so the
    // shift amount is the width of the data offset field.  This is the same
    // decomposition the supplementary-plane helpers use.  With the current
    // parameters both BMP widths are equal, so the result is the same as
    // shifting by the first-level index width.
    return Int(cp >> ${BMPDataOffsetBits})
  }

  /// Returns the position of `cp` inside its BMP data block (the low
  /// ${BMPDataOffsetBits} bits of `cp`).
  @_transparent
  func _getBMPDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${BMPDataOffsetBits}) - 1))
  }

  /// Returns the index into `_suppLookup1` for `cp`: the bits above the
  /// second-level index and data offset fields.
  @_transparent
  func _getSuppFirstLevelIndex(_ cp: UInt32) -> Int {
    return Int(cp >> (${SuppSecondLevelIndexBits} + ${SuppDataOffsetBits}))
  }

  /// Returns the position of `cp` inside its second-level lookup block: the
  /// bit field located directly above the data offset field.
  @_transparent
  func _getSuppSecondLevelIndex(_ cp: UInt32) -> Int {
    return Int((cp >> ${SuppDataOffsetBits}) &
      ((1 << ${SuppSecondLevelIndexBits}) - 1))
  }

  /// Returns the position of `cp` inside its supplementary data block (the low
  /// ${SuppDataOffsetBits} bits of `cp`).
  @_transparent
  func _getSuppDataOffset(_ cp: UInt32) -> Int {
    return Int(cp & ((1 << ${SuppDataOffsetBits}) - 1))
  }

  /// Returns the raw property value stored in the trie for `codePoint`.
  ///
  /// Code points up to 0xffff are resolved through one lookup table and one
  /// data table; larger code points go through two lookup tables before the
  /// data table is read.
  ///
  /// - Precondition: `codePoint <= 0x10ffff`.
  func getPropertyRawValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyRawValue {
    // Note: for optimization, the code below uses '&+' instead of '+' to avoid
    // a few branches.  There is no possibility of overflow here.
    //
    // The optimizer could figure this out, but right now it keeps extra checks
    // if '+' is used.
    if _fastPath(codePoint <= 0xffff) {
      let dataBlockIndex = Int(_bmpLookup[_getBMPFirstLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _bmpData[
          (dataBlockIndex << ${BMPDataOffsetBits}) &+
          _getBMPDataOffset(codePoint)])
    } else {
      _precondition(codePoint <= 0x10ffff)
      let secondLookupIndex = Int(_suppLookup1[_getSuppFirstLevelIndex(codePoint)])
      let dataBlockIndex = Int(_suppLookup2[
        (secondLookupIndex << ${SuppSecondLevelIndexBits}) &+
        _getSuppSecondLevelIndex(codePoint)])
      return _GraphemeClusterBreakPropertyRawValue(
        _suppData[
          (dataBlockIndex << ${SuppDataOffsetBits}) &+
          _getSuppDataOffset(codePoint)])
    }
  }

  /// Returns the property value for `codePoint` as an enum case.
  ///
  /// This converts the result of `getPropertyRawValue(_:)` through
  /// `cookedValue`, which is documented as expensive; hot paths should work
  /// with the raw value instead.
  public // @testable
  func getPropertyValue(
    _ codePoint: UInt32
  ) -> _GraphemeClusterBreakPropertyValue {
    return getPropertyRawValue(codePoint).cookedValue
  }
}
internal struct _UnicodeExtendedGraphemeClusterSegmenter {
  /// Rule rows, indexed by the raw property value of the first code point of
  /// a pair.  Within a row, the bit whose position equals the raw property
  /// value of the second code point is tested by `isBoundary(_:_:)`.
  let _noBoundaryRulesMatrix: UnsafePointer<UInt16>

  /// Creates a segmenter backed by the rule matrix exported by the runtime
  /// shims.
  init() {
    _noBoundaryRulesMatrix =
      _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix
  }

  /// Returns `true` if there is always a grapheme cluster break after a code
  /// point with a given `Grapheme_Cluster_Break` property value.
  func isBoundaryAfter(_ gcb: _GraphemeClusterBreakPropertyRawValue) -> Bool {
    // A row with no bits set reports a boundary for every possible follower.
    let rowIndex = Int(gcb.rawValue)
    return _noBoundaryRulesMatrix[rowIndex] == 0
  }

  /// Returns `true` if there is a grapheme cluster break between code points
  /// with given `Grapheme_Cluster_Break` property values.
  func isBoundary(
    _ gcb1: _GraphemeClusterBreakPropertyRawValue,
    _ gcb2: _GraphemeClusterBreakPropertyRawValue
  ) -> Bool {
    let noBoundaryBits = _noBoundaryRulesMatrix[Int(gcb1.rawValue)]
    let followerBit: UInt16 = 1 << UInt16(gcb2.rawValue)
    return (noBoundaryBits & followerBit) == 0
  }
}
// ${'Local Variables'}:
// eval: (read-only-mode 1)
// End: