| //===----------------------------------------------------------------------===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| import SwiftShims |
| |
| // Conversions between different Unicode encodings. Note that UTF-16 and |
| // UTF-32 decoding are *not* currently resilient to erroneous data. |
| |
/// The outcome of a single Unicode decoding step.
///
/// A value of this type is exactly one of three things: a decoded Unicode
/// scalar value, a signal that the input holds no further Unicode scalars, or
/// a signal that a decoding error occurred.
@_frozen
public enum UnicodeDecodingResult : Equatable {
  /// A Unicode scalar value that was decoded from the input.
  case scalarValue(Unicode.Scalar)

  /// A signal that the input has no more Unicode scalars to offer.
  case emptyInput

  /// A signal that a decoding error occurred.
  case error

  /// Returns a Boolean value indicating whether two decoding results are
  /// equal.
  ///
  /// Two `.scalarValue` results are equal when they carry equal scalars.
  /// `.emptyInput` is equal only to `.emptyInput`, and `.error` is equal only
  /// to `.error`. Every other pairing compares as unequal.
  ///
  /// - Parameters:
  ///   - lhs: A decoding result to compare.
  ///   - rhs: Another decoding result to compare.
  /// - Returns: `true` if `lhs` and `rhs` are the same case and, for
  ///   `.scalarValue`, carry the same scalar; otherwise, `false`.
  @inlinable
  public static func == (
    lhs: UnicodeDecodingResult,
    rhs: UnicodeDecodingResult
  ) -> Bool {
    switch (lhs, rhs) {
    case (.emptyInput, .emptyInput), (.error, .error):
      // Payload-free cases only need to agree on the case itself.
      return true
    case let (.scalarValue(left), .scalarValue(right)):
      return left == right
    default:
      return false
    }
  }
}
| |
/// A Unicode encoding form that converts between Unicode scalar values and
/// the code units specific to that form.
///
/// Types conforming to `UnicodeCodec` can decode a sequence of code units
/// into Unicode scalar values and encode Unicode scalar values into a
/// sequence of code units. The standard library provides codecs for the
/// UTF-8, UTF-16, and UTF-32 encoding schemes as the `UTF8`, `UTF16`, and
/// `UTF32` types, respectively. Decoded Unicode scalar values are represented
/// by the `Unicode.Scalar` type.
public protocol UnicodeCodec : Unicode.Encoding {

  /// Creates an instance of the codec.
  init()

  /// Starts or continues decoding a code unit sequence into Unicode scalar
  /// values.
  ///
  /// Call this method repeatedly until it returns
  /// `UnicodeDecodingResult.emptyInput` to decode a code unit sequence
  /// completely. It is not enough to check that the iterator was exhausted,
  /// because the decoder can hold buffered data that it already read from the
  /// input iterator.
  ///
  /// That same buffering makes it impossible to map a returned
  /// `Unicode.Scalar` or error back to a position in the iterator.
  ///
  /// This example decodes the UTF-8 encoded bytes of a string into an array
  /// of `Unicode.Scalar` instances:
  ///
  ///     let str = "✨Unicode✨"
  ///     print(Array(str.utf8))
  ///     // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
  ///
  ///     var bytesIterator = str.utf8.makeIterator()
  ///     var scalars: [Unicode.Scalar] = []
  ///     var utf8Decoder = UTF8()
  ///     Decode: while true {
  ///         switch utf8Decoder.decode(&bytesIterator) {
  ///         case .scalarValue(let v): scalars.append(v)
  ///         case .emptyInput: break Decode
  ///         case .error:
  ///             print("Decoding error")
  ///             break Decode
  ///         }
  ///     }
  ///     print(scalars)
  ///     // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
  ///
  /// - Parameter input: An iterator of code units to be decoded. Pass the
  ///   same iterator instance on every call to this method, and do not
  ///   advance the iterator or any copy of it outside this method.
  /// - Returns: A `UnicodeDecodingResult` instance that represents the next
  ///   Unicode scalar, an error, or the fact that the UTF sequence has been
  ///   fully decoded.
  mutating func decode<I : IteratorProtocol>(
    _ input: inout I
  ) -> UnicodeDecodingResult where I.Element == CodeUnit

  /// Encodes a Unicode scalar as a series of code units, passing each code
  /// unit to the given closure.
  ///
  /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
  /// value (`\u{1D110}`) whose UTF-8 representation takes four code units.
  /// This code uses the `UTF8` codec to encode a fermata in UTF-8:
  ///
  ///     var bytes: [UTF8.CodeUnit] = []
  ///     UTF8.encode("𝄐", into: { bytes.append($0) })
  ///     print(bytes)
  ///     // Prints "[240, 157, 132, 144]"
  ///
  /// - Parameters:
  ///   - input: The Unicode scalar value to encode.
  ///   - processCodeUnit: A closure that is called with one code unit at a
  ///     time.
  static func encode(
    _ input: Unicode.Scalar,
    into processCodeUnit: (CodeUnit) -> Void
  )

  /// Finds the first `CodeUnit` in `input` that is equal to 0.
  ///
  /// This is the equivalent of `strlen` for C strings.
  ///
  /// - Complexity: O(*n*)
  static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int
}
| |
/// A codec for translating between Unicode scalar values and UTF-8 code
/// units.
extension Unicode.UTF8 : UnicodeCodec {
  /// Creates an instance of the UTF-8 codec.
  ///
  /// The new codec wraps a freshly created `ForwardParser`.
  @inlinable
  public init() {
    self = ._swift3Buffer(ForwardParser())
  }

  /// Starts or continues decoding a UTF-8 sequence.
  ///
  /// Call this method repeatedly until it returns
  /// `UnicodeDecodingResult.emptyInput` to decode a code unit sequence
  /// completely. It is not enough to check that the iterator was exhausted,
  /// because the decoder can hold buffered data that it already read from the
  /// input iterator.
  ///
  /// That same buffering makes it impossible to map a returned
  /// `Unicode.Scalar` or error back to a position in the iterator.
  ///
  /// This example decodes the UTF-8 encoded bytes of a string into an array
  /// of `Unicode.Scalar` instances. It is a demonstration only---when you
  /// need the Unicode scalar representation of a string, use its
  /// `unicodeScalars` view.
  ///
  ///     let str = "✨Unicode✨"
  ///     print(Array(str.utf8))
  ///     // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]"
  ///
  ///     var bytesIterator = str.utf8.makeIterator()
  ///     var scalars: [Unicode.Scalar] = []
  ///     var utf8Decoder = UTF8()
  ///     Decode: while true {
  ///         switch utf8Decoder.decode(&bytesIterator) {
  ///         case .scalarValue(let v): scalars.append(v)
  ///         case .emptyInput: break Decode
  ///         case .error:
  ///             print("Decoding error")
  ///             break Decode
  ///         }
  ///     }
  ///     print(scalars)
  ///     // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
  ///
  /// - Parameter input: An iterator of code units to be decoded. Pass the
  ///   same iterator instance on every call to this method, and do not
  ///   advance the iterator or any copy of it outside this method.
  /// - Returns: A `UnicodeDecodingResult` instance that represents the next
  ///   Unicode scalar, an error, or the fact that the UTF sequence has been
  ///   fully decoded.
  @inlinable
  @inline(__always)
  public mutating func decode<I : IteratorProtocol>(
    _ input: inout I
  ) -> UnicodeDecodingResult where I.Element == CodeUnit {
    guard case ._swift3Buffer(var bufferedParser) = self else {
      Builtin.unreachable()
    }

    let step = bufferedParser.parseScalar(from: &input)
    // Store the advanced parser back so the next call resumes from it.
    self = ._swift3Buffer(bufferedParser)

    switch step {
    case .emptyInput:
      return .emptyInput
    case .error:
      return .error
    case .valid(let encoded):
      return .scalarValue(UTF8.decode(encoded))
    }
  }

  /// Attempts to decode a single UTF-8 code unit sequence starting at the LSB
  /// of `buffer`.
  ///
  /// - Returns:
  ///   - result: The decoded code point if the code unit sequence is
  ///     well-formed; `nil` otherwise.
  ///   - length: The length of the code unit sequence in bytes if it is
  ///     well-formed; otherwise the *maximal subpart of the ill-formed
  ///     sequence* (Unicode 8.0.0, Ch 3.9, D93b), i.e. the number of leading
  ///     code units that were valid or 1 in case none were valid. Unicode
  ///     recommends to skip these bytes and replace them by a single
  ///     replacement character (U+FFFD).
  ///
  /// - Requires: There is at least one used byte in `buffer`, and the unused
  ///   space in `buffer` is filled with some value not matching the UTF-8
  ///   continuation byte form (`0b10xxxxxx`).
  @inlinable
  public // @testable
  static func _decodeOne(_ buffer: UInt32) -> (result: UInt32?, length: UInt8) {
    // The buffer is read least significant byte first: [ #3 #2 #1 #0 ].

    // A clear high bit in byte #0 means a 1-byte (ASCII) sequence.
    guard buffer & 0x80 != 0 else {
      let asciiValue = buffer & 0xff
      return (result: asciiValue, length: 1)
    }

    // Hand all 32 bits of `buffer` to a parser and give it no further input.
    var parser = ForwardParser()
    parser._buffer._storage = buffer
    parser._buffer._bitCount = 32
    var noFurtherInput = EmptyCollection<UInt8>().makeIterator()

    switch parser.parseScalar(from: &noFurtherInput) {
    case .emptyInput:
      Builtin.unreachable()
    case .error(let skipLength):
      return (result: nil, length: UInt8(truncatingIfNeeded: skipLength))
    case .valid(let encoded):
      let scalar = UTF8.decode(encoded)
      return (
        result: scalar.value,
        length: UInt8(truncatingIfNeeded: encoded.count))
    }
  }

  /// Encodes a Unicode scalar as a series of code units, passing each code
  /// unit to the given closure.
  ///
  /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
  /// value (`\u{1D110}`) whose UTF-8 representation takes four code units.
  /// This code encodes a fermata in UTF-8:
  ///
  ///     var bytes: [UTF8.CodeUnit] = []
  ///     UTF8.encode("𝄐", into: { bytes.append($0) })
  ///     print(bytes)
  ///     // Prints "[240, 157, 132, 144]"
  ///
  /// - Parameters:
  ///   - input: The Unicode scalar value to encode.
  ///   - processCodeUnit: A closure that is called with one code unit at a
  ///     time.
  @inlinable
  @inline(__always)
  public static func encode(
    _ input: Unicode.Scalar,
    into processCodeUnit: (CodeUnit) -> Void
  ) {
    // Each step emits the low byte of the biased bits minus one (wrapping),
    // then shifts that byte out. Running out of bits ends the sequence.
    var pending = encode(input)!._biasedBits

    processCodeUnit(UInt8(truncatingIfNeeded: pending) &- 0x01)
    pending &>>= 8
    guard _slowPath(pending != 0) else { return }

    processCodeUnit(UInt8(truncatingIfNeeded: pending) &- 0x01)
    pending &>>= 8
    guard _slowPath(pending != 0) else { return }

    processCodeUnit(UInt8(truncatingIfNeeded: pending) &- 0x01)
    pending &>>= 8
    guard _slowPath(pending != 0) else { return }

    processCodeUnit(UInt8(truncatingIfNeeded: pending) &- 0x01)
  }

  /// Returns a Boolean value indicating whether the specified code unit is a
  /// UTF-8 continuation byte.
  ///
  /// Continuation bytes have the form `0b10xxxxxx`. For example, a lowercase
  /// "e" with an acute accent above it (`"é"`) takes 2 bytes in UTF-8:
  /// `0b11000011` (195) and `0b10101001` (169). The second byte is a
  /// continuation byte.
  ///
  ///     let eAcute = "é"
  ///     for codeUnit in eAcute.utf8 {
  ///         print(codeUnit, UTF8.isContinuation(codeUnit))
  ///     }
  ///     // Prints "195 false"
  ///     // Prints "169 true"
  ///
  /// - Parameter byte: A UTF-8 code unit.
  /// - Returns: `true` if `byte` is a continuation byte; otherwise, `false`.
  @inlinable
  public static func isContinuation(_ byte: CodeUnit) -> Bool {
    // Keep only the top two bits and compare them against `10`.
    let topTwoBits = byte & 0xC0
    return topTwoBits == 0x80
  }

  /// Returns the offset of the first zero code unit in `input`, as reported
  /// by `_swift_stdlib_strlen_unsigned`.
  @inlinable
  public static func _nullCodeUnitOffset(
    in input: UnsafePointer<CodeUnit>
  ) -> Int {
    let length = _swift_stdlib_strlen_unsigned(input)
    return Int(length)
  }

  /// Returns the offset of the first zero `CChar` in `input`, as reported by
  /// `_swift_stdlib_strlen`.
  ///
  /// This overload supports parsing C strings as if they were UTF-8 strings.
  @inlinable
  public static func _nullCodeUnitOffset(
    in input: UnsafePointer<CChar>
  ) -> Int {
    let length = _swift_stdlib_strlen(input)
    return Int(length)
  }
}
| |
/// A codec for translating between Unicode scalar values and UTF-16 code
/// units.
extension Unicode.UTF16 : UnicodeCodec {
  /// Creates an instance of the UTF-16 codec.
  ///
  /// The new codec wraps a freshly created `ForwardParser`.
  @inlinable
  public init() {
    self = ._swift3Buffer(ForwardParser())
  }

  /// Starts or continues decoding a UTF-16 sequence.
  ///
  /// Call this method repeatedly until it returns
  /// `UnicodeDecodingResult.emptyInput` to decode a code unit sequence
  /// completely. It is not enough to check that the iterator was exhausted,
  /// because the decoder can hold buffered data that it already read from the
  /// input iterator.
  ///
  /// That same buffering makes it impossible to map a returned
  /// `Unicode.Scalar` or error back to a position in the iterator.
  ///
  /// This example decodes the UTF-16 code units of a string into an array of
  /// `Unicode.Scalar` instances. It is a demonstration only---when you need
  /// the Unicode scalar representation of a string, use its `unicodeScalars`
  /// view.
  ///
  ///     let str = "✨Unicode✨"
  ///     print(Array(str.utf16))
  ///     // Prints "[10024, 85, 110, 105, 99, 111, 100, 101, 10024]"
  ///
  ///     var codeUnitIterator = str.utf16.makeIterator()
  ///     var scalars: [Unicode.Scalar] = []
  ///     var utf16Decoder = UTF16()
  ///     Decode: while true {
  ///         switch utf16Decoder.decode(&codeUnitIterator) {
  ///         case .scalarValue(let v): scalars.append(v)
  ///         case .emptyInput: break Decode
  ///         case .error:
  ///             print("Decoding error")
  ///             break Decode
  ///         }
  ///     }
  ///     print(scalars)
  ///     // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
  ///
  /// - Parameter input: An iterator of code units to be decoded. Pass the
  ///   same iterator instance on every call to this method, and do not
  ///   advance the iterator or any copy of it outside this method.
  /// - Returns: A `UnicodeDecodingResult` instance that represents the next
  ///   Unicode scalar, an error, or the fact that the UTF sequence has been
  ///   fully decoded.
  @inlinable
  public mutating func decode<I : IteratorProtocol>(
    _ input: inout I
  ) -> UnicodeDecodingResult where I.Element == CodeUnit {
    guard case ._swift3Buffer(var bufferedParser) = self else {
      Builtin.unreachable()
    }

    let step = bufferedParser.parseScalar(from: &input)
    // Store the advanced parser back so the next call resumes from it.
    self = ._swift3Buffer(bufferedParser)

    switch step {
    case .emptyInput:
      return .emptyInput
    case .error:
      return .error
    case .valid(let encoded):
      return .scalarValue(UTF16.decode(encoded))
    }
  }

  /// Tries to decode one Unicode scalar and reports how many code units it
  /// spanned in the input.
  ///
  /// The reported span is the UTF-16 width of the scalar on success, 0 when
  /// the input is empty, and 1 on a decoding error. This function may consume
  /// more code units from `input` than the scalar required.
  @inlinable
  internal mutating func _decodeOne<I : IteratorProtocol>(
    _ input: inout I
  ) -> (UnicodeDecodingResult, Int) where I.Element == CodeUnit {
    let outcome = decode(&input)

    let spannedUnits: Int
    switch outcome {
    case .scalarValue(let scalar):
      spannedUnits = UTF16.width(scalar)
    case .emptyInput:
      spannedUnits = 0
    case .error:
      spannedUnits = 1
    }
    return (outcome, spannedUnits)
  }

  /// Encodes a Unicode scalar as a series of code units, passing each code
  /// unit to the given closure.
  ///
  /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar
  /// value (`\u{1D110}`) whose UTF-16 representation takes two code units.
  /// This code encodes a fermata in UTF-16:
  ///
  ///     var codeUnits: [UTF16.CodeUnit] = []
  ///     UTF16.encode("𝄐", into: { codeUnits.append($0) })
  ///     print(codeUnits)
  ///     // Prints "[55348, 56592]"
  ///
  /// - Parameters:
  ///   - input: The Unicode scalar value to encode.
  ///   - processCodeUnit: A closure that is called with one code unit at a
  ///     time.
  @inlinable
  public static func encode(
    _ input: Unicode.Scalar,
    into processCodeUnit: (CodeUnit) -> Void
  ) {
    let packed = encode(input)!._storage
    // The low 16 bits are always emitted as the first code unit.
    processCodeUnit(UInt16(truncatingIfNeeded: packed))

    // Whatever remains after shifting out the first unit is the second code
    // unit; zero means there is none.
    let trailing = packed &>> 16
    guard _slowPath(trailing != 0) else { return }
    processCodeUnit(UInt16(truncatingIfNeeded: trailing))
  }
}
| |
/// A codec for translating between Unicode scalar values and UTF-32 code
/// units.
extension Unicode.UTF32 : UnicodeCodec {
  /// Creates an instance of the UTF-32 codec.
  @inlinable
  public init() { self = ._swift3Codec }

  /// Starts or continues decoding a UTF-32 sequence.
  ///
  /// To decode a code unit sequence completely, call this method repeatedly
  /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the
  /// iterator was exhausted is not sufficient, because the decoder can store
  /// buffered data from the input iterator.
  ///
  /// Because of buffering, it is impossible to find the corresponding position
  /// in the iterator for a given returned `Unicode.Scalar` or an error.
  ///
  /// The following example decodes the UTF-32 code units of a string into an
  /// array of `Unicode.Scalar` instances. This is a demonstration only---if
  /// you need the Unicode scalar representation of a string, use its
  /// `unicodeScalars` view.
  ///
  ///     // UTF-32 representation of "✨Unicode✨"
  ///     let codeUnits: [UTF32.CodeUnit] =
  ///             [10024, 85, 110, 105, 99, 111, 100, 101, 10024]
  ///
  ///     var codeUnitIterator = codeUnits.makeIterator()
  ///     var scalars: [Unicode.Scalar] = []
  ///     var utf32Decoder = UTF32()
  ///     Decode: while true {
  ///         switch utf32Decoder.decode(&codeUnitIterator) {
  ///         case .scalarValue(let v): scalars.append(v)
  ///         case .emptyInput: break Decode
  ///         case .error:
  ///             print("Decoding error")
  ///             break Decode
  ///         }
  ///     }
  ///     print(scalars)
  ///     // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]"
  ///
  /// - Parameter input: An iterator of code units to be decoded. `input` must be
  ///   the same iterator instance in repeated calls to this method. Do not
  ///   advance the iterator or any copies of the iterator outside this
  ///   method.
  /// - Returns: A `UnicodeDecodingResult` instance, representing the next
  ///   Unicode scalar, an indication of an error, or an indication that the
  ///   UTF sequence has been fully decoded.
  @inlinable
  public mutating func decode<I : IteratorProtocol>(
    _ input: inout I
  ) -> UnicodeDecodingResult where I.Element == CodeUnit {
    // A new parser is created for every call; this method does not write
    // anything back to `self`.
    var parser = ForwardParser()

    switch parser.parseScalar(from: &input) {
    case .valid(let s): return .scalarValue(UTF32.decode(s))
    case .error: return .error
    case .emptyInput: return .emptyInput
    }
  }

  /// Encodes a Unicode scalar as a UTF-32 code unit by calling the given
  /// closure.
  ///
  /// For example, like every Unicode scalar, the musical fermata symbol ("𝄐")
  /// can be represented in UTF-32 as a single code unit. The following code
  /// encodes a fermata in UTF-32:
  ///
  ///     var codeUnit: UTF32.CodeUnit = 0
  ///     UTF32.encode("𝄐", into: { codeUnit = $0 })
  ///     print(codeUnit)
  ///     // Prints "119056"
  ///
  /// - Parameters:
  ///   - input: The Unicode scalar value to encode.
  ///   - processCodeUnit: A closure that processes one code unit argument at a
  ///     time.
  @inlinable
  public static func encode(
    _ input: Unicode.Scalar,
    into processCodeUnit: (CodeUnit) -> Void
  ) {
    // The closure is called exactly once, with the scalar converted to
    // `UInt32`.
    processCodeUnit(UInt32(input))
  }
}
| |
/// Converts the given input from one Unicode encoding to another, passing
/// each resulting code unit to the given closure.
///
/// This example transcodes the UTF-8 representation of the string
/// `"Fermata 𝄐"` into UTF-32.
///
///     let fermata = "Fermata 𝄐"
///     let bytes = fermata.utf8
///     print(Array(bytes))
///     // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]"
///
///     var codeUnits: [UTF32.CodeUnit] = []
///     let sink = { codeUnits.append($0) }
///     transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self,
///               stoppingOnError: false, into: sink)
///     print(codeUnits)
///     // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 119056]"
///
/// As the function walks its input, the `sink` closure receives each
/// resulting UTF-32 code unit.
///
/// - Parameters:
///   - input: An iterator of code units to be translated, encoded as
///     `inputEncoding`. If `stopOnError` is `false`, the entire iterator will
///     be exhausted. Otherwise, iteration will stop if an encoding error is
///     detected.
///   - inputEncoding: The Unicode encoding of `input`.
///   - outputEncoding: The destination Unicode encoding.
///   - stopOnError: Pass `true` to stop translation when an encoding error is
///     detected in `input`. Otherwise, a Unicode replacement character
///     (`"\u{FFFD}"`) is inserted for each detected error.
///   - processCodeUnit: A closure that is called with one `outputEncoding`
///     code unit at a time.
/// - Returns: `true` if the translation detected encoding errors in `input`;
///   otherwise, `false`.
@inlinable
@inline(__always)
public func transcode<
  Input : IteratorProtocol,
  InputEncoding : Unicode.Encoding,
  OutputEncoding : Unicode.Encoding
>(
  _ input: Input,
  from inputEncoding: InputEncoding.Type,
  to outputEncoding: OutputEncoding.Type,
  stoppingOnError stopOnError: Bool,
  into processCodeUnit: (OutputEncoding.CodeUnit) -> Void
) -> Bool
  where InputEncoding.CodeUnit == Input.Element {
  // NB. This routine cannot be optimized to a memcpy when
  // InputEncoding == OutputEncoding, because memcpy would not substitute
  // U+FFFD replacement characters for ill-formed sequences.

  var source = input
  var parser = InputEncoding.ForwardParser()
  var sawError = false

  while true {
    switch parser.parseScalar(from: &source) {
    case .emptyInput:
      return sawError

    case .valid(let scalar):
      let converted = OutputEncoding.transcode(scalar, from: inputEncoding)
      if _fastPath(converted != nil), let units = converted {
        units.forEach(processCodeUnit)
        continue
      }
      // The scalar has no representation in the output encoding: leave the
      // switch so that a replacement character is emitted below.

    case .error:
      if _slowPath(stopOnError) { return true }
      sawError = true
    }

    // Reached for an input error and for a scalar that could not be
    // transcoded.
    OutputEncoding.encodedReplacementCharacter.forEach(processCodeUnit)
  }
}
| |
/// A type whose instances are used in the internal `String` representation.
///
/// Conforming types provide a conversion to a UTF-16 code unit and a
/// conversion back from one.
public // @testable
protocol _StringElement {
  /// Returns the UTF-16 code unit that corresponds to the given element.
  static func _toUTF16CodeUnit(_: Self) -> UTF16.CodeUnit

  /// Returns the element that corresponds to the given UTF-16 code unit.
  static func _fromUTF16CodeUnit(_ utf16: UTF16.CodeUnit) -> Self
}
| |
/// UTF-16 code units are their own `_StringElement` representation, so both
/// conversions are the identity.
extension UTF16.CodeUnit : _StringElement {
  /// Returns the given UTF-16 code unit unchanged.
  @inlinable
  public // @testable
  static func _toUTF16CodeUnit(
    _ codeUnit: UTF16.CodeUnit
  ) -> UTF16.CodeUnit {
    return codeUnit
  }

  /// Returns the given UTF-16 code unit unchanged.
  @inlinable
  public // @testable
  static func _fromUTF16CodeUnit(
    _ utf16: UTF16.CodeUnit
  ) -> UTF16.CodeUnit {
    return utf16
  }
}
| |
/// Conversions between UTF-8 and UTF-16 code units. Both directions assert,
/// through `_internalInvariant`, that the value is no greater than `0x7f`.
extension UTF8.CodeUnit : _StringElement {
  /// Widens a UTF-8 code unit to a UTF-16 code unit.
  ///
  /// - Parameter x: A UTF-8 code unit; the internal invariant expects it to
  ///   be at most `0x7f`.
  /// - Returns: A UTF-16 code unit with the same numeric value.
  @inlinable
  public // @testable
  static func _toUTF16CodeUnit(_ x: UTF8.CodeUnit) -> UTF16.CodeUnit {
    _internalInvariant(x <= 0x7f, "should only be doing this with ASCII")
    let widened = UTF16.CodeUnit(truncatingIfNeeded: x)
    return widened
  }

  /// Narrows a UTF-16 code unit to a UTF-8 code unit.
  ///
  /// - Parameter utf16: A UTF-16 code unit; the internal invariant expects it
  ///   to be at most `0x7f`.
  /// - Returns: A UTF-8 code unit made from the low bits of `utf16`.
  @inlinable
  public // @testable
  static func _fromUTF16CodeUnit(
    _ utf16: UTF16.CodeUnit
  ) -> UTF8.CodeUnit {
    _internalInvariant(utf16 <= 0x7f, "should only be doing this with ASCII")
    let narrowed = UTF8.CodeUnit(truncatingIfNeeded: utf16)
    return narrowed
  }
}
| |
// An unchecked initializer that avoids precondition branches in hot code
// paths where the value is already known to be a valid Unicode scalar.
extension Unicode.Scalar {
  /// Creates an instance whose numeric value is `value`, skipping the regular
  /// precondition checks for code point validity.
  ///
  /// Validity is still asserted through `_internalInvariant`: `value` must
  /// not be a surrogate code point (`0xD800` through `0xDFFF`) and must not
  /// exceed `0x10FFFF`.
  ///
  /// - Parameter value: The numeric value of the scalar to create.
  @inlinable
  internal init(_unchecked value: UInt32) {
    _internalInvariant(!(0xD800...0xDFFF).contains(value),
      "high- and low-surrogate code points are not valid Unicode scalar values")
    _internalInvariant(!(value > 0x10FFFF),
      "value is outside of Unicode codespace")

    self._value = value
  }
}
| |
extension UnicodeCodec {
  /// Returns the offset of the first code unit in `input` that is equal to 0.
  ///
  /// This default implementation scans forward from `input` one code unit at
  /// a time, so `input` must point to memory that contains a zero code unit.
  ///
  /// - Parameter input: A pointer to the first code unit to examine.
  /// - Returns: The number of nonzero code units that precede the first zero
  ///   code unit.
  @inlinable
  public static func _nullCodeUnitOffset(
    in input: UnsafePointer<CodeUnit>
  ) -> Int {
    var offset = 0
    while true {
      if input[offset] == 0 {
        return offset
      }
      offset += 1
    }
  }
}
| |
/// An overload of `transcode` that is marked unavailable.
///
/// The availability message points callers to
/// `transcode(_:from:to:stoppingOnError:into:)`. The body is never expected
/// to run.
@available(*, unavailable, message: "use 'transcode(_:from:to:stoppingOnError:into:)'")
public func transcode<Input, InputEncoding, OutputEncoding>(
  _ inputEncoding: InputEncoding.Type,
  _ outputEncoding: OutputEncoding.Type,
  _ input: Input,
  _ output: (OutputEncoding.CodeUnit) -> Void,
  stopOnError: Bool
) -> Bool
  where
  Input : IteratorProtocol,
  InputEncoding : UnicodeCodec,
  OutputEncoding : UnicodeCodec,
  InputEncoding.CodeUnit == Input.Element
{
  Builtin.unreachable()
}
| |
| /// A namespace for Unicode utilities such as `Unicode.Scalar` and `Unicode.UTF8`. |
| @_frozen |
| public enum Unicode {} |
| |