| //===--- UTFEncoding.swift - Common guts of the big 3 UnicodeEncodings ----===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // These components would be internal if it were possible to use internal |
| // protocols to supply public conformance requirements. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| |
| public protocol _UTFParser { |
| associatedtype Encoding : _UnicodeEncoding |
| |
| func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8) |
| func _bufferedScalar(bitCount: UInt8) -> Encoding.EncodedScalar |
| |
| var _buffer: _UIntBuffer<UInt32, Encoding.CodeUnit> { get set } |
| } |
| |
| extension _UTFParser |
| where Encoding.EncodedScalar : RangeReplaceableCollection { |
| |
| @_inlineable // FIXME(sil-serialize-all) |
| @inline(__always) |
| public mutating func parseScalar<I : IteratorProtocol>( |
| from input: inout I |
| ) -> Unicode.ParseResult<Encoding.EncodedScalar> |
| where I.Element == Encoding.CodeUnit { |
| |
| // Bufferless single-scalar fastpath. |
| if _fastPath(_buffer.isEmpty) { |
| guard let codeUnit = input.next() else { return .emptyInput } |
| // ASCII, return immediately. |
| if Encoding._isScalar(codeUnit) { |
| return .valid(Encoding.EncodedScalar(CollectionOfOne(codeUnit))) |
| } |
| // Non-ASCII, proceed to buffering mode. |
| _buffer.append(codeUnit) |
| } else if Encoding._isScalar( |
| Encoding.CodeUnit(truncatingIfNeeded: _buffer._storage) |
| ) { |
| // ASCII in _buffer. We don't refill the buffer so we can return |
| // to bufferless mode once we've exhausted it. |
| let codeUnit = Encoding.CodeUnit(truncatingIfNeeded: _buffer._storage) |
| _buffer.remove(at: _buffer.startIndex) |
| return .valid(Encoding.EncodedScalar(CollectionOfOne(codeUnit))) |
| } |
| // Buffering mode. |
| // Fill buffer back to 4 bytes (or as many as are left in the iterator). |
| repeat { |
| if let codeUnit = input.next() { |
| _buffer.append(codeUnit) |
| } else { |
| if _buffer.isEmpty { return .emptyInput } |
| break // We still have some bytes left in our buffer. |
| } |
| } while _buffer.count < _buffer.capacity |
| |
| // Find one unicode scalar. |
| let (isValid, scalarBitCount) = _parseMultipleCodeUnits() |
| _sanityCheck(scalarBitCount % numericCast(Encoding.CodeUnit.bitWidth) == 0) |
| _sanityCheck(1...4 ~= scalarBitCount / 8) |
| _sanityCheck(scalarBitCount <= _buffer._bitCount) |
| |
| // Consume the decoded bytes (or maximal subpart of ill-formed sequence). |
| let encodedScalar = _bufferedScalar(bitCount: scalarBitCount) |
| |
| _buffer._storage = UInt32( |
| // widen to 64 bits so that we can empty the buffer in the 4-byte case |
| truncatingIfNeeded: UInt64(_buffer._storage) &>> scalarBitCount) |
| |
| _buffer._bitCount = _buffer._bitCount &- scalarBitCount |
| |
| if _fastPath(isValid) { |
| return .valid(encodedScalar) |
| } |
| return .error( |
| length: Int(scalarBitCount / numericCast(Encoding.CodeUnit.bitWidth))) |
| } |
| } |