// blob: 8837e434818ec58bfb646d88bcfa6a07899d210d [file] [log] [blame]
//===--- UnicodeParser.swift ----------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
extension _Unicode {
  /// The outcome of a single attempt to parse one value of type `T`.
  public enum ParseResult<T> {
    /// Parsing succeeded and produced the associated value.
    case valid(T)

    /// Nothing was parsed because the input was empty.
    case emptyInput

    /// Parsing failed on invalid input.
    ///
    /// NOTE(review): `length` is presumably the number of code units that
    /// made up the invalid sequence; this file never reads it, so confirm
    /// against the conforming parsers.
    case invalid(length: Int)

    /// `true` exactly when this result is `.emptyInput`; `false` for both
    /// `.valid` and `.invalid`.
    var isEmpty : Bool {
      if case .emptyInput = self {
        return true
      }
      return false
    }
  }
}
/// A type that splits a stream of code units into individually encoded
/// Unicode scalar values.
public protocol UnicodeParser {
  /// The encoding with which this parser is associated.
  ///
  /// Its `CodeUnit` is the element type the parser consumes and its
  /// `EncodedScalar` is the payload of a successful parse.
  associatedtype Encoding : _UnicodeEncoding

  /// Creates a parser in its initial state.
  init()

  /// Parses a single Unicode scalar value from `input`.
  ///
  /// - Parameter input: An iterator of `Encoding.CodeUnit` values. It is
  ///   passed `inout`, so the parser may advance it.
  /// - Returns: `.valid` wrapping the encoded scalar on success,
  ///   `.emptyInput` when there was nothing to parse, or `.invalid` when
  ///   the input could not be parsed.
  mutating func parseScalar<I : IteratorProtocol>(
    from input: inout I
  ) -> _Unicode.ParseResult<Encoding.EncodedScalar>
  where I.Element == Encoding.CodeUnit
}
extension UnicodeParser {
  /// Parses all of `input` with a fresh instance of this parser, passing
  /// each decoded scalar to `output`.
  ///
  /// - Parameters:
  ///   - input: The code units to decode; advanced as parsing proceeds.
  ///   - makeRepairs: When `true`, every invalid sequence is reported to
  ///     `output` as U+FFFD and decoding continues. When `false`, decoding
  ///     stops at the first invalid sequence without emitting anything
  ///     for it.
  ///   - output: Called once per decoded (or substituted) scalar, in order.
  /// - Returns: The number of invalid sequences encountered. With
  ///   `makeRepairs == false` this is `1` if decoding stopped on an error
  ///   and `0` otherwise.
  @inline(__always)
  @discardableResult
  public static func decode<I: IteratorProtocol>(
    _ input: inout I,
    repairingIllFormedSequences makeRepairs: Bool,
    into output: (UnicodeScalar)->Void
  ) -> Int
  where I.Element == Encoding.CodeUnit
  {
    var parser = Self()
    var repairCount = 0

    parsing: while true {
      switch parser.parseScalar(from: &input) {
      case .emptyInput:
        // Input exhausted; fall out to the single return below.
        break parsing

      case .valid(let encodedScalar):
        output(Encoding.decode(encodedScalar))

      case .invalid:
        // Without repairs the first error ends decoding; no earlier error
        // can have been counted, so the total is exactly one.
        guard makeRepairs else { return 1 }
        repairCount += 1
        output(UnicodeScalar(_unchecked: 0xFFFD))
      }
    }

    return repairCount
  }
}
extension _Unicode {
  /// Pairs an iterator of code units with a parser whose encoding uses
  /// that same code unit type.
  public struct ParsingIterator<
    CodeUnitIterator : IteratorProtocol,
    Parser: UnicodeParser
  > where Parser.Encoding.CodeUnit == CodeUnitIterator.Element {
    /// The underlying source of code units.
    public var codeUnits: CodeUnitIterator

    /// The parser applied to `codeUnits`.
    public var parser: Parser

    /// Creates an instance storing the given code-unit iterator and parser.
    ///
    /// - Parameters:
    ///   - codeUnits: The iterator that supplies code units.
    ///   - parser: The parser used to consume `codeUnits`.
    public init(codeUnits: CodeUnitIterator, parser: Parser) {
      self.parser = parser
      self.codeUnits = codeUnits
    }
  }
}
extension _Unicode.ParsingIterator : IteratorProtocol, Sequence {
  /// Parses and returns the next encoded scalar from `codeUnits`.
  ///
  /// - Returns: The parsed scalar; the encoding's
  ///   `encodedReplacementCharacter` when the parser reports invalid
  ///   input; or `nil` when the parser reports empty input.
  public mutating func next() -> Parser.Encoding.EncodedScalar? {
    let outcome = parser.parseScalar(from: &codeUnits)
    switch outcome {
    case .emptyInput:
      return nil
    case .invalid:
      return Parser.Encoding.encodedReplacementCharacter
    case .valid(let encodedScalar):
      return encodedScalar
    }
  }
}