Merge pull request #700 from KingOfBrian/bugfix/SR-2301

diff --git a/CoreFoundation/Base.subproj/ForSwiftFoundationOnly.h b/CoreFoundation/Base.subproj/ForSwiftFoundationOnly.h
index 11b9f03..a2e844a 100644
--- a/CoreFoundation/Base.subproj/ForSwiftFoundationOnly.h
+++ b/CoreFoundation/Base.subproj/ForSwiftFoundationOnly.h
@@ -171,7 +171,7 @@
     void (*startElementNs)(_CFXMLInterface ctx,
                            const unsigned char *localname,
                            const unsigned char *_Nullable prefix,
-                           const unsigned char *URI,
+                           const unsigned char *_Nullable URI,
                            int nb_namespaces,
                            const unsigned char *_Nullable *_Nonnull namespaces,
                            int nb_attributes,
@@ -180,7 +180,7 @@
     void (*endElementNs)(_CFXMLInterface ctx,
                          const unsigned char *localname,
                          const unsigned char *_Nullable prefix,
-                         const unsigned char *URI);
+                         const unsigned char *_Nullable URI);
     void (*characters)(_CFXMLInterface ctx,
                        const unsigned char *ch,
                        int len);
diff --git a/Foundation/NSXMLParser.swift b/Foundation/NSXMLParser.swift
index 1c8f3b9..b0c5c1f 100644
--- a/Foundation/NSXMLParser.swift
+++ b/Foundation/NSXMLParser.swift
@@ -42,7 +42,8 @@
     }
 }
 
-private func UTF8STRING(_ bytes: UnsafePointer<UInt8>) -> String? {
+private func UTF8STRING(_ bytes: UnsafePointer<UInt8>?) -> String? {
+    guard let bytes = bytes else { return nil }
     // strlen operates on the wrong type, char*. We can't rebind the memory to a different type without knowing it's length,
     // but since we know strlen is in libc, its safe to directly bitcast the pointer without worrying about multiple accesses
     // of different types visible to the compiler.
@@ -243,16 +244,10 @@
     return "\(prefixString!):\(suffixString!)"
 }
 
-internal func _NSXMLParserStartElementNs(_ ctx: _CFXMLInterface, localname: UnsafePointer<UInt8>, prefix: UnsafePointer<UInt8>?, URI: UnsafePointer<UInt8>, nb_namespaces: Int32, namespaces: UnsafeMutablePointer<UnsafePointer<UInt8>?>, nb_attributes: Int32, nb_defaulted: Int32, attributes: UnsafeMutablePointer<UnsafePointer<UInt8>?>) -> Void {
+internal func _NSXMLParserStartElementNs(_ ctx: _CFXMLInterface, localname: UnsafePointer<UInt8>, prefix: UnsafePointer<UInt8>?, URI: UnsafePointer<UInt8>?, nb_namespaces: Int32, namespaces: UnsafeMutablePointer<UnsafePointer<UInt8>?>, nb_attributes: Int32, nb_defaulted: Int32, attributes: UnsafeMutablePointer<UnsafePointer<UInt8>?>) -> Void {
     let parser = ctx.parser
-    let reportQNameURI = parser.shouldProcessNamespaces
     let reportNamespaces = parser.shouldReportNamespacePrefixes
-    // Since strlen is in libc, it's safe to bitcast the UInt8 pointer argument to an Int8 (char *) pointer.
-    let prefixLen = prefix != nil ? UInt(strlen(unsafeBitCast(prefix!, to: UnsafePointer<Int8>.self))) : 0
-    let localnameString = (prefixLen == 0 || reportQNameURI) ? UTF8STRING(localname) : nil
-    let qualifiedNameString = prefixLen != 0 ? _colonSeparatedStringFromPrefixAndSuffix(prefix!, UInt(prefixLen), localname, UInt(strlen(unsafeBitCast(localname, to: UnsafePointer<Int8>.self)))) : localnameString
-    let namespaceURIString = reportQNameURI ? UTF8STRING(URI) : nil
-    
+
     var nsDict = [String:String]()
     var attrDict = [String:String]()
     if nb_attributes + nb_namespaces > 0 {
@@ -274,7 +269,7 @@
                     nsDict[k] = v
                 }
             }
-            if !reportQNameURI {
+            if !parser.shouldProcessNamespaces {
                 if let k = asAttrNamespaceNameString,
                    let v = namespaceValueString {
                     attrDict[k] = v
@@ -323,37 +318,47 @@
         }
         
     }
-    
-    if let delegate = parser.delegate {
-        if reportQNameURI {
-            delegate.parser(parser, didStartElement: localnameString!, namespaceURI: (namespaceURIString != nil ? namespaceURIString : ""), qualifiedName: (qualifiedNameString != nil ? qualifiedNameString : ""), attributes: attrDict)
-        } else {
-            delegate.parser(parser, didStartElement: qualifiedNameString!, namespaceURI: nil, qualifiedName: nil, attributes: attrDict)
+
+    var elementName: String = UTF8STRING(localname)!
+    var namespaceURI: String? = nil
+    var qualifiedName: String? = nil
+    if parser.shouldProcessNamespaces {
+        namespaceURI = UTF8STRING(URI) ?? ""
+        qualifiedName = elementName
+        if let prefix = UTF8STRING(prefix) {
+            qualifiedName = elementName + ":" + prefix
         }
     }
+    else if let prefix = UTF8STRING(prefix) {
+        elementName = elementName + ":" + prefix
+    }
+
+    parser.delegate?.parser(parser, didStartElement: elementName, namespaceURI: namespaceURI, qualifiedName: qualifiedName, attributes: attrDict)
 }
 
-internal func _NSXMLParserEndElementNs(_ ctx: _CFXMLInterface , localname: UnsafePointer<UInt8>, prefix: UnsafePointer<UInt8>?, URI: UnsafePointer<UInt8>) -> Void {
+internal func _NSXMLParserEndElementNs(_ ctx: _CFXMLInterface , localname: UnsafePointer<UInt8>, prefix: UnsafePointer<UInt8>?, URI: UnsafePointer<UInt8>?) -> Void {
     let parser = ctx.parser
-    let reportQNameURI = parser.shouldProcessNamespaces
-    let prefixLen = prefix != nil ? strlen(unsafeBitCast(prefix!, to: UnsafePointer<Int8>.self)) : 0
-    let localnameString = (prefixLen == 0 || reportQNameURI) ? UTF8STRING(localname) : nil
-    let nilStr: String? = nil
-    let qualifiedNameString = (prefixLen != 0) ? _colonSeparatedStringFromPrefixAndSuffix(prefix!, UInt(prefixLen), localname, UInt(strlen(unsafeBitCast(localname, to: UnsafePointer<Int8>.self)))) : nilStr
-    let namespaceURIString = reportQNameURI ? UTF8STRING(URI) : nilStr
-    
-    
-    if let delegate = parser.delegate {
-        if reportQNameURI {
-            // When reporting namespace info, the delegate parameters are not passed in nil
-            delegate.parser(parser, didEndElement: localnameString!, namespaceURI: namespaceURIString == nil ? "" : namespaceURIString, qualifiedName: qualifiedNameString == nil ? "" : qualifiedNameString)
-        } else {
-            delegate.parser(parser, didEndElement: qualifiedNameString!, namespaceURI: nil, qualifiedName: nil)
+
+    var elementName: String = UTF8STRING(localname)!
+    var namespaceURI: String? = nil
+    var qualifiedName: String? = nil
+    if parser.shouldProcessNamespaces {
+        namespaceURI = UTF8STRING(URI) ?? ""
+        qualifiedName = elementName
+        if let prefix = UTF8STRING(prefix) {
+            qualifiedName = elementName + ":" + prefix
         }
     }
-    
+    else if let prefix = UTF8STRING(prefix) {
+        elementName = elementName + ":" + prefix
+    }
+
+    parser.delegate?.parser(parser, didEndElement: elementName, namespaceURI: namespaceURI, qualifiedName: qualifiedName)
+
     // Pop the last namespaces that were pushed (safe since XML is balanced)
-    parser._popNamespaces()
+    if parser.shouldReportNamespacePrefixes {
+        parser._popNamespaces()
+    }
 }
 
 internal func _NSXMLParserCharacters(_ ctx: _CFXMLInterface, ch: UnsafePointer<UInt8>, len: Int32) -> Void {
@@ -410,8 +415,10 @@
     private var _handler: _CFXMLInterfaceSAXHandler
     internal var _stream: InputStream?
     internal var _data: Data?
+
     internal var _chunkSize = Int(4096 * 32) // a suitably large number for a decent chunk size
-    internal var _haveDetectedEncoding = false
+    // This chunk of data stores the head of the stream. We know we have enough information for encoding
+    // when there are atleast 4 bytes in here.
     internal var _bomChunk: Data?
     fileprivate var _parserContext: _CFXMLInterfaceParserContext?
     internal var _delegateAborted = false
@@ -500,72 +507,52 @@
 
     internal func parseData(_ data: Data) -> Bool {
         _CFXMLInterfaceSetStructuredErrorFunc(interface, _structuredErrorFunc)
-        var result = true
-        /* The vast majority of this method just deals with ensuring we do a single parse
-         on the first 4 received bytes before continuing on to the actual incremental section */
-        if _haveDetectedEncoding {
-            var totalLength = data.count
-            if let chunk = _bomChunk {
-                totalLength += chunk.count
+
+        let handler: _CFXMLInterfaceSAXHandler? = (delegate != nil ? _handler : nil)
+        let unparsedData: Data
+        // If the parser context is nil, we have not received enough bytes to create the push parser
+        if _parserContext == nil {
+            // Look at the bomChunk and this data
+            let bomChunk: Data = {
+                guard var bomChunk = _bomChunk else {
+                    return data
+                }
+                bomChunk.append(data)
+                return bomChunk
+            }()
+            // If we have not received 4 bytes, save the bomChunk for next pass
+            if bomChunk.count < 4 {
+                _bomChunk = bomChunk
+                return false
             }
-            if (totalLength < 4) {
-                if let chunk = _bomChunk {
-                    var newData = Data()
-                    newData.append(chunk)
-                    newData.append(data)
-                    _bomChunk = newData
-                } else {
-                    _bomChunk = data
-                }
-            } else {
-                var allExistingData: Data
-                if let chunk = _bomChunk {
-                    var newData = Data()
-                    newData.append(chunk)
-                    newData.append(data)
-                    allExistingData = newData
-                } else {
-                    allExistingData = data
-                }
-
-                var handler: _CFXMLInterfaceSAXHandler? = nil
-                if delegate != nil {
-                    handler = _handler
-                }
-                
-                _parserContext = allExistingData.withUnsafeBytes { (bytes: UnsafePointer<Int8>) -> _CFXMLInterfaceParserContext in
-                    return _CFXMLInterfaceCreatePushParserCtxt(handler, interface, bytes, 4, nil)
-                }
-
-                var options = _kCFXMLInterfaceRecover | _kCFXMLInterfaceNoEnt // substitute entities, recover on errors
-                if shouldResolveExternalEntities {
-                    options |= _kCFXMLInterfaceDTDLoad
-                }
-
-                if handler == nil {
-                    options |= (_kCFXMLInterfaceNoError | _kCFXMLInterfaceNoWarning)
-                }
-
-                _CFXMLInterfaceCtxtUseOptions(_parserContext, options)
-                _haveDetectedEncoding = true
-                _bomChunk = nil
-
-                if (totalLength > 4) {
-                    let remainingData = allExistingData.withUnsafeMutableBytes { (bytes: UnsafeMutablePointer<UInt8>) -> Data in
-                        let ptr = bytes.advanced(by: 4)
-                        return Data(bytesNoCopy: ptr, count: totalLength - 4, deallocator: .none)
-                    }
-                    
-                    let _ = parseData(remainingData)
-                }
+            // Prepare options (substitute entities, recover on errors)
+            var options = _kCFXMLInterfaceRecover | _kCFXMLInterfaceNoEnt
+            if shouldResolveExternalEntities {
+                options |= _kCFXMLInterfaceDTDLoad
             }
-        } else {
-            let parseResult = data.withUnsafeBytes { (bytes: UnsafePointer<Int8>) -> Int32 in
-                return _CFXMLInterfaceParseChunk(_parserContext, bytes, Int32(data.count), 0)
+            if handler == nil {
+                options |= (_kCFXMLInterfaceNoError | _kCFXMLInterfaceNoWarning)
             }
-            
-            result = _handleParseResult(parseResult)
+
+            // Create the push context with the first 4 bytes
+            bomChunk.withUnsafeBytes { bytes in
+                _parserContext = _CFXMLInterfaceCreatePushParserCtxt(handler, interface, bytes, 4, nil)
+            }
+            _CFXMLInterfaceCtxtUseOptions(_parserContext, options)
+            // Prepare the remaining data for parsing
+            let dataRange = bomChunk.indices
+            let unparsed = Range(uncheckedBounds: (dataRange.startIndex.advanced(by: 4), dataRange.endIndex))
+            unparsedData = bomChunk.subdata(in: unparsed)
         }
+        else {
+            unparsedData = data
+        }
+
+        let parseResult = unparsedData.withUnsafeBytes { (bytes: UnsafePointer<Int8>) -> Int32 in
+            return _CFXMLInterfaceParseChunk(_parserContext, bytes, Int32(unparsedData.count), 0)
+        }
+
+        let result = _handleParseResult(parseResult)
         _CFXMLInterfaceSetStructuredErrorFunc(interface, nil)
         return result
     }
@@ -745,13 +732,13 @@
     func parser(_ parser: XMLParser, resolveExternalEntityName name: String, systemID: String?) -> Data?
     // this gives the delegate an opportunity to resolve an external entity itself and reply with the resulting data.
     
-    func parser(_ parser: XMLParser, parseErrorOccurred parseError: NSError)
+    func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error)
     // ...and this reports a fatal error to the delegate. The parser will stop parsing.
     
-    func parser(_ parser: XMLParser, validationErrorOccurred validationError: NSError)
+    func parser(_ parser: XMLParser, validationErrorOccurred validationError: Error)
 }
 
-extension XMLParserDelegate {
+public extension XMLParserDelegate {
     
     func parserDidStartDocument(_ parser: XMLParser) { }
     func parserDidEndDocument(_ parser: XMLParser) { }
@@ -788,9 +775,9 @@
     
     func parser(_ parser: XMLParser, resolveExternalEntityName name: String, systemID: String?) -> Data? { return nil }
     
-    func parser(_ parser: XMLParser, parseErrorOccurred parseError: NSError) { }
+    func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) { }
     
-    func parser(_ parser: XMLParser, validationErrorOccurred validationError: NSError) { }
+    func parser(_ parser: XMLParser, validationErrorOccurred validationError: Error) { }
 }
 
 extension XMLParser {
diff --git a/TestFoundation/TestNSXMLDocument.swift b/TestFoundation/TestNSXMLDocument.swift
index a493dcd..c71ece6 100644
--- a/TestFoundation/TestNSXMLDocument.swift
+++ b/TestFoundation/TestNSXMLDocument.swift
@@ -13,7 +13,7 @@
     import Foundation
     import XCTest
 #else
-    @testable import SwiftFoundation
+    import SwiftFoundation
     import SwiftXCTest
 #endif
 
diff --git a/TestFoundation/TestNSXMLParser.swift b/TestFoundation/TestNSXMLParser.swift
index 486f23d..b181744 100644
--- a/TestFoundation/TestNSXMLParser.swift
+++ b/TestFoundation/TestNSXMLParser.swift
@@ -9,31 +9,146 @@
 
 
 #if DEPLOYMENT_RUNTIME_OBJC || os(Linux)
-import Foundation
-import XCTest
+    import Foundation
+    import XCTest
 #else
-import SwiftFoundation
-import SwiftXCTest
+    import SwiftFoundation
+    import SwiftXCTest
 #endif
 
+class OptionalParserConformance: XMLParserDelegate {}
+
+enum XMLParserDelegateEvent {
+    case startDocument
+    case endDocument
+    case didStartElement(String, String?, String?, [String: String])
+    case didEndElement(String, String?, String?)
+    case foundCharacters(String)
+}
+
+extension XMLParserDelegateEvent: Equatable {
+
+    public static func ==(lhs: XMLParserDelegateEvent, rhs: XMLParserDelegateEvent) -> Bool {
+        switch (lhs, rhs) {
+        case (.startDocument, startDocument):
+            return true
+        case (.endDocument, endDocument):
+            return true
+        case let (.didStartElement(lhsElement, lhsNamespace, lhsQname, lhsAttr),
+                  didStartElement(rhsElement, rhsNamespace, rhsQname, rhsAttr)):
+            return lhsElement == rhsElement && lhsNamespace == rhsNamespace && lhsQname == rhsQname && lhsAttr == rhsAttr
+        case let (.didEndElement(lhsElement, lhsNamespace, lhsQname),
+                  .didEndElement(rhsElement, rhsNamespace, rhsQname)):
+            return lhsElement == rhsElement && lhsNamespace == rhsNamespace && lhsQname == rhsQname
+        case let (.foundCharacters(lhsChar), .foundCharacters(rhsChar)):
+            return lhsChar == rhsChar
+        default:
+            return false
+        }
+    }
+
+}
+
+class XMLParserDelegateEventStream: XMLParserDelegate {
+    var events: [XMLParserDelegateEvent] = []
+
+    func parserDidStartDocument(_ parser: XMLParser) {
+        events.append(.startDocument)
+    }
+    func parserDidEndDocument(_ parser: XMLParser) {
+        events.append(.endDocument)
+    }
+    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String]) {
+        events.append(.didStartElement(elementName, namespaceURI, qName, attributeDict))
+    }
+    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
+        events.append(.didEndElement(elementName, namespaceURI, qName))
+    }
+    func parser(_ parser: XMLParser, foundCharacters string: String) {
+        events.append(.foundCharacters(string))
+    }
+}
 
 class TestNSXMLParser : XCTestCase {
-    
+
     static var allTests: [(String, (TestNSXMLParser) -> () throws -> Void)] {
         return [
-            ("test_data", test_data),
+            ("test_withData", test_withData),
+            ("test_withDataEncodings", test_withDataEncodings),
+            ("test_withDataOptions", test_withDataOptions),
         ]
     }
-    
-    func test_data() {
-        let xml = Array("<test><foo>bar</foo></test>".utf8CString)
+
+    // Helper method to embed the correct encoding in the XML header
+    static func xmlUnderTest(encoding: String.Encoding? = nil) -> String {
+        let xmlUnderTest = "<test attribute='value'><foo>bar</foo></test>"
+        guard var encoding = encoding?.description else {
+            return xmlUnderTest
+        }
+        if let open = encoding.range(of: "(") {
+            encoding = encoding.substring(from: open.upperBound)
+        }
+        if let close = encoding.range(of: ")") {
+            encoding = encoding.substring(to: close.lowerBound)
+        }
+        return "<?xml version='1.0' encoding='\(encoding.uppercased())' standalone='no'?>\n\(xmlUnderTest)\n"
+    }
+
+    static func xmlUnderTestExpectedEvents(namespaces: Bool = false) -> [XMLParserDelegateEvent] {
+        let uri: String? = namespaces ? "" : nil
+        return [
+            .startDocument,
+            .didStartElement("test", uri, namespaces ? "test" : nil, ["attribute": "value"]),
+            .didStartElement("foo", uri, namespaces ? "foo" : nil, [:]),
+            .foundCharacters("bar"),
+            .didEndElement("foo", uri, namespaces ? "foo" : nil),
+            .didEndElement("test", uri, namespaces ? "test" : nil),
+        ]
+    }
+
+
+    func test_withData() {
+        let xml = Array(TestNSXMLParser.xmlUnderTest().utf8CString)
         let data = xml.withUnsafeBufferPointer { (buffer: UnsafeBufferPointer<CChar>) -> Data in
             return buffer.baseAddress!.withMemoryRebound(to: UInt8.self, capacity: buffer.count * MemoryLayout<CChar>.stride) {
                 return Data(bytes: $0, count: buffer.count)
             }
         }
         let parser = XMLParser(data: data)
+        let stream = XMLParserDelegateEventStream()
+        parser.delegate = stream
         let res = parser.parse()
+        XCTAssertEqual(stream.events, TestNSXMLParser.xmlUnderTestExpectedEvents())
         XCTAssertTrue(res)
     }
+
+    func test_withDataEncodings() {
+        // If th <?xml header isn't present, any non-UTF8 encodings fail. This appears to be libxml2 behavior.
+        // These don't work, it may just be an issue with the `encoding=xxx`.
+        //   - .nextstep, .utf32LittleEndian
+        let encodings: [String.Encoding] = [.utf16LittleEndian, .utf16BigEndian, .utf32BigEndian, .ascii]
+        for encoding in encodings {
+            let xml = TestNSXMLParser.xmlUnderTest(encoding: encoding)
+            let parser = XMLParser(data: xml.data(using: encoding)!)
+            let stream = XMLParserDelegateEventStream()
+            parser.delegate = stream
+            let res = parser.parse()
+            XCTAssertEqual(stream.events, TestNSXMLParser.xmlUnderTestExpectedEvents())
+            XCTAssertTrue(res)
+        }
+    }
+
+    func test_withDataOptions() {
+        let xml = TestNSXMLParser.xmlUnderTest()
+        let parser = XMLParser(data: xml.data(using: String.Encoding.utf8)!)
+        parser.shouldProcessNamespaces = true
+        parser.shouldReportNamespacePrefixes = true
+        parser.shouldResolveExternalEntities = true
+        let stream = XMLParserDelegateEventStream()
+        parser.delegate = stream
+        let res = parser.parse()
+        XCTAssertEqual(stream.events, TestNSXMLParser.xmlUnderTestExpectedEvents(namespaces: true)  )
+        XCTAssertTrue(res)
+    }
+
 }