// blob: 2e49c41527ad0990a14679d7544089ec7771e43d [file] [log] [blame]
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// String interop with C
//===----------------------------------------------------------------------===//
import SwiftShims
extension String {
  /// Creates a new string by copying the null-terminated UTF-8 data referenced
  /// by the given pointer.
  ///
  /// Ill-formed UTF-8 code unit sequences in `cString` are not treated as an
  /// error. This initializer replaces each of them with the Unicode
  /// replacement character (`"\u{FFFD}"`).
  ///
  /// The following example calls this initializer with pointers to the
  /// contents of two different `CChar` arrays---the first with well-formed
  /// UTF-8 code unit sequences and the second with an ill-formed sequence at
  /// the end.
  ///
  ///     let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
  ///     validUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String(cString: ptr.baseAddress!)
  ///         print(s)
  ///     }
  ///     // Prints "Café"
  ///
  ///     let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String(cString: ptr.baseAddress!)
  ///         print(s)
  ///     }
  ///     // Prints "Caf�"
  ///
  /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
  public init(cString: UnsafePointer<CChar>) {
    // Number of code units preceding the null terminator, as reported by
    // `_nullCodeUnitOffset`.
    let byteCount = UTF8._nullCodeUnitOffset(in: cString)
    let bytes = UnsafeBufferPointer(start: cString._asUInt8, count: byteCount)
    // Only the decoded string is kept; the second tuple element is dropped.
    let (decoded, _) = String._fromUTF8Repairing(bytes)
    self = decoded
  }

  /// Creates a new string by copying the null-terminated UTF-8 data referenced
  /// by the given pointer.
  ///
  /// This is identical to `init(cString: UnsafePointer<CChar>)` but operates
  /// on an unsigned sequence of bytes.
  ///
  /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
  public init(cString: UnsafePointer<UInt8>) {
    let byteCount = UTF8._nullCodeUnitOffset(in: cString)
    let bytes = UnsafeBufferPointer(start: cString, count: byteCount)
    // Only the decoded string is kept; the second tuple element is dropped.
    let (decoded, _) = String._fromUTF8Repairing(bytes)
    self = decoded
  }

  /// Creates a new string by copying and validating the null-terminated UTF-8
  /// data referenced by the given pointer.
  ///
  /// Unlike `init(cString:)`, this initializer makes no attempt to repair
  /// ill-formed UTF-8 code unit sequences. If any are found, the result of
  /// the initializer is `nil`.
  ///
  /// The following example calls this initializer with pointers to the
  /// contents of two different `CChar` arrays---the first with well-formed
  /// UTF-8 code unit sequences and the second with an ill-formed sequence at
  /// the end.
  ///
  ///     let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
  ///     validUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String(validatingUTF8: ptr.baseAddress!)
  ///         print(s)
  ///     }
  ///     // Prints "Optional("Café")"
  ///
  ///     let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String(validatingUTF8: ptr.baseAddress!)
  ///         print(s)
  ///     }
  ///     // Prints "nil"
  ///
  /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
  public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
    let byteCount = UTF8._nullCodeUnitOffset(in: cString)
    let bytes = UnsafeBufferPointer(start: cString._asUInt8, count: byteCount)
    // `_tryFromUTF8` yields `nil` when it cannot produce a string; that
    // failure is forwarded to the caller unchanged.
    guard let validated = String._tryFromUTF8(bytes) else {
      return nil
    }
    self = validated
  }

  /// Creates a new string by copying the null-terminated data referenced by
  /// the given pointer using the specified encoding.
  ///
  /// When you pass `true` as `isRepairing`, this method replaces ill-formed
  /// sequences with the Unicode replacement character (`"\u{FFFD}"`);
  /// otherwise, an ill-formed sequence causes this method to stop decoding
  /// and return `nil`.
  ///
  /// The following example calls this method with pointers to the contents of
  /// two different `UInt8` arrays---the first with well-formed UTF-8 code
  /// unit sequences and the second with an ill-formed sequence at the end.
  ///
  ///     let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0]
  ///     validUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String.decodeCString(ptr.baseAddress,
  ///                                      as: UTF8.self,
  ///                                      repairingInvalidCodeUnits: true)
  ///         print(s)
  ///     }
  ///     // Prints "Optional((result: "Café", repairsMade: false))"
  ///
  ///     let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0]
  ///     invalidUTF8.withUnsafeBufferPointer { ptr in
  ///         let s = String.decodeCString(ptr.baseAddress,
  ///                                      as: UTF8.self,
  ///                                      repairingInvalidCodeUnits: true)
  ///         print(s)
  ///     }
  ///     // Prints "Optional((result: "Caf�", repairsMade: true))"
  ///
  /// - Parameters:
  ///   - cString: A pointer to a null-terminated code sequence encoded in
  ///     `encoding`. If this pointer is `nil`, the method returns `nil`.
  ///   - encoding: The Unicode encoding of the data referenced by `cString`.
  ///   - isRepairing: Pass `true` to create a new string, even when the data
  ///     referenced by `cString` contains ill-formed sequences. Ill-formed
  ///     sequences are replaced with the Unicode replacement character
  ///     (`"\u{FFFD}"`). Pass `false` to interrupt the creation of the new
  ///     string if an ill-formed sequence is detected.
  /// - Returns: A tuple with the new string and a Boolean value that indicates
  ///   whether any repairs were made. If `isRepairing` is `false` and an
  ///   ill-formed sequence is detected, this method returns `nil`.
  @_specialize(where Encoding == Unicode.UTF8)
  @_specialize(where Encoding == Unicode.UTF16)
  @inlinable // Fold away specializations
  public static func decodeCString<Encoding : _UnicodeEncoding>(
    _ cString: UnsafePointer<Encoding.CodeUnit>?,
    as encoding: Encoding.Type,
    repairingInvalidCodeUnits isRepairing: Bool = true
  ) -> (result: String, repairsMade: Bool)? {
    // A missing pointer has nothing to decode.
    guard let start = cString else {
      return nil
    }

    // UTF-8 input is routed to the dedicated UTF-8 helpers.
    if _fastPath(encoding == Unicode.UTF8.self) {
      // In this branch the code units are viewed as `UInt8`.
      let utf8Start = UnsafeRawPointer(start).assumingMemoryBound(to: UInt8.self)
      let utf8Count = UTF8._nullCodeUnitOffset(in: utf8Start)
      let utf8 = UnsafeBufferPointer(start: utf8Start, count: utf8Count)

      guard isRepairing else {
        // Validation-only mode: either the input is accepted as-is (so no
        // repairs were made) or decoding fails.
        guard let validated = String._tryFromUTF8(utf8) else {
          return nil
        }
        return (validated, false)
      }
      return String._fromUTF8Repairing(utf8)
    }

    // Generic path: walk forward to the first zero code unit to find the end
    // of the data, then decode everything before it.
    var cursor = start
    while cursor.pointee != 0 {
      cursor += 1
    }
    let unitCount = cursor - start
    let units = UnsafeBufferPointer(start: start, count: unitCount)
    return String._fromCodeUnits(
      units, encoding: encoding, repair: isRepairing)
  }

  /// Creates a string from the null-terminated sequence of bytes at the given
  /// pointer.
  ///
  /// Decoding is delegated to `decodeCString(_:as:repairingInvalidCodeUnits:)`
  /// with its default `repairingInvalidCodeUnits` argument (`true`).
  ///
  /// - Parameters:
  ///   - nullTerminatedCodeUnits: A pointer to a sequence of contiguous code
  ///     units in the encoding specified in `sourceEncoding`, ending just
  ///     before the first zero code unit.
  ///   - sourceEncoding: The encoding in which the code units should be
  ///     interpreted.
  @_specialize(where Encoding == Unicode.UTF8)
  @_specialize(where Encoding == Unicode.UTF16)
  @inlinable // Fold away specializations
  public init<Encoding: Unicode.Encoding>(
    decodingCString ptr: UnsafePointer<Encoding.CodeUnit>,
    as sourceEncoding: Encoding.Type
  ) {
    // The pointer is non-nil and repairing mode is on, so a `nil` result is
    // not expected here; the force-unwrap traps if that expectation is ever
    // violated.
    let decoded = String.decodeCString(ptr, as: sourceEncoding)!
    self = decoded.result
  }
}
extension UnsafePointer where Pointee == UInt8 {
  /// The same address viewed as a pointer to `CChar`.
  ///
  /// The conversion goes through `UnsafeRawPointer` and
  /// `assumingMemoryBound(to:)`; no bytes are copied.
  @inlinable
  internal var _asCChar: UnsafePointer<CChar> {
    @inline(__always) get {
      let untyped = UnsafeRawPointer(self)
      return untyped.assumingMemoryBound(to: CChar.self)
    }
  }
}
extension UnsafePointer where Pointee == CChar {
  /// The same address viewed as a pointer to `UInt8`.
  ///
  /// The conversion goes through `UnsafeRawPointer` and
  /// `assumingMemoryBound(to:)`; no bytes are copied.
  @inlinable
  internal var _asUInt8: UnsafePointer<UInt8> {
    @inline(__always) get {
      let untyped = UnsafeRawPointer(self)
      return untyped.assumingMemoryBound(to: UInt8.self)
    }
  }
}