blob: f67b66efad275d1a95094cad49087e50f52c517e [file] [log] [blame]
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import SwiftShims
/// Transcodes `source` from `sourceEncoding` into `targetEncoding`, appends a
/// zero code unit, and invokes `body` on the resulting buffer.
///
/// The source is traversed twice: once to measure the transcoded length so
/// the buffer can be reserved up front, and once to fill it.
///
/// - Parameters:
///   - targetEncoding: The encoding of the code units handed to `body`.
///   - source: The code units to transcode.
///   - sourceEncoding: The encoding in which `source` is interpreted.
///   - body: A closure that receives a pointer to the zero-terminated code
///     units and the number of code units, not counting the terminator.
/// - Returns: The value returned by `body`.
@inlinable // FIXME(sil-serialize-all)
@_semantics("optimize.sil.specialize.generic.partial.never")
internal func _withCStringAndLength<
  Source : Collection,
  SourceEncoding : Unicode.Encoding,
  TargetEncoding : Unicode.Encoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body : (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  // Pass 1: count the transcoded code units (the terminator is not counted).
  var codeUnitCount = 0
  var cursor = source.makeIterator()
  SourceEncoding.ForwardParser._parse(&cursor) {
    codeUnitCount += numericCast(
      targetEncoding._transcode($0, from: SourceEncoding.self).count)
  }

  // Pass 2: fill a buffer sized for the code units plus the terminator.
  var buffer: [TargetEncoding.CodeUnit] = []
  buffer.reserveCapacity(codeUnitCount + 1)
  cursor = source.makeIterator()
  SourceEncoding.ForwardParser._parse(&cursor) {
    buffer.append(
      contentsOf: targetEncoding._transcode($0, from: SourceEncoding.self))
  }
  buffer.append(0)

  return try body(buffer, codeUnitCount)
}
extension _StringGuts {
//
// TODO:(TODO: JIRA) This is all very bloated code; needs a rewrite given
// StringGuts' new design and the potential to run directly on internal
// storage. For now, follow a hand-coded opaque pattern.
//
/// Invokes `body` on a null-terminated sequence of code units in the given
/// encoding corresponding to the substring in `bounds`.
///
/// This is a thin wrapper over `_withCSubstringAndLength` that discards the
/// length argument before calling `body`.
///
/// - Parameters:
///   - bounds: The range used to slice this string's stored code units.
///   - targetEncoding: The encoding of the code units passed to `body`.
///   - body: A closure that receives a pointer to the transcoded,
///     null-terminated code units.
/// - Returns: The value returned by `body`.
@inlinable // FIXME(sil-serialize-all)
internal func _withCSubstring<Result, TargetEncoding: Unicode.Encoding>(
in bounds: Range<Int>,
encoding targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result {
return try _withCSubstringAndLength(in: bounds, encoding: targetEncoding) {
p,_ in try body(p)
}
}
/// Invokes `body` on a null-terminated sequence of code units in the given
/// encoding corresponding to the substring in `bounds`, together with the
/// number of code units preceding the terminator.
///
/// Dispatches on the storage representation: opaque guts are forwarded to
/// `_opaqueWithCStringAndLength`; ASCII guts are transcoded from
/// `Unicode.ASCII`; everything else is transcoded from `Unicode.UTF16`.
///
/// - Parameters:
///   - bounds: The range used to slice the unmanaged ASCII or UTF-16 view.
///   - targetEncoding: The encoding of the code units passed to `body`.
///   - body: A closure that receives the pointer and the code unit count.
/// - Returns: The value returned by `body`.
@inlinable // FIXME(sil-serialize-all)
@_semantics("optimize.sil.specialize.generic.partial.never")
internal func _withCSubstringAndLength<
Result, TargetEncoding: Unicode.Encoding
>(
in bounds: Range<Int>,
encoding targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result {
// Opaque representations take the out-of-line slow path.
if _slowPath(_isOpaque) {
return try _opaqueWithCStringAndLength(
in: bounds, encoding: targetEncoding, body)
}
// NOTE(review): presumably keeps the underlying storage alive while the
// unmanaged views below are in use -- confirm against `_fixLifetime`.
defer { _fixLifetime(self) }
if isASCII {
let ascii = _unmanagedASCIIView[bounds]
return try Swift._withCStringAndLength(
encodedAs: targetEncoding,
from: ascii.buffer,
encodedAs: Unicode.ASCII.self,
execute: body)
}
// Not ASCII: the stored code units are read through the UTF-16 view.
let utf16 = _unmanagedUTF16View[bounds]
return try Swift._withCStringAndLength(
encodedAs: targetEncoding,
from: utf16.buffer,
encodedAs: Unicode.UTF16.self,
execute: body)
}
/// Out-of-line counterpart of `_withCSubstringAndLength` for opaque guts.
///
/// Small strings are handled only when the sliced contents are ASCII; a
/// non-ASCII small string currently traps with `fatalError`. All other
/// opaque strings are sliced through `_asOpaque()` and transcoded from
/// `Unicode.UTF16`.
///
/// - Parameters:
///   - bounds: The range used to slice the small or opaque representation.
///   - targetEncoding: The encoding of the code units passed to `body`.
///   - body: A closure that receives the pointer and the code unit count.
/// - Returns: The value returned by `body`.
@usableFromInline // @opaque
func _opaqueWithCStringAndLength<
Result, TargetEncoding: Unicode.Encoding
>(
in bounds: Range<Int>,
encoding targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result {
_sanityCheck(_isOpaque)
if self._isSmall {
let small = self._smallUTF8String[bounds]
if small.isASCII {
// The ASCII bytes of the small string are fed through as UTF-8 code units.
return try small.withUnmanagedASCII {
(ascii: _UnmanagedString<Unicode.UTF8.CodeUnit>) throws -> Result in
return try Swift._withCStringAndLength(
encodedAs: targetEncoding,
from: ascii.buffer,
encodedAs: Unicode.UTF8.self,
execute: body)
}
} else {
fatalError("TODO: UTF-8 support in small strings")
}
}
defer { _fixLifetime(self) }
let opaque = _asOpaque()[bounds]
return try Swift._withCStringAndLength(
encodedAs: targetEncoding,
from: opaque,
encodedAs: Unicode.UTF16.self,
execute: body)
}
}
extension String {
  /// Creates a string from the given Unicode code units in the specified
  /// encoding.
  ///
  /// When `codeUnits` exposes contiguous bytes and the encoding is UTF-8, the
  /// bytes are decoded directly from that storage; otherwise the collection
  /// is decoded element by element. Ill-formed sequences are repaired on both
  /// paths.
  ///
  /// - Parameters:
  ///   - codeUnits: A collection of code units encoded in the encoding
  ///     specified in `sourceEncoding`.
  ///   - sourceEncoding: The encoding in which `codeUnits` should be
  ///     interpreted.
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always) // Eliminate dynamic type check when possible
  public init<C: Collection, Encoding: Unicode.Encoding>(
    decoding codeUnits: C, as sourceEncoding: Encoding.Type
  ) where C.Iterator.Element == Encoding.CodeUnit {
    if let contigBytes = codeUnits as? _HasContiguousBytes,
      sourceEncoding == UTF8.self
    {
      self = contigBytes.withUnsafeBytes { rawBufPtr in
        // An empty buffer may legitimately have a nil base address; unwrapping
        // it unchecked would be undefined behavior, so produce the empty
        // string instead.
        guard let ptr = rawBufPtr.baseAddress else {
          return String()
        }
        return String._fromUTF8(
          UnsafeBufferPointer(
            start: ptr.assumingMemoryBound(to: UInt8.self),
            count: rawBufPtr.count),
          repair: true).unsafelyUnwrapped
      }
      return
    }
    self = String._fromCodeUnits(
      codeUnits, encoding: sourceEncoding, repairIllFormedSequences: true)!
  }

  /// Creates a string from the null-terminated sequence of code units at the
  /// given pointer.
  ///
  /// - Parameters:
  ///   - nullTerminatedCodeUnits: A pointer to a sequence of contiguous code
  ///     units in the encoding specified in `sourceEncoding`, ending just
  ///     before the first zero code unit.
  ///   - sourceEncoding: The encoding in which the code units should be
  ///     interpreted.
  @inlinable // FIXME(sil-serialize-all)
  public init<Encoding: Unicode.Encoding>(
    decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
    as sourceEncoding: Encoding.Type) {
    self = String.decodeCString(
      nullTerminatedCodeUnits, as: sourceEncoding)!.result
  }

  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(encodedAs:_:)`. Do not store or return the
  /// pointer for later use.
  ///
  /// - Parameters:
  ///   - targetEncoding: The encoding of the code units that `body` receives.
  ///   - body: A closure with a pointer parameter that points to a
  ///     null-terminated sequence of code units. If `body` has a return
  ///     value, that value is also used as the return value for the
  ///     `withCString(encodedAs:_:)` method. The pointer argument is valid
  ///     only for the duration of the method's execution.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  @inlinable // FIXME(sil-serialize-all)
  public func withCString<Result, TargetEncoding: Unicode.Encoding>(
    encodedAs targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    return try _guts._withCSubstring(
      in: 0..<_guts.count,
      encoding: TargetEncoding.self,
      body)
  }
}
// FIXME: the complexity documentation for most methods on String ought to be
// qualified with "amortized" at least, as Characters are variable-length.
/// A Unicode string value that is a collection of characters.
///
/// A string is a series of characters, such as `"Swift"`, that forms a
/// collection. Strings in Swift are Unicode correct and locale insensitive,
/// and are designed to be efficient. The `String` type bridges with the
/// Objective-C class `NSString` and offers interoperability with C functions
/// that work with strings.
///
/// You can create new strings using string literals or string interpolations.
/// A *string literal* is a series of characters enclosed in quotes.
///
/// let greeting = "Welcome!"
///
/// *String interpolations* are string literals that evaluate any included
/// expressions and convert the results to string form. String interpolations
/// give you an easy way to build a string from multiple pieces. Wrap each
/// expression in a string interpolation in parentheses, prefixed by a
/// backslash.
///
/// let name = "Rosa"
/// let personalizedGreeting = "Welcome, \(name)!"
/// // personalizedGreeting == "Welcome, Rosa!"
///
/// let price = 2
/// let number = 3
/// let cookiePrice = "\(number) cookies: $\(price * number)."
/// // cookiePrice == "3 cookies: $6."
///
/// Combine strings using the concatenation operator (`+`).
///
/// let longerGreeting = greeting + " We're glad you're here!"
/// // longerGreeting == "Welcome! We're glad you're here!"
///
/// Multiline string literals are enclosed in three double quotation marks
/// (`"""`), with each delimiter on its own line. Indentation is stripped from
/// each line of a multiline string literal to match the indentation of the
/// closing delimiter.
///
/// let banner = """
/// __,
/// ( o /) _/_
/// `. , , , , // /
/// (___)(_(_/_(_ //_ (__
/// /)
/// (/
/// """
///
/// Modifying and Comparing Strings
/// ===============================
///
/// Strings always have value semantics. Modifying a copy of a string leaves
/// the original unaffected.
///
/// var otherGreeting = greeting
/// otherGreeting += " Have a nice time!"
/// // otherGreeting == "Welcome! Have a nice time!"
///
/// print(greeting)
/// // Prints "Welcome!"
///
/// Comparing strings for equality using the equal-to operator (`==`) or a
/// relational operator (like `<` or `>=`) is always performed using Unicode
/// canonical representation. As a result, different representations of a
/// string compare as being equal.
///
/// let cafe1 = "Cafe\u{301}"
/// let cafe2 = "Café"
/// print(cafe1 == cafe2)
/// // Prints "true"
///
/// The Unicode scalar value `"\u{301}"` modifies the preceding character to
/// include an accent, so `"e\u{301}"` has the same canonical representation
/// as the single Unicode scalar value `"é"`.
///
/// Basic string operations are not sensitive to locale settings, ensuring that
/// string comparisons and other operations always have a single, stable
/// result, allowing strings to be used as keys in `Dictionary` instances and
/// for other purposes.
///
/// Accessing String Elements
/// =========================
///
/// A string is a collection of *extended grapheme clusters*, which approximate
/// human-readable characters. Many individual characters, such as "é", "김",
/// and "🇮🇳", can be made up of multiple Unicode scalar values. These scalar
/// values are combined by Unicode's boundary algorithms into extended
/// grapheme clusters, represented by the Swift `Character` type. Each element
/// of a string is represented by a `Character` instance.
///
/// For example, to retrieve the first word of a longer string, you can search
/// for a space and then create a substring from a prefix of the string up to
/// that point:
///
/// let name = "Marie Curie"
/// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex
/// let firstName = name[..<firstSpace]
/// // firstName == "Marie"
///
/// The `firstName` constant is an instance of the `Substring` type---a type
/// that represents substrings of a string while sharing the original string's
/// storage. Substrings present the same interface as strings.
///
/// print("\(name)'s first name has \(firstName.count) letters.")
/// // Prints "Marie Curie's first name has 5 letters."
///
/// Accessing a String's Unicode Representation
/// ===========================================
///
/// If you need to access the contents of a string as encoded in different
/// Unicode encodings, use one of the string's `unicodeScalars`, `utf16`, or
/// `utf8` properties. Each property provides access to a view of the string
/// as a series of code units, each encoded in a different Unicode encoding.
///
/// To demonstrate the different views available for every string, the
/// following examples use this `String` instance:
///
/// let cafe = "Cafe\u{301} du 🌍"
/// print(cafe)
/// // Prints "Café du 🌍"
///
/// The `cafe` string is a collection of the nine characters that are visible
/// when the string is displayed.
///
/// print(cafe.count)
/// // Prints "9"
/// print(Array(cafe))
/// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]"
///
/// Unicode Scalar View
/// -------------------
///
/// A string's `unicodeScalars` property is a collection of Unicode scalar
/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar
/// value is represented by a `Unicode.Scalar` instance and is equivalent to a
/// UTF-32 code unit.
///
/// print(cafe.unicodeScalars.count)
/// // Prints "10"
/// print(Array(cafe.unicodeScalars))
/// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]"
/// print(cafe.unicodeScalars.map { $0.value })
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]"
///
/// The `unicodeScalars` view's elements comprise each Unicode scalar value in
/// the `cafe` string. In particular, because `cafe` was declared using the
/// decomposed form of the `"é"` character, `unicodeScalars` contains the
/// scalar values for both the letter `"e"` (101) and the accent character
/// `"´"` (769).
///
/// UTF-16 View
/// -----------
///
/// A string's `utf16` property is a collection of UTF-16 code units, the
/// 16-bit encoding form of the string's Unicode scalar values. Each code unit
/// is stored as a `UInt16` instance.
///
/// print(cafe.utf16.count)
/// // Prints "11"
/// print(Array(cafe.utf16))
/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]"
///
/// The elements of the `utf16` view are the code units for the string when
/// encoded in UTF-16. These elements match those accessed through indexed
/// `NSString` APIs.
///
/// let nscafe = cafe as NSString
/// print(nscafe.length)
/// // Prints "11"
/// print(nscafe.character(at: 3))
/// // Prints "101"
///
/// UTF-8 View
/// ----------
///
/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit
/// encoding form of the string's Unicode scalar values. Each code unit is
/// stored as a `UInt8` instance.
///
/// print(cafe.utf8.count)
/// // Prints "14"
/// print(Array(cafe.utf8))
/// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]"
///
/// The elements of the `utf8` view are the code units for the string when
/// encoded in UTF-8. This representation matches the one used when `String`
/// instances are passed to C APIs.
///
/// let cLength = strlen(cafe)
/// print(cLength)
/// // Prints "14"
///
/// Measuring the Length of a String
/// ================================
///
/// When you need to know the length of a string, you must first consider what
/// you'll use the length for. Are you measuring the number of characters that
/// will be displayed on the screen, or are you measuring the amount of
/// storage needed for the string in a particular encoding? A single string
/// can have greatly differing lengths when measured by its different views.
///
/// For example, an ASCII character like the capital letter *A* is represented
/// by a single element in each of its four views. The Unicode scalar value of
/// *A* is `65`, which is small enough to fit in a single code unit in both
/// UTF-16 and UTF-8.
///
/// let capitalA = "A"
/// print(capitalA.count)
/// // Prints "1"
/// print(capitalA.unicodeScalars.count)
/// // Prints "1"
/// print(capitalA.utf16.count)
/// // Prints "1"
/// print(capitalA.utf8.count)
/// // Prints "1"
///
/// On the other hand, an emoji flag character is constructed from a pair of
/// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of these
/// scalar values, in turn, is too large to fit into a single UTF-16 or UTF-8
/// code unit. As a result, each view of the string `"🇵🇷"` reports a different
/// length.
///
/// let flag = "🇵🇷"
/// print(flag.count)
/// // Prints "1"
/// print(flag.unicodeScalars.count)
/// // Prints "2"
/// print(flag.utf16.count)
/// // Prints "4"
/// print(flag.utf8.count)
/// // Prints "8"
///
/// To check whether a string is empty, use its `isEmpty` property instead of
/// comparing the length of one of the views to `0`. Unlike with `isEmpty`,
/// calculating a view's `count` property requires iterating through the
/// elements of the string.
///
/// Accessing String View Elements
/// ==============================
///
/// To find individual elements of a string, use the appropriate view for your
/// task. For example, to retrieve the first word of a longer string, you can
/// search the string for a space and then create a new string from a prefix
/// of the string up to that point.
///
/// let name = "Marie Curie"
/// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex
/// let firstName = name[..<firstSpace]
/// print(firstName)
/// // Prints "Marie"
///
/// Strings and their views share indices, so you can access the UTF-8 view of
/// the `name` string using the same `firstSpace` index.
///
/// print(Array(name.utf8[..<firstSpace]))
/// // Prints "[77, 97, 114, 105, 101]"
///
/// Note that an index into one view may not have an exact corresponding
/// position in another view. For example, the `flag` string declared above
/// comprises a single character, but is composed of eight code units when
/// encoded as UTF-8. The following code creates constants for the first and
/// second positions in the `flag.utf8` view. Accessing the `utf8` view with
/// these indices yields the first and second UTF-8 code units.
///
/// let firstCodeUnit = flag.startIndex
/// let secondCodeUnit = flag.utf8.index(after: firstCodeUnit)
/// // flag.utf8[firstCodeUnit] == 240
/// // flag.utf8[secondCodeUnit] == 159
///
/// When used to access the elements of the `flag` string itself, however, the
/// `secondCodeUnit` index does not correspond to the position of a specific
/// character. Instead of only accessing the specific UTF-8 code unit, that
/// index is treated as the position of the character at the index's encoded
/// offset. In the case of `secondCodeUnit`, that character is still the flag
/// itself.
///
/// // flag[firstCodeUnit] == "🇵🇷"
/// // flag[secondCodeUnit] == "🇵🇷"
///
/// If you need to validate that an index from one string's view corresponds
/// with an exact position in another view, use the index's
/// `samePosition(in:)` method or the `init(_:within:)` initializer.
///
/// if let exactIndex = secondCodeUnit.samePosition(in: flag) {
/// print(flag[exactIndex])
/// } else {
/// print("No exact match for this position.")
/// }
/// // Prints "No exact match for this position."
///
/// Performance Optimizations
/// =========================
///
/// Although strings in Swift have value semantics, strings use a copy-on-write
/// strategy to store their data in a buffer. This buffer can then be shared
/// by different copies of a string. A string's data is only copied lazily,
/// upon mutation, when more than one string instance is using the same
/// buffer. Therefore, the first in any sequence of mutating operations may
/// cost O(*n*) time and space.
///
/// When a string's contiguous storage fills up, a new buffer must be allocated
/// and data must be moved to the new storage. String buffers use an
/// exponential growth strategy that makes appending to a string a constant
/// time operation when averaged over many append operations.
///
/// Bridging Between String and NSString
/// ====================================
///
/// Any `String` instance can be bridged to `NSString` using the type-cast
/// operator (`as`), and any `String` instance that originates in Objective-C
/// may use an `NSString` instance as its storage. Because any arbitrary
/// subclass of `NSString` can become a `String` instance, there are no
/// guarantees about representation or efficiency when a `String` instance is
/// backed by `NSString` storage. Because `NSString` is immutable, it is just
/// as though the storage was shared by a copy. The first in any sequence of
/// mutating operations causes elements to be copied into unique, contiguous
/// storage which may cost O(*n*) time and space, where *n* is the length of
/// the string's encoded representation (or more, if the underlying `NSString`
/// has unusual performance characteristics).
///
/// For more information about the Unicode terms used in this discussion, see
/// the [Unicode.org glossary][glossary]. In particular, this discussion
/// mentions [extended grapheme clusters][clusters], [Unicode scalar
/// values][scalars], and [canonical equivalence][equivalence].
///
/// [glossary]: http://www.unicode.org/glossary/
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent
@_fixed_layout
public struct String {
  // The sole stored property: the string's `_StringGuts` value.
  public // SPI(Foundation)
  var _guts: _StringGuts

  /// Creates an empty string.
  ///
  /// Using this initializer is equivalent to initializing a string with an
  /// empty string literal.
  ///
  ///     let empty = ""
  ///     let alsoEmpty = String()
  @inlinable // FIXME(sil-serialize-all)
  public init() {
    _guts = _StringGuts()
  }

  /// Creates a string that stores the given guts value as-is.
  @inlinable // FIXME(sil-serialize-all)
  internal init(_ guts: _StringGuts) {
    _guts = guts
  }
}
extension String {
/// Debugging aid that forwards to `_dump()` on the string's `_guts`.
///
/// Marked for removal by the FIXME below; do not rely on it.
public func _dump() { // FIXME: remove
self._guts._dump()
}
}
/// Returns `true` when every byte in `input` is in the ASCII range
/// (`0x00...0x7F`); an empty buffer yields `true`.
internal func _isAllASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
  // A single byte above 0x7F is enough to disqualify the buffer.
  let hasNonASCIIByte = input.contains(where: { $0 > 0x7F })
  return !hasNonASCIIByte
}
// TODO: re-organize a bit before merging...
/// A type that can hand a closure an `UnsafeRawBufferPointer` over its
/// contents.
///
/// `String.init(decoding:as:)` in this file casts its input to this protocol
/// to decode UTF-8 directly from the exposed bytes.
@usableFromInline
internal protocol _HasContiguousBytes {
/// Calls `body` with a raw buffer over the receiver's bytes and returns
/// whatever `body` returns, rethrowing any error it throws.
func withUnsafeBytes<R>(
_ body: (UnsafeRawBufferPointer) throws -> R
) rethrows -> R
}
// The extension body is empty, so the `withUnsafeBytes` requirement is
// satisfied by a member of `Array` declared outside this extension.
extension Array: _HasContiguousBytes {}
extension UnsafeBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a raw-byte view of the memory this buffer covers.
  ///
  /// The byte count is `count` multiplied by the element stride. A buffer
  /// whose `baseAddress` is `nil` (which is only valid for an empty buffer)
  /// produces a raw buffer with a `nil` base address instead of unwrapping
  /// the missing pointer, which would be undefined behavior.
  ///
  /// - Parameter body: A closure that receives the raw buffer.
  /// - Returns: The value returned by `body`.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // The optional-preserving conversion maps a nil base address to nil.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
extension UnsafeMutableBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a read-only raw-byte view of the memory this buffer
  /// covers.
  ///
  /// The byte count is `count` multiplied by the element stride. A buffer
  /// whose `baseAddress` is `nil` (which is only valid for an empty buffer)
  /// produces a raw buffer with a `nil` base address instead of unwrapping
  /// the missing pointer, which would be undefined behavior.
  ///
  /// - Parameter body: A closure that receives the raw buffer.
  /// - Returns: The value returned by `body`.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // The optional-preserving conversion maps a nil base address to nil.
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
extension String : _ExpressibleByBuiltinUnicodeScalarLiteral {
  /// Creates a string from a builtin Unicode scalar literal by converting the
  /// value to a `Unicode.Scalar` and delegating to `init(_:)`.
  @inlinable // FIXME(sil-serialize-all)
  @_effects(readonly)
  public // @testable
  init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
    let scalar = Unicode.Scalar(_value: UInt32(value))
    self.init(scalar)
  }

  /// Creates a string containing the single given Unicode scalar.
  ///
  /// ASCII scalars are stored in the small-string form when one can be
  /// created; every other case is built from a one-element UTF-32 collection.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ scalar: Unicode.Scalar) {
    // Until we have UTF-8 support in small string, need to be large
    //
    // TODO: All scalars are small
    let isASCII = scalar.value <= 0x7f
    if isASCII {
      if let small = _SmallUTF8String(scalar) {
        self = String(_StringGuts(small))
        return
      }
      // On i386 and arm this falls through to the large path below; on every
      // other architecture failing to build the small string is an internal
      // error.
#if !(arch(i386) || arch(arm))
      _sanityCheckFailure("Couldn't fit ASCII scalar into small string?")
#endif
    }

    let utf32CodeUnits = CollectionOfOne(scalar.value)
    let decoded = String._fromCodeUnits(
      utf32CodeUnits,
      encoding: UTF32.self,
      repairIllFormedSequences: false)
    self = decoded._unsafelyUnwrappedUnchecked
  }
}
extension String : _ExpressibleByBuiltinExtendedGraphemeClusterLiteral {
/// Creates a string from a builtin extended grapheme cluster literal.
///
/// All three arguments are forwarded unchanged to
/// `init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:)`.
///
/// - Parameters:
///   - start: The raw pointer passed on as the literal's start.
///   - utf8CodeUnitCount: The code unit count passed on for the literal.
///   - isASCII: The ASCII flag passed on for the literal.
@inlinable
@_effects(readonly)
@_semantics("string.makeUTF8")
public init(
_builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
utf8CodeUnitCount: Builtin.Word,
isASCII: Builtin.Int1
) {
self.init(
_builtinStringLiteral: start,
utf8CodeUnitCount: utf8CodeUnitCount,
isASCII: isASCII)
}
}
extension String : _ExpressibleByBuiltinUTF16StringLiteral {
  /// Creates a string from a builtin UTF-16 string literal.
  ///
  /// The literal is stored in the small-string form when one can be created
  /// from its code units; otherwise the literal's buffer is wrapped in an
  /// `_UnmanagedString`.
  ///
  /// - Parameters:
  ///   - start: A raw pointer to the literal's first `UInt16` code unit.
  ///   - utf16CodeUnitCount: The number of `UInt16` code units at `start`.
  @inlinable
  @_effects(readonly)
  @_semantics("string.makeUTF16")
  public init(
    _builtinUTF16StringLiteral start: Builtin.RawPointer,
    utf16CodeUnitCount: Builtin.Word
  ) {
    let codeUnits = UnsafeBufferPointer(
      start: UnsafeRawPointer(start).assumingMemoryBound(to: UInt16.self),
      count: Int(utf16CodeUnitCount))

    if let small = _SmallUTF8String(codeUnits) {
      self = String(_StringGuts(small))
    } else {
      self = String(_StringGuts(_large: _UnmanagedString(codeUnits)))
    }
  }
}
extension String : _ExpressibleByBuiltinStringLiteral {
  /// Creates a string from a builtin UTF-8 string literal.
  ///
  /// The representation is chosen in this order: an empty literal becomes
  /// `String()`; a literal that fits becomes a small string; a literal
  /// flagged as ASCII wraps the literal's buffer in an `_UnmanagedString`;
  /// anything else goes through `String._fromWellFormedUTF8`.
  ///
  /// - Parameters:
  ///   - start: A raw pointer to the literal's first `UInt8` code unit.
  ///   - utf8CodeUnitCount: The number of `UInt8` code units at `start`.
  ///   - isASCII: Whether the literal is flagged as ASCII-only.
  @inline(__always)
  @inlinable
  @_effects(readonly)
  @_semantics("string.makeUTF8")
  public init(
    _builtinStringLiteral start: Builtin.RawPointer,
    utf8CodeUnitCount: Builtin.Word,
    isASCII: Builtin.Int1
  ) {
    let literalBytes = UnsafeBufferPointer(
      start: UnsafeRawPointer(start).assumingMemoryBound(to: UInt8.self),
      count: Int(utf8CodeUnitCount))

    guard !literalBytes.isEmpty else {
      self.init()
      return
    }

    if let small = _SmallUTF8String(literalBytes) {
      self = String(_StringGuts(small))
    } else if _fastPath(Bool(isASCII)) {
      self = String(_StringGuts(_large: _UnmanagedString(literalBytes)))
    } else {
      self = String._fromWellFormedUTF8(literalBytes)
    }
  }
}
extension String : ExpressibleByStringLiteral {
/// Creates an instance initialized to the given string value.
///
/// Do not call this initializer directly. It is used by the compiler when you
/// initialize a string using a string literal. For example:
///
/// let nextStop = "Clark & Lake"
///
/// This assignment to the `nextStop` constant calls this string literal
/// initializer behind the scenes.
///
/// - Parameter value: The string value to adopt; it is assigned to `self`
///   unchanged.
@inlinable // FIXME(sil-serialize-all)
public init(stringLiteral value: String) {
self = value
}
}
extension String : CustomDebugStringConvertible {
  /// A representation of the string that is suitable for debugging.
  ///
  /// The result is the string's Unicode scalars, each passed through
  /// `escaped(asASCII: false)`, wrapped in a pair of double quotes.
  public var debugDescription: String {
    let quote = "\""
    var quoted = quote
    for scalar in unicodeScalars {
      quoted.append(scalar.escaped(asASCII: false))
    }
    quoted.append(quote)
    return quoted
  }
}
extension String {
/// Returns the number of code units occupied by this string
/// in the given encoding.
///
/// The count is obtained by running `_encode` with a closure that increments
/// a counter once per emitted code unit.
@inlinable // FIXME(sil-serialize-all)
internal func _encodedLength<
Encoding: Unicode.Encoding
>(_ encoding: Encoding.Type) -> Int {
var codeUnitCount = 0
self._encode(encoding, into: { _ in codeUnitCount += 1 })
return codeUnitCount
}
//
// TODO (TODO: JIRA): This needs to be completely rewritten. It's about 12KB
// of code, most of which are MOV instructions. Keeping the by-hand opaque
// visitation pattern for now.
//
// FIXME: this function may not handle the case when a wrapped NSString
// contains unpaired surrogates. Fix this before exposing this function as a
// public API. But it is unclear if it is valid to have such an NSString in
// the first place. If it is not, we should not be crashing in an obscure
// way -- add a test for that.
// Related: <rdar://problem/17340917> Please document how NSString interacts
// with unpaired surrogates
/// Transcodes this string into `encoding`, passing each resulting code unit
/// to `processCodeUnit` in order.
///
/// Opaque guts are handed to `_opaqueEncode`. ASCII guts are emitted either
/// by widening each byte (for ASCII, UTF-8, UTF-16 and UTF-32 targets) or by
/// encoding each byte as a scalar. All other guts are parsed as UTF-16 and
/// transcoded scalar by scalar.
///
/// - Parameters:
///   - encoding: The target encoding.
///   - processCodeUnit: A closure called once per emitted code unit.
@inlinable // FIXME(sil-serialize-all)
internal func _encode<Encoding: Unicode.Encoding>(
_ encoding: Encoding.Type,
into processCodeUnit: (Encoding.CodeUnit) -> Void
) {
if _slowPath(_guts._isOpaque) {
_opaqueEncode(encoding, into: processCodeUnit)
return
}
// NOTE(review): presumably keeps the storage alive while the unmanaged
// views below are read -- confirm against `_fixLifetime`.
defer { _fixLifetime(self) }
if _guts.isASCII {
let ascii = _guts._unmanagedASCIIView
// For these four target encodings each ASCII byte is converted directly
// to the target code unit type, one code unit per byte.
if encoding == Unicode.ASCII.self
|| encoding == Unicode.UTF8.self
|| encoding == Unicode.UTF16.self
|| encoding == Unicode.UTF32.self {
ascii.forEach {
processCodeUnit(Encoding.CodeUnit(truncatingIfNeeded: $0))
}
} else {
// TODO: be sure tests exercise this code path.
for b in ascii {
Encoding._encode(
Unicode.Scalar(_unchecked: UInt32(b))).forEach(processCodeUnit)
}
}
return
}
// Non-ASCII contiguous storage: parse UTF-16 and transcode each scalar.
let utf16 = _guts._unmanagedUTF16View
var i = utf16.makeIterator()
Unicode.UTF16.ForwardParser._parse(&i) {
Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
}
}
/// Out-of-line counterpart of `_encode` for small and opaque guts.
///
/// Small strings are viewed as UTF-16 through `withUnmanagedUTF16`; any
/// other string is read through `_asOpaque()`. In both cases the UTF-16 code
/// units are parsed and transcoded scalar by scalar.
///
/// - Parameters:
///   - encoding: The target encoding.
///   - processCodeUnit: A closure called once per emitted code unit.
@usableFromInline // @opaque
internal func _opaqueEncode<Encoding: Unicode.Encoding>(
_ encoding: Encoding.Type,
into processCodeUnit: (Encoding.CodeUnit) -> Void
) {
// TODO: ASCII fast path, and probably adjust this interface too.
if _guts._isSmall {
_guts._smallUTF8String.withUnmanagedUTF16 { utf16 in
var i = utf16.makeIterator()
Unicode.UTF16.ForwardParser._parse(&i) {
Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
}
}
return
}
_sanityCheck(_guts._isOpaque)
defer { _fixLifetime(self) }
let opaque = _guts._asOpaque()
var i = opaque.makeIterator()
Unicode.UTF16.ForwardParser._parse(&i) {
Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
}
}
}
// Support for copy-on-write
extension String {
/// Appends the given string to this string.
///
/// The following example builds a customized greeting by using the
/// `append(_:)` method:
///
/// var greeting = "Hello, "
/// if let name = getUserName() {
/// greeting.append(name)
/// } else {
/// greeting.append("friend")
/// }
/// print(greeting)
/// // Prints "Hello, friend"
///
/// - Parameter other: Another string.
public mutating func append(_ other: String) {
// The work is delegated to the guts' own `append`.
self._guts.append(other._guts)
}
/// Appends the given Unicode scalar to the string.
///
/// This overload is marked unavailable; use `append(_: String)` instead.
///
/// - Parameter x: A Unicode scalar value.
///
/// - Complexity: Appending a Unicode scalar to a string averages to O(1)
/// over many additions.
@available(*, unavailable, message: "Replaced by append(_: String)")
public mutating func append(_ x: Unicode.Scalar) {
// The declaration is unavailable, so this body is a placeholder only.
Builtin.unreachable()
}
// TODO(SSO): Consider small-checking version
/// Creates a string whose guts are built from the given storage object via
/// `_StringGuts(_large:)`.
///
/// - Parameter storage: The storage object to adopt.
@inlinable // FIXME(sil-serialize-all)
init<CodeUnit>(_largeStorage storage: _SwiftStringStorage<CodeUnit>)
where CodeUnit : FixedWidthInteger & UnsignedInteger {
_guts = _StringGuts(_large: storage)
}
}
extension String {
  /// Returns a new string made by appending `rhs` to a copy of `lhs`.
  ///
  /// - Parameters:
  ///   - lhs: The string that forms the start of the result.
  ///   - rhs: The string appended after `lhs`.
  @inlinable // FIXME(sil-serialize-all)
  @_effects(readonly)
  @_semantics("string.concat")
  public static func + (lhs: String, rhs: String) -> String {
    var concatenated = lhs
    concatenated.append(rhs)
    return concatenated
  }

  /// Appends `rhs` to `lhs` in place.
  ///
  /// - Parameters:
  ///   - lhs: The string to extend.
  ///   - rhs: The string to append.
  @inlinable // FIXME(sil-serialize-all)
  public static func += (lhs: inout String, rhs: String) {
    lhs.append(rhs)
  }
}
extension Sequence where Element: StringProtocol {
  /// Returns a new string by concatenating the elements of the sequence,
  /// adding the given separator between each element.
  ///
  /// The following example shows how an array of strings can be joined to a
  /// single, comma-separated string:
  ///
  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
  ///     let list = cast.joined(separator: ", ")
  ///     print(list)
  ///     // Prints "Vivien, Marlon, Kim, Karl"
  ///
  /// - Parameter separator: A string to insert between each of the elements
  ///   in this sequence. The default separator is an empty string.
  /// - Returns: A single, concatenated string.
  @_specialize(where Self == Array<Substring>)
  @_specialize(where Self == Array<String>)
  public func joined(separator: String = "") -> String {
    return _joined(separator: separator)
  }

  /// Shared implementation behind the public `joined(separator:)` methods.
  ///
  /// When the sequence supports a preprocessing pass, the total size of the
  /// result is computed first so that capacity can be reserved once; the
  /// elements and separators are then appended in order.
  ///
  /// - Parameter separator: A string to insert between each of the elements.
  /// - Returns: A single, concatenated string.
  internal func _joined(separator: String = "") -> String {
    let separatorSize = separator._guts.count

    // Sum each element's size plus one separator per element, then drop the
    // one separator that would trail the final element.
    let reservation = self._preprocessingPass {
      () -> Int in
      var r = 0
      for chunk in self {
        r += separatorSize + chunk._encodedOffsetRange.count
      }
      return r > 0 ? r - separatorSize : 0
    }

    let capacity = reservation ?? separatorSize
    var result = ""
    result.reserveCapacity(capacity)

    // With no separator the elements are simply appended back to back.
    if separator.isEmpty {
      for x in self {
        result._guts.append(x)
      }
      return result
    }

    // Otherwise a separator precedes every element except the first.
    var iter = makeIterator()
    if let first = iter.next() {
      result._guts.append(first)
      while let next = iter.next() {
        result.append(separator)
        result._guts.append(next)
      }
    }
    return result
  }
}
// This overload is necessary because String now conforms to
// BidirectionalCollection, and there are other `joined` overloads that are
// considered more specific. See Flatten.swift.gyb.
extension BidirectionalCollection where Element == String {
/// Returns a new string by concatenating the elements of the sequence,
/// adding the given separator between each element.
///
/// The following example shows how an array of strings can be joined to a
/// single, comma-separated string:
///
/// let cast = ["Vivien", "Marlon", "Kim", "Karl"]
/// let list = cast.joined(separator: ", ")
/// print(list)
/// // Prints "Vivien, Marlon, Kim, Karl"
///
/// - Parameter separator: A string to insert between each of the elements
/// in this sequence. The default separator is an empty string.
/// - Returns: A single, concatenated string.
@_specialize(where Self == Array<String>)
public func joined(separator: String = "") -> String {
// Forwards to the shared `_joined(separator:)` implementation.
return _joined(separator: separator)
}
}
#if _runtime(_ObjC)
/// Body-less declaration bound by `@_silgen_name` to the symbol
/// `swift_stdlib_NSStringLowercaseString`; the implementation lives outside
/// this file. It is only declared when `_runtime(_ObjC)` is true.
///
/// - Parameter str: The object to convert.
///   NOTE(review): presumably a bridged `NSString`; confirm against the
///   implementation of the symbol.
/// - Returns: A `_CocoaString`.
///   NOTE(review): presumably the lowercased copy of `str`; confirm.
@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringLowercaseString")
internal func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _CocoaString
/// Body-less declaration bound by `@_silgen_name` to the symbol
/// `swift_stdlib_NSStringUppercaseString`; the implementation lives outside
/// this file. It is only declared when `_runtime(_ObjC)` is true.
///
/// - Parameter str: The object to convert.
///   NOTE(review): presumably a bridged `NSString`; confirm against the
///   implementation of the symbol.
/// - Returns: A `_CocoaString`.
///   NOTE(review): presumably the uppercased copy of `str`; confirm.
@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringUppercaseString")
internal func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
#else
/// Builds a lowercased copy of `str` by running
/// `_swift_stdlib_unicode_strToLower` over its UTF-16 code units.
///
/// - Parameter str: The string to convert.
/// - Returns: A string backed by newly created UTF-16 storage that holds the
///   converted code units.
internal func _nativeUnicodeLowercaseString(_ str: String) -> String {
  // TODO (TODO: JIRA): check for small
  let contiguous = str._guts._extractContiguousUTF16()
  // Keep the extracted guts alive until the end of the function, since the
  // unmanaged view below is read from throughout.
  defer { _fixLifetime(contiguous) }
  let input = contiguous._unmanagedUTF16View
  let inputCount = input.count

  // Optimistic first attempt: assume the output fits in the input's length.
  var output = _SwiftStringStorage<UTF16.CodeUnit>.create(
    capacity: inputCount,
    count: inputCount)
  let needed = Int(
    _swift_stdlib_unicode_strToLower(
      output.start, Int32(output.capacity), // FIXME: handle overflow case
      input.start, Int32(inputCount)))

  // The first pass reports the size it requires; when that does not fit,
  // allocate storage of exactly that size and convert again.
  if output.capacity < needed {
    output = _SwiftStringStorage<UTF16.CodeUnit>.create(
      capacity: needed,
      count: needed)
    _swift_stdlib_unicode_strToLower(
      output.start, Int32(output.capacity), // FIXME: handle overflow case
      input.start, Int32(inputCount))
  }
  output.count = needed
  return String(_largeStorage: output)
}
/// Builds an uppercased copy of `str` by running
/// `_swift_stdlib_unicode_strToUpper` over its UTF-16 code units.
///
/// - Parameter str: The string to convert.
/// - Returns: A string backed by newly created UTF-16 storage that holds the
///   converted code units.
@usableFromInline // FIXME(sil-serialize-all)
internal func _nativeUnicodeUppercaseString(_ str: String) -> String {
  // TODO (TODO: JIRA): check for small
  let contiguous = str._guts._extractContiguousUTF16()
  // Keep the extracted guts alive until the end of the function, since the
  // unmanaged view below is read from throughout.
  defer { _fixLifetime(contiguous) }
  let input = contiguous._unmanagedUTF16View
  let inputCount = input.count

  // Optimistic first attempt: assume the output fits in the input's length.
  var output = _SwiftStringStorage<UTF16.CodeUnit>.create(
    capacity: inputCount,
    count: inputCount)
  let needed = Int(
    _swift_stdlib_unicode_strToUpper(
      output.start, Int32(output.capacity), // FIXME: handle overflow case
      input.start, Int32(inputCount)))

  // The first pass reports the size it requires; when that does not fit,
  // allocate storage of exactly that size and convert again.
  if output.capacity < needed {
    output = _SwiftStringStorage<UTF16.CodeUnit>.create(
      capacity: needed,
      count: needed)
    _swift_stdlib_unicode_strToUpper(
      output.start, Int32(output.capacity), // FIXME: handle overflow case
      input.start, Int32(inputCount))
  }
  output.count = needed
  return String(_largeStorage: output)
}
#endif
// Unicode algorithms
extension String {
  // FIXME: implement case folding without relying on Foundation.
  // <rdar://problem/17550602> [unicode] Implement case folding

  /// Bit set identifying the ASCII lowercase letters, i.e. the code units
  /// that `uppercased()` has to shift.
  ///
  /// The bit for a code unit is found at index `(codeUnit - 1) / 2`, so every
  /// bit stands for two adjacent code units. Bits 48 through 60 are set,
  /// which corresponds to `a` (0x61) through `z` (0x7a).
  @inlinable // FIXME(sil-serialize-all)
  internal var _asciiLowerCaseTable: UInt64 {
    @inline(__always)
    get {
      return 0x1FFF_0000_0000_0000
    }
  }

  /// Bit set identifying the ASCII uppercase letters, indexed the same way as
  /// `_asciiLowerCaseTable`. Bits 32 through 44 are set, which corresponds to
  /// `A` (0x41) through `Z` (0x5a).
  @inlinable // FIXME(sil-serialize-all)
  internal var _asciiUpperCaseTable: UInt64 {
    @inline(__always)
    get {
      return 0x0000_1FFF_0000_0000
    }
  }

  /// Returns a copy of the string with its letters converted to lowercase.
  ///
  /// For example:
  ///
  ///     let cafe = "BBQ Café 🍵"
  ///     print(cafe.lowercased())
  ///     // Prints "bbq café 🍵"
  ///
  /// - Returns: A lowercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  public func lowercased() -> String {
    // Anything that is not pure ASCII goes through the Unicode-aware path.
    guard _guts.isASCII else {
      #if _runtime(_ObjC)
      return String(_cocoaString:
        _stdlib_NSStringLowercaseString(self._bridgeToObjectiveCImpl()))
      #else
      return _nativeUnicodeLowercaseString(self)
      #endif
    }

    var asciiGuts = _guts
    asciiGuts.withMutableASCIIStorage(unusedCapacity: 0) { storage in
      for offset in 0..<storage._value.count {
        // Branch-free equivalent of
        //   switch unit {
        //   case 0x41...0x5a: unit &+ 0x20
        //   default:          unit
        //   }
        // The table lookup yields 1 for an uppercase letter and 0 otherwise;
        // shifting that left by five gives the 0x20 or 0x0 to add.
        let unit = storage._value.start[offset]
        let tableIndex = UInt64(((unit &- 1) & 0b0111_1111) &>> 1)
        let delta = ((_asciiUpperCaseTable &>> tableIndex) & 0x1) &<< 5
        // `delta` is either 0x0 or 0x20, so truncating to UInt8 is lossless
        // and the addition cannot overflow for ASCII input.
        storage._value.start[offset] = unit &+ UInt8(truncatingIfNeeded: delta)
      }
    }
    return String(asciiGuts)
  }

  /// Returns a copy of the string with its letters converted to uppercase.
  ///
  /// For example:
  ///
  ///     let cafe = "Café 🍵"
  ///     print(cafe.uppercased())
  ///     // Prints "CAFÉ 🍵"
  ///
  /// - Returns: An uppercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  public func uppercased() -> String {
    // Anything that is not pure ASCII goes through the Unicode-aware path.
    guard _guts.isASCII else {
      #if _runtime(_ObjC)
      return String(_cocoaString:
        _stdlib_NSStringUppercaseString(self._bridgeToObjectiveCImpl()))
      #else
      return _nativeUnicodeUppercaseString(self)
      #endif
    }

    var asciiGuts = _guts
    asciiGuts.withMutableASCIIStorage(unusedCapacity: 0) { storage in
      for offset in 0..<storage._value.count {
        // Mirror image of the loop in `lowercased()`: look the code unit up
        // in the lowercase table and subtract 0x20 when its bit is set.
        let unit = storage._value.start[offset]
        let tableIndex = UInt64(((unit &- 1) & 0b0111_1111) &>> 1)
        let delta = ((_asciiLowerCaseTable &>> tableIndex) & 0x1) &<< 5
        storage._value.start[offset] = unit &- UInt8(truncatingIfNeeded: delta)
      }
    }
    return String(asciiGuts)
  }

  /// Creates a string from the `description` of the given
  /// `LosslessStringConvertible` value.
  ///
  /// - Parameter value: The value whose description becomes the new string.
  @inlinable // FIXME(sil-serialize-all)
  public init<T : LosslessStringConvertible>(_ value: T) {
    self = value.description
  }
}
extension String : CustomStringConvertible {
/// The value of this string.
///
/// A string is its own textual description, so this property returns `self`
/// unchanged.
///
/// Using this property directly is discouraged. Instead, use simple
/// assignment to create a new constant or variable equal to this string.
@inlinable // FIXME(sil-serialize-all)
public var description: String {
return self
}
}