| //===----------------------------------------------------------------------===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| import SwiftShims |
| |
| @inlinable @_transparent |
| internal func unimplemented_utf8_32bit( |
| _ message: String = "", |
| file: StaticString = #file, line: UInt = #line |
| ) -> Never { |
| fatalError("32-bit: Unimplemented for UTF-8 support", file: file, line: line) |
| } |
| |
| /// A Unicode string value that is a collection of characters. |
| /// |
| /// A string is a series of characters, such as `"Swift"`, that forms a |
| /// collection. Strings in Swift are Unicode correct and locale insensitive, |
| /// and are designed to be efficient. The `String` type bridges with the |
| /// Objective-C class `NSString` and offers interoperability with C functions |
| /// that works with strings. |
| /// |
| /// You can create new strings using string literals or string interpolations. |
| /// A *string literal* is a series of characters enclosed in quotes. |
| /// |
| /// let greeting = "Welcome!" |
| /// |
| /// *String interpolations* are string literals that evaluate any included |
| /// expressions and convert the results to string form. String interpolations |
| /// give you an easy way to build a string from multiple pieces. Wrap each |
| /// expression in a string interpolation in parentheses, prefixed by a |
| /// backslash. |
| /// |
| /// let name = "Rosa" |
| /// let personalizedGreeting = "Welcome, \(name)!" |
| /// // personalizedGreeting == "Welcome, Rosa!" |
| /// |
| /// let price = 2 |
| /// let number = 3 |
| /// let cookiePrice = "\(number) cookies: $\(price * number)." |
| /// // cookiePrice == "3 cookies: $6." |
| /// |
| /// Combine strings using the concatenation operator (`+`). |
| /// |
| /// let longerGreeting = greeting + " We're glad you're here!" |
| /// // longerGreeting == "Welcome! We're glad you're here!" |
| /// |
| /// Multiline string literals are enclosed in three double quotation marks |
| /// (`"""`), with each delimiter on its own line. Indentation is stripped from |
| /// each line of a multiline string literal to match the indentation of the |
| /// closing delimiter. |
| /// |
| /// let banner = """ |
| /// __, |
| /// ( o /) _/_ |
| /// `. , , , , // / |
| /// (___)(_(_/_(_ //_ (__ |
| /// /) |
| /// (/ |
| /// """ |
| /// |
| /// Modifying and Comparing Strings |
| /// =============================== |
| /// |
| /// Strings always have value semantics. Modifying a copy of a string leaves |
| /// the original unaffected. |
| /// |
| /// var otherGreeting = greeting |
| /// otherGreeting += " Have a nice time!" |
| /// // otherGreeting == "Welcome! Have a nice time!" |
| /// |
| /// print(greeting) |
| /// // Prints "Welcome!" |
| /// |
| /// Comparing strings for equality using the equal-to operator (`==`) or a |
| /// relational operator (like `<` or `>=`) is always performed using Unicode |
| /// canonical representation. As a result, different representations of a |
| /// string compare as being equal. |
| /// |
| /// let cafe1 = "Cafe\u{301}" |
| /// let cafe2 = "Café" |
| /// print(cafe1 == cafe2) |
| /// // Prints "true" |
| /// |
| /// The Unicode scalar value `"\u{301}"` modifies the preceding character to |
| /// include an accent, so `"e\u{301}"` has the same canonical representation |
| /// as the single Unicode scalar value `"é"`. |
| /// |
| /// Basic string operations are not sensitive to locale settings, ensuring that |
| /// string comparisons and other operations always have a single, stable |
| /// result, allowing strings to be used as keys in `Dictionary` instances and |
| /// for other purposes. |
| /// |
| /// Accessing String Elements |
| /// ========================= |
| /// |
| /// A string is a collection of *extended grapheme clusters*, which approximate |
| /// human-readable characters. Many individual characters, such as "é", "김", |
| /// and "🇮🇳", can be made up of multiple Unicode scalar values. These scalar |
| /// values are combined by Unicode's boundary algorithms into extended |
| /// grapheme clusters, represented by the Swift `Character` type. Each element |
| /// of a string is represented by a `Character` instance. |
| /// |
| /// For example, to retrieve the first word of a longer string, you can search |
| /// for a space and then create a substring from a prefix of the string up to |
| /// that point: |
| /// |
| /// let name = "Marie Curie" |
| /// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex |
| /// let firstName = name[..<firstSpace] |
| /// // firstName == "Marie" |
| /// |
| /// The `firstName` constant is an instance of the `Substring` type---a type |
| /// that represents substrings of a string while sharing the original string's |
| /// storage. Substrings present the same interface as strings. |
| /// |
| /// print("\(name)'s first name has \(firstName.count) letters.") |
| /// // Prints "Marie Curie's first name has 5 letters." |
| /// |
| /// Accessing a String's Unicode Representation |
| /// =========================================== |
| /// |
| /// If you need to access the contents of a string as encoded in different |
| /// Unicode encodings, use one of the string's `unicodeScalars`, `utf16`, or |
| /// `utf8` properties. Each property provides access to a view of the string |
| /// as a series of code units, each encoded in a different Unicode encoding. |
| /// |
| /// To demonstrate the different views available for every string, the |
| /// following examples use this `String` instance: |
| /// |
| /// let cafe = "Cafe\u{301} du 🌍" |
| /// print(cafe) |
| /// // Prints "Café du 🌍" |
| /// |
| /// The `cafe` string is a collection of the nine characters that are visible |
| /// when the string is displayed. |
| /// |
| /// print(cafe.count) |
| /// // Prints "9" |
| /// print(Array(cafe)) |
| /// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]" |
| /// |
| /// Unicode Scalar View |
| /// ------------------- |
| /// |
| /// A string's `unicodeScalars` property is a collection of Unicode scalar |
| /// values, the 21-bit codes that are the basic unit of Unicode. Each scalar |
| /// value is represented by a `Unicode.Scalar` instance and is equivalent to a |
| /// UTF-32 code unit. |
| /// |
| /// print(cafe.unicodeScalars.count) |
| /// // Prints "10" |
| /// print(Array(cafe.unicodeScalars)) |
| /// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]" |
| /// print(cafe.unicodeScalars.map { $0.value }) |
| /// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]" |
| /// |
| /// The `unicodeScalars` view's elements comprise each Unicode scalar value in |
| /// the `cafe` string. In particular, because `cafe` was declared using the |
| /// decomposed form of the `"é"` character, `unicodeScalars` contains the |
| /// scalar values for both the letter `"e"` (101) and the accent character |
| /// `"´"` (769). |
| /// |
| /// UTF-16 View |
| /// ----------- |
| /// |
| /// A string's `utf16` property is a collection of UTF-16 code units, the |
| /// 16-bit encoding form of the string's Unicode scalar values. Each code unit |
| /// is stored as a `UInt16` instance. |
| /// |
| /// print(cafe.utf16.count) |
| /// // Prints "11" |
| /// print(Array(cafe.utf16)) |
| /// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]" |
| /// |
| /// The elements of the `utf16` view are the code units for the string when |
| /// encoded in UTF-16. These elements match those accessed through indexed |
| /// `NSString` APIs. |
| /// |
| /// let nscafe = cafe as NSString |
| /// print(nscafe.length) |
| /// // Prints "11" |
| /// print(nscafe.character(at: 3)) |
| /// // Prints "101" |
| /// |
| /// UTF-8 View |
| /// ---------- |
| /// |
| /// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit |
| /// encoding form of the string's Unicode scalar values. Each code unit is |
| /// stored as a `UInt8` instance. |
| /// |
| /// print(cafe.utf8.count) |
| /// // Prints "14" |
| /// print(Array(cafe.utf8)) |
| /// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]" |
| /// |
| /// The elements of the `utf8` view are the code units for the string when |
| /// encoded in UTF-8. This representation matches the one used when `String` |
| /// instances are passed to C APIs. |
| /// |
| /// let cLength = strlen(cafe) |
| /// print(cLength) |
| /// // Prints "14" |
| /// |
| /// Measuring the Length of a String |
| /// ================================ |
| /// |
| /// When you need to know the length of a string, you must first consider what |
| /// you'll use the length for. Are you measuring the number of characters that |
| /// will be displayed on the screen, or are you measuring the amount of |
| /// storage needed for the string in a particular encoding? A single string |
| /// can have greatly differing lengths when measured by its different views. |
| /// |
| /// For example, an ASCII character like the capital letter *A* is represented |
| /// by a single element in each of its four views. The Unicode scalar value of |
| /// *A* is `65`, which is small enough to fit in a single code unit in both |
| /// UTF-16 and UTF-8. |
| /// |
| /// let capitalA = "A" |
| /// print(capitalA.count) |
| /// // Prints "1" |
| /// print(capitalA.unicodeScalars.count) |
| /// // Prints "1" |
| /// print(capitalA.utf16.count) |
| /// // Prints "1" |
| /// print(capitalA.utf8.count) |
| /// // Prints "1" |
| /// |
| /// On the other hand, an emoji flag character is constructed from a pair of |
| /// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of these |
| /// scalar values, in turn, is too large to fit into a single UTF-16 or UTF-8 |
| /// code unit. As a result, each view of the string `"🇵🇷"` reports a different |
| /// length. |
| /// |
| /// let flag = "🇵🇷" |
| /// print(flag.count) |
| /// // Prints "1" |
| /// print(flag.unicodeScalars.count) |
| /// // Prints "2" |
| /// print(flag.utf16.count) |
| /// // Prints "4" |
| /// print(flag.utf8.count) |
| /// // Prints "8" |
| /// |
| /// To check whether a string is empty, use its `isEmpty` property instead of |
| /// comparing the length of one of the views to `0`. Unlike with `isEmpty`, |
| /// calculating a view's `count` property requires iterating through the |
| /// elements of the string. |
| /// |
| /// Accessing String View Elements |
| /// ============================== |
| /// |
| /// To find individual elements of a string, use the appropriate view for your |
| /// task. For example, to retrieve the first word of a longer string, you can |
| /// search the string for a space and then create a new string from a prefix |
| /// of the string up to that point. |
| /// |
| /// let name = "Marie Curie" |
| /// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex |
| /// let firstName = name[..<firstSpace] |
| /// print(firstName) |
| /// // Prints "Marie" |
| /// |
| /// Strings and their views share indices, so you can access the UTF-8 view of |
| /// the `name` string using the same `firstSpace` index. |
| /// |
| /// print(Array(name.utf8[..<firstSpace])) |
| /// // Prints "[77, 97, 114, 105, 101]" |
| /// |
| /// Note that an index into one view may not have an exact corresponding |
| /// position in another view. For example, the `flag` string declared above |
| /// comprises a single character, but is composed of eight code units when |
| /// encoded as UTF-8. The following code creates constants for the first and |
| /// second positions in the `flag.utf8` view. Accessing the `utf8` view with |
| /// these indices yields the first and second code UTF-8 units. |
| /// |
| /// let firstCodeUnit = flag.startIndex |
| /// let secondCodeUnit = flag.utf8.index(after: firstCodeUnit) |
| /// // flag.utf8[firstCodeUnit] == 240 |
| /// // flag.utf8[secondCodeUnit] == 159 |
| /// |
| /// When used to access the elements of the `flag` string itself, however, the |
| /// `secondCodeUnit` index does not correspond to the position of a specific |
| /// character. Instead of only accessing the specific UTF-8 code unit, that |
| /// index is treated as the position of the character at the index's encoded |
| /// offset. In the case of `secondCodeUnit`, that character is still the flag |
| /// itself. |
| /// |
| /// // flag[firstCodeUnit] == "🇵🇷" |
| /// // flag[secondCodeUnit] == "🇵🇷" |
| /// |
| /// If you need to validate that an index from one string's view corresponds |
| /// with an exact position in another view, use the index's |
| /// `samePosition(in:)` method or the `init(_:within:)` initializer. |
| /// |
| /// if let exactIndex = secondCodeUnit.samePosition(in: flag) { |
| /// print(flag[exactIndex]) |
| /// } else { |
| /// print("No exact match for this position.") |
| /// } |
| /// // Prints "No exact match for this position." |
| /// |
| /// Performance Optimizations |
| /// ========================= |
| /// |
| /// Although strings in Swift have value semantics, strings use a copy-on-write |
| /// strategy to store their data in a buffer. This buffer can then be shared |
| /// by different copies of a string. A string's data is only copied lazily, |
| /// upon mutation, when more than one string instance is using the same |
| /// buffer. Therefore, the first in any sequence of mutating operations may |
| /// cost O(*n*) time and space. |
| /// |
| /// When a string's contiguous storage fills up, a new buffer must be allocated |
| /// and data must be moved to the new storage. String buffers use an |
| /// exponential growth strategy that makes appending to a string a constant |
| /// time operation when averaged over many append operations. |
| /// |
| /// Bridging Between String and NSString |
| /// ==================================== |
| /// |
| /// Any `String` instance can be bridged to `NSString` using the type-cast |
| /// operator (`as`), and any `String` instance that originates in Objective-C |
| /// may use an `NSString` instance as its storage. Because any arbitrary |
| /// subclass of `NSString` can become a `String` instance, there are no |
| /// guarantees about representation or efficiency when a `String` instance is |
| /// backed by `NSString` storage. Because `NSString` is immutable, it is just |
| /// as though the storage was shared by a copy. The first in any sequence of |
| /// mutating operations causes elements to be copied into unique, contiguous |
| /// storage which may cost O(*n*) time and space, where *n* is the length of |
| /// the string's encoded representation (or more, if the underlying `NSString` |
| /// has unusual performance characteristics). |
| /// |
| /// For more information about the Unicode terms used in this discussion, see |
| /// the [Unicode.org glossary][glossary]. In particular, this discussion |
| /// mentions [extended grapheme clusters][clusters], [Unicode scalar |
| /// values][scalars], and [canonical equivalence][equivalence]. |
| /// |
| /// [glossary]: http://www.unicode.org/glossary/ |
| /// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster |
| /// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value |
| /// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent |
| @_fixed_layout |
| public struct String { |
| public // @SPI(Foundation) |
| var _guts: _StringGuts |
| |
| @inlinable @inline(__always) |
| internal init(_ _guts: _StringGuts) { |
| self._guts = _guts |
| _invariantCheck() |
| } |
| |
| /// Creates an empty string. |
| /// |
| /// Using this initializer is equivalent to initializing a string with an |
| /// empty string literal. |
| /// |
| /// let empty = "" |
| /// let alsoEmpty = String() |
| @inlinable @inline(__always) |
| @_semantics("string.init_empty") |
| public init() { self.init(_StringGuts()) } |
| } |
| |
| extension String { |
| #if !INTERNAL_CHECKS_ENABLED |
| @inlinable @inline(__always) internal func _invariantCheck() {} |
| #else |
| @usableFromInline @inline(never) @_effects(releasenone) |
| internal func _invariantCheck() { |
| } |
| #endif // INTERNAL_CHECKS_ENABLED |
| |
| public func _dump() { |
| #if INTERNAL_CHECKS_ENABLED |
| _guts._dump() |
| #endif // INTERNAL_CHECKS_ENABLED |
| } |
| } |
| |
| extension String { |
| /// Creates a string from the given Unicode code units in the specified |
| /// encoding. |
| /// |
| /// - Parameters: |
| /// - codeUnits: A collection of code units encoded in the encoding |
| /// specified in `sourceEncoding`. |
| /// - sourceEncoding: The encoding in which `codeUnits` should be |
| /// interpreted. |
| @inlinable |
| @inline(__always) // Eliminate dynamic type check when possible |
| public init<C: Collection, Encoding: Unicode.Encoding>( |
| decoding codeUnits: C, as sourceEncoding: Encoding.Type |
| ) where C.Iterator.Element == Encoding.CodeUnit { |
| if let contigBytes = codeUnits as? _HasContiguousBytes, |
| sourceEncoding == UTF8.self, |
| contigBytes._providesContiguousBytesNoCopy |
| { |
| self = contigBytes.withUnsafeBytes { rawBufPtr in |
| let ptr = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked |
| return String._fromUTF8Repairing( |
| UnsafeBufferPointer( |
| start: ptr.assumingMemoryBound(to: UInt8.self), |
| count: rawBufPtr.count)).0 |
| } |
| return |
| } |
| |
| self = String._fromCodeUnits( |
| codeUnits, encoding: sourceEncoding, repair: true)!.0 |
| } |
| |
| /// Calls the given closure with a pointer to the contents of the string, |
| /// represented as a null-terminated sequence of code units. |
| /// |
| /// The pointer passed as an argument to `body` is valid only during the |
| /// execution of `withCString(encodedAs:_:)`. Do not store or return the |
| /// pointer for later use. |
| /// |
| /// - Parameters: |
| /// - body: A closure with a pointer parameter that points to a |
| /// null-terminated sequence of code units. If `body` has a return |
| /// value, that value is also used as the return value for the |
| /// `withCString(encodedAs:_:)` method. The pointer argument is valid |
| /// only for the duration of the method's execution. |
| /// - targetEncoding: The encoding in which the code units should be |
| /// interpreted. |
| /// - Returns: The return value, if any, of the `body` closure parameter. |
| @inlinable |
| @inline(__always) // Eliminate dynamic type check when possible |
| public func withCString<Result, TargetEncoding: Unicode.Encoding>( |
| encodedAs targetEncoding: TargetEncoding.Type, |
| _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result |
| ) rethrows -> Result { |
| if targetEncoding == UTF8.self { |
| return try self.withCString { |
| (cPtr: UnsafePointer<CChar>) -> Result in |
| _internalInvariant(UInt8.self == TargetEncoding.CodeUnit.self) |
| let ptr = UnsafeRawPointer(cPtr).assumingMemoryBound( |
| to: TargetEncoding.CodeUnit.self) |
| return try body(ptr) |
| } |
| } |
| return try _slowWithCString(encodedAs: targetEncoding, body) |
| } |
| |
| @usableFromInline @inline(never) // slow-path |
| @_effects(releasenone) |
| internal func _slowWithCString<Result, TargetEncoding: Unicode.Encoding>( |
| encodedAs targetEncoding: TargetEncoding.Type, |
| _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result |
| ) rethrows -> Result { |
| return try self._withUTF8 { utf8 in |
| var arg = Array<TargetEncoding.CodeUnit>() |
| arg.reserveCapacity(1 &+ self._guts.count / 4) |
| let repaired = transcode( |
| utf8.makeIterator(), |
| from: UTF8.self, |
| to: targetEncoding, |
| stoppingOnError: false, |
| into: { arg.append($0) }) |
| arg.append(TargetEncoding.CodeUnit(0)) |
| _internalInvariant(!repaired) |
| return try body(arg) |
| } |
| } |
| } |
| |
| extension String: _ExpressibleByBuiltinUnicodeScalarLiteral { |
| @_effects(readonly) |
| @inlinable @inline(__always) |
| public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) { |
| self.init(Unicode.Scalar(_unchecked: UInt32(value))) |
| } |
| |
| @inlinable @inline(__always) |
| public init(_ scalar: Unicode.Scalar) { |
| self = scalar.withUTF8CodeUnits { String._uncheckedFromUTF8($0) } |
| } |
| } |
| |
| extension String: _ExpressibleByBuiltinExtendedGraphemeClusterLiteral { |
| @inlinable @inline(__always) |
| @_effects(readonly) @_semantics("string.makeUTF8") |
| public init( |
| _builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer, |
| utf8CodeUnitCount: Builtin.Word, |
| isASCII: Builtin.Int1 |
| ) { |
| self.init( |
| _builtinStringLiteral: start, |
| utf8CodeUnitCount: utf8CodeUnitCount, |
| isASCII: isASCII) |
| } |
| } |
| |
| extension String: _ExpressibleByBuiltinStringLiteral { |
| @inlinable @inline(__always) |
| @_effects(readonly) @_semantics("string.makeUTF8") |
| public init( |
| _builtinStringLiteral start: Builtin.RawPointer, |
| utf8CodeUnitCount: Builtin.Word, |
| isASCII: Builtin.Int1 |
| ) { |
| let bufPtr = UnsafeBufferPointer( |
| start: UnsafeRawPointer(start).assumingMemoryBound(to: UInt8.self), |
| count: Int(utf8CodeUnitCount)) |
| if let smol = _SmallString(bufPtr) { |
| self = String(_StringGuts(smol)) |
| return |
| } |
| self.init(_StringGuts(bufPtr, isASCII: Bool(isASCII))) |
| } |
| } |
| |
| extension String: ExpressibleByStringLiteral { |
| /// Creates an instance initialized to the given string value. |
| /// |
| /// Do not call this initializer directly. It is used by the compiler when you |
| /// initialize a string using a string literal. For example: |
| /// |
| /// let nextStop = "Clark & Lake" |
| /// |
| /// This assignment to the `nextStop` constant calls this string literal |
| /// initializer behind the scenes. |
| @inlinable @inline(__always) |
| public init(stringLiteral value: String) { |
| self = value |
| } |
| } |
| |
| extension String: CustomDebugStringConvertible { |
| /// A representation of the string that is suitable for debugging. |
| public var debugDescription: String { |
| var result = "\"" |
| for us in self.unicodeScalars { |
| result += us.escaped(asASCII: false) |
| } |
| result += "\"" |
| return result |
| } |
| } |
| |
| extension String { |
| @inlinable // Forward inlinability to append |
| @_effects(readonly) @_semantics("string.concat") |
| public static func + (lhs: String, rhs: String) -> String { |
| var result = lhs |
| result.append(rhs) |
| return result |
| } |
| |
| // String append |
| @inlinable // Forward inlinability to append |
| public static func += (lhs: inout String, rhs: String) { |
| lhs.append(rhs) |
| } |
| } |
| |
| extension Sequence where Element: StringProtocol { |
| /// Returns a new string by concatenating the elements of the sequence, |
| /// adding the given separator between each element. |
| /// |
| /// The following example shows how an array of strings can be joined to a |
| /// single, comma-separated string: |
| /// |
| /// let cast = ["Vivien", "Marlon", "Kim", "Karl"] |
| /// let list = cast.joined(separator: ", ") |
| /// print(list) |
| /// // Prints "Vivien, Marlon, Kim, Karl" |
| /// |
| /// - Parameter separator: A string to insert between each of the elements |
| /// in this sequence. The default separator is an empty string. |
| /// - Returns: A single, concatenated string. |
| @_specialize(where Self == Array<Substring>) |
| @_specialize(where Self == Array<String>) |
| public func joined(separator: String = "") -> String { |
| return _joined(separator: separator) |
| } |
| |
| @inline(__always) // Pick up @_specialize and devirtualize from two callers |
| internal func _joined(separator: String) -> String { |
| // A likely-under-estimate, but lets us skip some of the growth curve |
| // for large Sequences. |
| let underestimatedCap = |
| (1 &+ separator._guts.count) &* self.underestimatedCount |
| var result = "" |
| result.reserveCapacity(underestimatedCap) |
| if separator.isEmpty { |
| for x in self { |
| result.append(x._ephemeralString) |
| } |
| return result |
| } |
| |
| var iter = makeIterator() |
| if let first = iter.next() { |
| result.append(first._ephemeralString) |
| while let next = iter.next() { |
| result.append(separator) |
| result.append(next._ephemeralString) |
| } |
| } |
| return result |
| } |
| } |
| |
| // This overload is necessary because String now conforms to |
| // BidirectionalCollection, and there are other `joined` overloads that are |
| // considered more specific. See Flatten.swift.gyb. |
| extension BidirectionalCollection where Element == String { |
| /// Returns a new string by concatenating the elements of the sequence, |
| /// adding the given separator between each element. |
| /// |
| /// The following example shows how an array of strings can be joined to a |
| /// single, comma-separated string: |
| /// |
| /// let cast = ["Vivien", "Marlon", "Kim", "Karl"] |
| /// let list = cast.joined(separator: ", ") |
| /// print(list) |
| /// // Prints "Vivien, Marlon, Kim, Karl" |
| /// |
| /// - Parameter separator: A string to insert between each of the elements |
| /// in this sequence. The default separator is an empty string. |
| /// - Returns: A single, concatenated string. |
| @_specialize(where Self == Array<String>) |
| public func joined(separator: String = "") -> String { |
| return _joined(separator: separator) |
| } |
| } |
| |
| // Unicode algorithms |
| extension String { |
| @inline(__always) |
| internal func _uppercaseASCII(_ x: UInt8) -> UInt8 { |
| /// A "table" for which ASCII characters need to be upper cased. |
| /// To determine which bit corresponds to which ASCII character, subtract 1 |
| /// from the ASCII value of that character and divide by 2. The bit is set iff |
| /// that character is a lower case character. |
| let _lowercaseTable: UInt64 = |
| 0b0001_1111_1111_1111_0000_0000_0000_0000 &<< 32 |
| |
| // Lookup if it should be shifted in our ascii table, then we subtract 0x20 if |
| // it should, 0x0 if not. |
| // This code is equivalent to: |
| // This code is equivalent to: |
| // switch sourcex { |
| // case let x where (x >= 0x41 && x <= 0x5a): |
| // return x &- 0x20 |
| // case let x: |
| // return x |
| // } |
| let isLower = _lowercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1) |
| let toSubtract = (isLower & 0x1) &<< 5 |
| return x &- UInt8(truncatingIfNeeded: toSubtract) |
| } |
| |
| @inline(__always) |
| internal func _lowercaseASCII(_ x: UInt8) -> UInt8 { |
| /// A "table" for which ASCII characters need to be lower cased. |
| /// To determine which bit corresponds to which ASCII character, subtract 1 |
| /// from the ASCII value of that character and divide by 2. The bit is set iff |
| /// that character is a upper case character. |
| let _uppercaseTable: UInt64 = |
| 0b0000_0000_0000_0000_0001_1111_1111_1111 &<< 32 |
| |
| // Lookup if it should be shifted in our ascii table, then we add 0x20 if |
| // it should, 0x0 if not. |
| // This code is equivalent to: |
| // This code is equivalent to: |
| // switch sourcex { |
| // case let x where (x >= 0x41 && x <= 0x5a): |
| // return x &- 0x20 |
| // case let x: |
| // return x |
| // } |
| let isUpper = _uppercaseTable &>> UInt64(((x &- 1) & 0b0111_1111) &>> 1) |
| let toAdd = (isUpper & 0x1) &<< 5 |
| return x &+ UInt8(truncatingIfNeeded: toAdd) |
| } |
| |
| |
| /// Returns a lowercase version of the string. |
| /// |
| /// Here's an example of transforming a string to all lowercase letters. |
| /// |
| /// let cafe = "BBQ Café 🍵" |
| /// print(cafe.lowercased()) |
| /// // Prints "bbq café 🍵" |
| /// |
| /// - Returns: A lowercase copy of the string. |
| /// |
| /// - Complexity: O(*n*) |
| @_effects(releasenone) |
| public func lowercased() -> String { |
| if _fastPath(_guts.isFastASCII) { |
| return _guts.withFastUTF8 { utf8 in |
| // TODO(String performance): We can directly call appendInPlace |
| var result = String() |
| result.reserveCapacity(utf8.count) |
| for u8 in utf8 { |
| result._guts.append(String(Unicode.Scalar(_lowercaseASCII(u8)))._guts) |
| } |
| return result |
| } |
| } |
| |
| // TODO(String performance): Try out incremental case-conversion rather than |
| // make UTF-16 array beforehand |
| let codeUnits = Array(self.utf16).withUnsafeBufferPointer { |
| (uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in |
| var result = Array<UInt16>(repeating: 0, count: uChars.count) |
| let len = result.withUnsafeMutableBufferPointer { |
| (output) -> Int in |
| var err = __swift_stdlib_U_ZERO_ERROR |
| return Int(truncatingIfNeeded: |
| __swift_stdlib_u_strToLower( |
| output.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(output.count), |
| uChars.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(uChars.count), |
| "", |
| &err)) |
| } |
| if len > uChars.count { |
| var err = __swift_stdlib_U_ZERO_ERROR |
| result = Array<UInt16>(repeating: 0, count: len) |
| result.withUnsafeMutableBufferPointer { |
| output -> Void in |
| __swift_stdlib_u_strToLower( |
| output.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(output.count), |
| uChars.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(uChars.count), |
| "", |
| &err) |
| } |
| } |
| return result |
| } |
| return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) } |
| } |
| |
| /// Returns an uppercase version of the string. |
| /// |
| /// The following example transforms a string to uppercase letters: |
| /// |
| /// let cafe = "Café 🍵" |
| /// print(cafe.uppercased()) |
| /// // Prints "CAFÉ 🍵" |
| /// |
| /// - Returns: An uppercase copy of the string. |
| /// |
| /// - Complexity: O(*n*) |
| @_effects(releasenone) |
| public func uppercased() -> String { |
| if _fastPath(_guts.isFastASCII) { |
| return _guts.withFastUTF8 { utf8 in |
| // TODO(String performance): code-unit appendInPlace on guts |
| var result = String() |
| result.reserveCapacity(utf8.count) |
| for u8 in utf8 { |
| result._guts.append(String(Unicode.Scalar(_uppercaseASCII(u8)))._guts) |
| } |
| return result |
| } |
| } |
| |
| // TODO(String performance): Try out incremental case-conversion rather than |
| // make UTF-16 array beforehand |
| let codeUnits = Array(self.utf16).withUnsafeBufferPointer { |
| (uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in |
| var result = Array<UInt16>(repeating: 0, count: uChars.count) |
| let len = result.withUnsafeMutableBufferPointer { |
| (output) -> Int in |
| var err = __swift_stdlib_U_ZERO_ERROR |
| return Int(truncatingIfNeeded: |
| __swift_stdlib_u_strToUpper( |
| output.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(output.count), |
| uChars.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(uChars.count), |
| "", |
| &err)) |
| } |
| if len > uChars.count { |
| var err = __swift_stdlib_U_ZERO_ERROR |
| result = Array<UInt16>(repeating: 0, count: len) |
| result.withUnsafeMutableBufferPointer { |
| output -> Void in |
| __swift_stdlib_u_strToUpper( |
| output.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(output.count), |
| uChars.baseAddress._unsafelyUnwrappedUnchecked, |
| Int32(uChars.count), |
| "", |
| &err) |
| } |
| } |
| return result |
| } |
| return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) } |
| } |
| |
| /// Creates an instance from the description of a given |
| /// `LosslessStringConvertible` instance. |
| @inlinable @inline(__always) |
| public init<T : LosslessStringConvertible>(_ value: T) { |
| self = value.description |
| } |
| } |
| |
| extension String: CustomStringConvertible { |
| /// The value of this string. |
| /// |
| /// Using this property directly is discouraged. Instead, use simple |
| /// assignment to create a new constant or variable equal to this string. |
| @inlinable |
| public var description: String { return self } |
| } |
| |
| extension String { |
| public // @testable |
| var _nfcCodeUnits: [UInt8] { |
| var codeUnits = [UInt8]() |
| _withNFCCodeUnits { |
| codeUnits.append($0) |
| } |
| return codeUnits |
| } |
| |
| public // @testable |
| func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows { |
| try _gutsSlice._withNFCCodeUnits(f) |
| } |
| } |
| |
| extension _StringGutsSlice { |
| internal func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows { |
| var output = _FixedArray16<UInt8>(allZeros: ()) |
| var icuInput = _FixedArray16<UInt16>(allZeros: ()) |
| var icuOutput = _FixedArray16<UInt16>(allZeros: ()) |
| if _fastPath(isFastUTF8) { |
| try withFastUTF8 { |
| return try _fastWithNormalizedCodeUnitsImpl( |
| sourceBuffer: $0, |
| outputBuffer: _castOutputBuffer(&output), |
| icuInputBuffer: _castOutputBuffer(&icuInput), |
| icuOutputBuffer: _castOutputBuffer(&icuOutput), |
| f |
| ) |
| } |
| } else { |
| return try _foreignWithNormalizedCodeUnitsImpl( |
| outputBuffer: _castOutputBuffer(&output), |
| icuInputBuffer: _castOutputBuffer(&icuInput), |
| icuOutputBuffer: _castOutputBuffer(&icuOutput), |
| f |
| ) |
| } |
| } |
| |
| internal func _foreignWithNormalizedCodeUnitsImpl( |
| outputBuffer: UnsafeMutableBufferPointer<UInt8>, |
| icuInputBuffer: UnsafeMutableBufferPointer<UInt16>, |
| icuOutputBuffer: UnsafeMutableBufferPointer<UInt16>, |
| _ f: (UInt8) throws -> Void |
| ) rethrows { |
| var outputBuffer = outputBuffer |
| var icuInputBuffer = icuInputBuffer |
| var icuOutputBuffer = icuOutputBuffer |
| |
| var index = range.lowerBound |
| let cachedEndIndex = range.upperBound |
| |
| var hasBufferOwnership = false |
| |
| defer { |
| if hasBufferOwnership { |
| outputBuffer.deallocate() |
| icuInputBuffer.deallocate() |
| icuOutputBuffer.deallocate() |
| } |
| } |
| |
| while index < cachedEndIndex { |
| let result = _foreignNormalize( |
| readIndex: index, |
| endIndex: cachedEndIndex, |
| guts: _guts, |
| outputBuffer: &outputBuffer, |
| icuInputBuffer: &icuInputBuffer, |
| icuOutputBuffer: &icuOutputBuffer |
| ) |
| for i in 0..<result.amountFilled { |
| try f(outputBuffer[i]) |
| } |
| _internalInvariant(result.nextReadPosition != index) |
| index = result.nextReadPosition |
| if result.allocatedBuffers { |
| _internalInvariant(!hasBufferOwnership) |
| hasBufferOwnership = true |
| } |
| } |
| } |
| } |
| |
| internal func _fastWithNormalizedCodeUnitsImpl( |
| sourceBuffer: UnsafeBufferPointer<UInt8>, |
| outputBuffer: UnsafeMutableBufferPointer<UInt8>, |
| icuInputBuffer: UnsafeMutableBufferPointer<UInt16>, |
| icuOutputBuffer: UnsafeMutableBufferPointer<UInt16>, |
| _ f: (UInt8) throws -> Void |
| ) rethrows { |
| var outputBuffer = outputBuffer |
| var icuInputBuffer = icuInputBuffer |
| var icuOutputBuffer = icuOutputBuffer |
| |
| var index = String.Index(encodedOffset: 0) |
| let cachedEndIndex = String.Index(encodedOffset: sourceBuffer.count) |
| |
| var hasBufferOwnership = false |
| |
| defer { |
| if hasBufferOwnership { |
| outputBuffer.deallocate() |
| icuInputBuffer.deallocate() |
| icuOutputBuffer.deallocate() |
| } |
| } |
| |
| while index < cachedEndIndex { |
| let result = _fastNormalize( |
| readIndex: index, |
| sourceBuffer: sourceBuffer, |
| outputBuffer: &outputBuffer, |
| icuInputBuffer: &icuInputBuffer, |
| icuOutputBuffer: &icuOutputBuffer |
| ) |
| for i in 0..<result.amountFilled { |
| try f(outputBuffer[i]) |
| } |
| _internalInvariant(result.nextReadPosition != index) |
| index = result.nextReadPosition |
| if result.allocatedBuffers { |
| _internalInvariant(!hasBufferOwnership) |
| hasBufferOwnership = true |
| } |
| } |
| } |