| //===----------------------------------------------------------------------===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| import SwiftShims |
| |
/// Transcodes `source` from `sourceEncoding` into `targetEncoding`, stores the
/// result in a temporary zero-terminated array, and invokes `body` on it.
///
/// - Parameters:
///   - targetEncoding: The encoding of the code units handed to `body`.
///   - source: The code units to transcode.
///   - sourceEncoding: The encoding in which `source` is interpreted.
///   - body: A closure receiving a pointer to the transcoded code units and
///     their count. The count does not include the trailing zero code unit.
/// - Returns: Whatever `body` returns.
@inlinable // FIXME(sil-serialize-all)
@_semantics("optimize.sil.specialize.generic.partial.never")
internal func _withCStringAndLength<
  Source : Collection,
  SourceEncoding : Unicode.Encoding,
  TargetEncoding : Unicode.Encoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body : (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  // Pass 1: measure the transcoded length (terminator excluded) so the
  // buffer can be reserved up front.
  var codeUnitCount = 0
  var measuring = source.makeIterator()
  SourceEncoding.ForwardParser._parse(&measuring) { scalar in
    let transcoded = targetEncoding._transcode(
      scalar, from: SourceEncoding.self)
    codeUnitCount += numericCast(transcoded.count)
  }

  // Pass 2: transcode into the buffer, leaving room for the terminator.
  var buffer: [TargetEncoding.CodeUnit] = []
  buffer.reserveCapacity(codeUnitCount + 1)
  var filling = source.makeIterator()
  SourceEncoding.ForwardParser._parse(&filling) { scalar in
    buffer.append(
      contentsOf: targetEncoding._transcode(scalar, from: SourceEncoding.self))
  }
  buffer.append(0)

  return try body(buffer, codeUnitCount)
}
| |
extension _StringGuts {
  //
  // TODO: (TODO: JIRA) This is all very bloated code; needs a rewrite given
  // StringGuts' new design and the potential to run directly on internal
  // storage. For now, follow a hand-coded opaque pattern.
  //

  /// Invokes `body` on a null-terminated sequence of code units in the given
  /// encoding corresponding to the substring in `bounds`.
  ///
  /// This is `_withCSubstringAndLength` with the length argument discarded.
  @inlinable // FIXME(sil-serialize-all)
  internal func _withCSubstring<Result, TargetEncoding: Unicode.Encoding>(
    in bounds: Range<Int>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    return try _withCSubstringAndLength(
      in: bounds, encoding: targetEncoding
    ) { (pointer, _) in
      try body(pointer)
    }
  }

  /// Invokes `body` on a null-terminated sequence of code units in the given
  /// encoding corresponding to the substring in `bounds`, together with the
  /// number of code units preceding the terminator.
  @inlinable // FIXME(sil-serialize-all)
  @_semantics("optimize.sil.specialize.generic.partial.never")
  internal func _withCSubstringAndLength<
    Result, TargetEncoding: Unicode.Encoding
  >(
    in bounds: Range<Int>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
  ) rethrows -> Result {
    // Opaque representations are handled out of line.
    if _slowPath(_isOpaque) {
      return try _opaqueWithCStringAndLength(
        in: bounds, encoding: targetEncoding, body)
    }

    defer { _fixLifetime(self) }

    guard isASCII else {
      // Non-ASCII contiguous storage is read through the UTF-16 view.
      let wideSlice = _unmanagedUTF16View[bounds]
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: wideSlice.buffer,
        encodedAs: Unicode.UTF16.self,
        execute: body)
    }

    let narrowSlice = _unmanagedASCIIView[bounds]
    return try Swift._withCStringAndLength(
      encodedAs: targetEncoding,
      from: narrowSlice.buffer,
      encodedAs: Unicode.ASCII.self,
      execute: body)
  }

  /// Out-of-line counterpart of `_withCSubstringAndLength` for opaque
  /// strings: small strings and everything reached through `_asOpaque()`.
  @usableFromInline // @opaque
  func _opaqueWithCStringAndLength<
    Result, TargetEncoding: Unicode.Encoding
  >(
    in bounds: Range<Int>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
  ) rethrows -> Result {
    _sanityCheck(_isOpaque)

    guard self._isSmall else {
      defer { _fixLifetime(self) }
      let opaqueSlice = _asOpaque()[bounds]
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: opaqueSlice,
        encodedAs: Unicode.UTF16.self,
        execute: body)
    }

    let smallSlice = self._smallUTF8String[bounds]
    guard smallSlice.isASCII else {
      fatalError("TODO: UTF-8 support in small strings")
    }
    return try smallSlice.withUnmanagedASCII {
      (ascii: _UnmanagedString<Unicode.UTF8.CodeUnit>) throws -> Result in
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: ascii.buffer,
        encodedAs: Unicode.UTF8.self,
        execute: body)
    }
  }
}
| |
extension String {
  /// Creates a string from the given Unicode code units in the specified
  /// encoding.
  ///
  /// Decoding is requested with repair of ill-formed sequences enabled.
  ///
  /// - Parameters:
  ///   - codeUnits: A collection of code units encoded in the encoding
  ///     specified in `sourceEncoding`.
  ///   - sourceEncoding: The encoding in which `codeUnits` should be
  ///     interpreted.
  @inlinable // FIXME(sil-serialize-all)
  @inline(__always) // Eliminate dynamic type check when possible
  public init<C: Collection, Encoding: Unicode.Encoding>(
    decoding codeUnits: C, as sourceEncoding: Encoding.Type
  ) where C.Iterator.Element == Encoding.CodeUnit {
    // Fast path: UTF-8 input whose bytes are available contiguously.
    if let contigBytes = codeUnits as? _HasContiguousBytes,
      sourceEncoding == UTF8.self
    {
      self = contigBytes.withUnsafeBytes {
        (rawBufPtr: UnsafeRawBufferPointer) -> String in
        // An empty buffer is allowed to have a nil base address, so it must
        // not be unwrapped unchecked; there is nothing to decode in that case.
        guard let base = rawBufPtr.baseAddress else { return String() }
        return String._fromUTF8(
          UnsafeBufferPointer(
            start: base.assumingMemoryBound(to: UInt8.self),
            count: rawBufPtr.count),
          repair: true).unsafelyUnwrapped
      }
      return
    }

    // General path: transcode from an arbitrary collection and encoding.
    self = String._fromCodeUnits(
      codeUnits, encoding: sourceEncoding, repairIllFormedSequences: true)!
  }

  /// Creates a string from the null-terminated sequence of bytes at the given
  /// pointer.
  ///
  /// - Parameters:
  ///   - nullTerminatedCodeUnits: A pointer to a sequence of contiguous code
  ///     units in the encoding specified in `sourceEncoding`, ending just
  ///     before the first zero code unit.
  ///   - sourceEncoding: The encoding in which the code units should be
  ///     interpreted.
  @inlinable // FIXME(sil-serialize-all)
  public init<Encoding: Unicode.Encoding>(
    decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
    as sourceEncoding: Encoding.Type) {

    self = String.decodeCString(
      nullTerminatedCodeUnits, as: sourceEncoding)!.result
  }

  /// Calls the given closure with a pointer to the contents of the string,
  /// represented as a null-terminated sequence of code units.
  ///
  /// The pointer passed as an argument to `body` is valid only during the
  /// execution of `withCString(encodedAs:_:)`. Do not store or return the
  /// pointer for later use.
  ///
  /// - Parameters:
  ///   - body: A closure with a pointer parameter that points to a
  ///     null-terminated sequence of code units. If `body` has a return
  ///     value, that value is also used as the return value for the
  ///     `withCString(encodedAs:_:)` method. The pointer argument is valid
  ///     only for the duration of the method's execution.
  ///   - targetEncoding: The encoding in which the code units should be
  ///     interpreted.
  /// - Returns: The return value, if any, of the `body` closure parameter.
  @inlinable // FIXME(sil-serialize-all)
  public func withCString<Result, TargetEncoding: Unicode.Encoding>(
    encodedAs targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    // The whole string is the substring covering every code unit of `_guts`.
    return try _guts._withCSubstring(
      in: 0..<_guts.count,
      encoding: TargetEncoding.self,
      body)
  }
}
| // FIXME: complexity documentation for most methods on String ought to be |
| // qualified with "amortized" at least, as Characters are variable-length. |
| |
| /// A Unicode string value that is a collection of characters. |
| /// |
| /// A string is a series of characters, such as `"Swift"`, that forms a |
| /// collection. Strings in Swift are Unicode correct and locale insensitive, |
| /// and are designed to be efficient. The `String` type bridges with the |
| /// Objective-C class `NSString` and offers interoperability with C functions |
| /// that work with strings. |
| /// |
| /// You can create new strings using string literals or string interpolations. |
| /// A *string literal* is a series of characters enclosed in quotes. |
| /// |
| /// let greeting = "Welcome!" |
| /// |
| /// *String interpolations* are string literals that evaluate any included |
| /// expressions and convert the results to string form. String interpolations |
| /// give you an easy way to build a string from multiple pieces. Wrap each |
| /// expression in a string interpolation in parentheses, prefixed by a |
| /// backslash. |
| /// |
| /// let name = "Rosa" |
| /// let personalizedGreeting = "Welcome, \(name)!" |
| /// // personalizedGreeting == "Welcome, Rosa!" |
| /// |
| /// let price = 2 |
| /// let number = 3 |
| /// let cookiePrice = "\(number) cookies: $\(price * number)." |
| /// // cookiePrice == "3 cookies: $6." |
| /// |
| /// Combine strings using the concatenation operator (`+`). |
| /// |
| /// let longerGreeting = greeting + " We're glad you're here!" |
| /// // longerGreeting == "Welcome! We're glad you're here!" |
| /// |
| /// Multiline string literals are enclosed in three double quotation marks |
| /// (`"""`), with each delimiter on its own line. Indentation is stripped from |
| /// each line of a multiline string literal to match the indentation of the |
| /// closing delimiter. |
| /// |
| /// let banner = """ |
| /// __, |
| /// ( o /) _/_ |
| /// `. , , , , // / |
| /// (___)(_(_/_(_ //_ (__ |
| /// /) |
| /// (/ |
| /// """ |
| /// |
| /// Modifying and Comparing Strings |
| /// =============================== |
| /// |
| /// Strings always have value semantics. Modifying a copy of a string leaves |
| /// the original unaffected. |
| /// |
| /// var otherGreeting = greeting |
| /// otherGreeting += " Have a nice time!" |
| /// // otherGreeting == "Welcome! Have a nice time!" |
| /// |
| /// print(greeting) |
| /// // Prints "Welcome!" |
| /// |
| /// Comparing strings for equality using the equal-to operator (`==`) or a |
| /// relational operator (like `<` or `>=`) is always performed using Unicode |
| /// canonical representation. As a result, different representations of a |
| /// string compare as being equal. |
| /// |
| /// let cafe1 = "Cafe\u{301}" |
| /// let cafe2 = "Café" |
| /// print(cafe1 == cafe2) |
| /// // Prints "true" |
| /// |
| /// The Unicode scalar value `"\u{301}"` modifies the preceding character to |
| /// include an accent, so `"e\u{301}"` has the same canonical representation |
| /// as the single Unicode scalar value `"é"`. |
| /// |
| /// Basic string operations are not sensitive to locale settings, ensuring that |
| /// string comparisons and other operations always have a single, stable |
| /// result, allowing strings to be used as keys in `Dictionary` instances and |
| /// for other purposes. |
| /// |
| /// Accessing String Elements |
| /// ========================= |
| /// |
| /// A string is a collection of *extended grapheme clusters*, which approximate |
| /// human-readable characters. Many individual characters, such as "é", "김", |
| /// and "🇮🇳", can be made up of multiple Unicode scalar values. These scalar |
| /// values are combined by Unicode's boundary algorithms into extended |
| /// grapheme clusters, represented by the Swift `Character` type. Each element |
| /// of a string is represented by a `Character` instance. |
| /// |
| /// For example, to retrieve the first word of a longer string, you can search |
| /// for a space and then create a substring from a prefix of the string up to |
| /// that point: |
| /// |
| /// let name = "Marie Curie" |
| /// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex |
| /// let firstName = name[..<firstSpace] |
| /// // firstName == "Marie" |
| /// |
| /// The `firstName` constant is an instance of the `Substring` type---a type |
| /// that represents substrings of a string while sharing the original string's |
| /// storage. Substrings present the same interface as strings. |
| /// |
| /// print("\(name)'s first name has \(firstName.count) letters.") |
| /// // Prints "Marie Curie's first name has 5 letters." |
| /// |
| /// Accessing a String's Unicode Representation |
| /// =========================================== |
| /// |
| /// If you need to access the contents of a string as encoded in different |
| /// Unicode encodings, use one of the string's `unicodeScalars`, `utf16`, or |
| /// `utf8` properties. Each property provides access to a view of the string |
| /// as a series of code units, each encoded in a different Unicode encoding. |
| /// |
| /// To demonstrate the different views available for every string, the |
| /// following examples use this `String` instance: |
| /// |
| /// let cafe = "Cafe\u{301} du 🌍" |
| /// print(cafe) |
| /// // Prints "Café du 🌍" |
| /// |
| /// The `cafe` string is a collection of the nine characters that are visible |
| /// when the string is displayed. |
| /// |
| /// print(cafe.count) |
| /// // Prints "9" |
| /// print(Array(cafe)) |
| /// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]" |
| /// |
| /// Unicode Scalar View |
| /// ------------------- |
| /// |
| /// A string's `unicodeScalars` property is a collection of Unicode scalar |
| /// values, the 21-bit codes that are the basic unit of Unicode. Each scalar |
| /// value is represented by a `Unicode.Scalar` instance and is equivalent to a |
| /// UTF-32 code unit. |
| /// |
| /// print(cafe.unicodeScalars.count) |
| /// // Prints "10" |
| /// print(Array(cafe.unicodeScalars)) |
| /// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]" |
| /// print(cafe.unicodeScalars.map { $0.value }) |
| /// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]" |
| /// |
| /// The `unicodeScalars` view's elements comprise each Unicode scalar value in |
| /// the `cafe` string. In particular, because `cafe` was declared using the |
| /// decomposed form of the `"é"` character, `unicodeScalars` contains the |
| /// scalar values for both the letter `"e"` (101) and the accent character |
| /// `"´"` (769). |
| /// |
| /// UTF-16 View |
| /// ----------- |
| /// |
| /// A string's `utf16` property is a collection of UTF-16 code units, the |
| /// 16-bit encoding form of the string's Unicode scalar values. Each code unit |
| /// is stored as a `UInt16` instance. |
| /// |
| /// print(cafe.utf16.count) |
| /// // Prints "11" |
| /// print(Array(cafe.utf16)) |
| /// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]" |
| /// |
| /// The elements of the `utf16` view are the code units for the string when |
| /// encoded in UTF-16. These elements match those accessed through indexed |
| /// `NSString` APIs. |
| /// |
| /// let nscafe = cafe as NSString |
| /// print(nscafe.length) |
| /// // Prints "11" |
| /// print(nscafe.character(at: 3)) |
| /// // Prints "101" |
| /// |
| /// UTF-8 View |
| /// ---------- |
| /// |
| /// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit |
| /// encoding form of the string's Unicode scalar values. Each code unit is |
| /// stored as a `UInt8` instance. |
| /// |
| /// print(cafe.utf8.count) |
| /// // Prints "14" |
| /// print(Array(cafe.utf8)) |
| /// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]" |
| /// |
| /// The elements of the `utf8` view are the code units for the string when |
| /// encoded in UTF-8. This representation matches the one used when `String` |
| /// instances are passed to C APIs. |
| /// |
| /// let cLength = strlen(cafe) |
| /// print(cLength) |
| /// // Prints "14" |
| /// |
| /// Measuring the Length of a String |
| /// ================================ |
| /// |
| /// When you need to know the length of a string, you must first consider what |
| /// you'll use the length for. Are you measuring the number of characters that |
| /// will be displayed on the screen, or are you measuring the amount of |
| /// storage needed for the string in a particular encoding? A single string |
| /// can have greatly differing lengths when measured by its different views. |
| /// |
| /// For example, an ASCII character like the capital letter *A* is represented |
| /// by a single element in each of its four views. The Unicode scalar value of |
| /// *A* is `65`, which is small enough to fit in a single code unit in both |
| /// UTF-16 and UTF-8. |
| /// |
| /// let capitalA = "A" |
| /// print(capitalA.count) |
| /// // Prints "1" |
| /// print(capitalA.unicodeScalars.count) |
| /// // Prints "1" |
| /// print(capitalA.utf16.count) |
| /// // Prints "1" |
| /// print(capitalA.utf8.count) |
| /// // Prints "1" |
| /// |
| /// On the other hand, an emoji flag character is constructed from a pair of |
| /// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of these |
| /// scalar values, in turn, is too large to fit into a single UTF-16 or UTF-8 |
| /// code unit. As a result, each view of the string `"🇵🇷"` reports a different |
| /// length. |
| /// |
| /// let flag = "🇵🇷" |
| /// print(flag.count) |
| /// // Prints "1" |
| /// print(flag.unicodeScalars.count) |
| /// // Prints "2" |
| /// print(flag.utf16.count) |
| /// // Prints "4" |
| /// print(flag.utf8.count) |
| /// // Prints "8" |
| /// |
| /// To check whether a string is empty, use its `isEmpty` property instead of |
| /// comparing the length of one of the views to `0`. Unlike with `isEmpty`, |
| /// calculating a view's `count` property requires iterating through the |
| /// elements of the string. |
| /// |
| /// Accessing String View Elements |
| /// ============================== |
| /// |
| /// To find individual elements of a string, use the appropriate view for your |
| /// task. For example, to retrieve the first word of a longer string, you can |
| /// search the string for a space and then create a new string from a prefix |
| /// of the string up to that point. |
| /// |
| /// let name = "Marie Curie" |
| /// let firstSpace = name.firstIndex(of: " ") ?? name.endIndex |
| /// let firstName = name[..<firstSpace] |
| /// print(firstName) |
| /// // Prints "Marie" |
| /// |
| /// Strings and their views share indices, so you can access the UTF-8 view of |
| /// the `name` string using the same `firstSpace` index. |
| /// |
| /// print(Array(name.utf8[..<firstSpace])) |
| /// // Prints "[77, 97, 114, 105, 101]" |
| /// |
| /// Note that an index into one view may not have an exact corresponding |
| /// position in another view. For example, the `flag` string declared above |
| /// comprises a single character, but is composed of eight code units when |
| /// encoded as UTF-8. The following code creates constants for the first and |
| /// second positions in the `flag.utf8` view. Accessing the `utf8` view with |
| /// these indices yields the first and second UTF-8 code units. |
| /// |
| /// let firstCodeUnit = flag.startIndex |
| /// let secondCodeUnit = flag.utf8.index(after: firstCodeUnit) |
| /// // flag.utf8[firstCodeUnit] == 240 |
| /// // flag.utf8[secondCodeUnit] == 159 |
| /// |
| /// When used to access the elements of the `flag` string itself, however, the |
| /// `secondCodeUnit` index does not correspond to the position of a specific |
| /// character. Instead of only accessing the specific UTF-8 code unit, that |
| /// index is treated as the position of the character at the index's encoded |
| /// offset. In the case of `secondCodeUnit`, that character is still the flag |
| /// itself. |
| /// |
| /// // flag[firstCodeUnit] == "🇵🇷" |
| /// // flag[secondCodeUnit] == "🇵🇷" |
| /// |
| /// If you need to validate that an index from one string's view corresponds |
| /// with an exact position in another view, use the index's |
| /// `samePosition(in:)` method or the `init(_:within:)` initializer. |
| /// |
| /// if let exactIndex = secondCodeUnit.samePosition(in: flag) { |
| /// print(flag[exactIndex]) |
| /// } else { |
| /// print("No exact match for this position.") |
| /// } |
| /// // Prints "No exact match for this position." |
| /// |
| /// Performance Optimizations |
| /// ========================= |
| /// |
| /// Although strings in Swift have value semantics, strings use a copy-on-write |
| /// strategy to store their data in a buffer. This buffer can then be shared |
| /// by different copies of a string. A string's data is only copied lazily, |
| /// upon mutation, when more than one string instance is using the same |
| /// buffer. Therefore, the first in any sequence of mutating operations may |
| /// cost O(*n*) time and space. |
| /// |
| /// When a string's contiguous storage fills up, a new buffer must be allocated |
| /// and data must be moved to the new storage. String buffers use an |
| /// exponential growth strategy that makes appending to a string a constant |
| /// time operation when averaged over many append operations. |
| /// |
| /// Bridging Between String and NSString |
| /// ==================================== |
| /// |
| /// Any `String` instance can be bridged to `NSString` using the type-cast |
| /// operator (`as`), and any `String` instance that originates in Objective-C |
| /// may use an `NSString` instance as its storage. Because any arbitrary |
| /// subclass of `NSString` can become a `String` instance, there are no |
| /// guarantees about representation or efficiency when a `String` instance is |
| /// backed by `NSString` storage. Because `NSString` is immutable, it is just |
| /// as though the storage was shared by a copy. The first in any sequence of |
| /// mutating operations causes elements to be copied into unique, contiguous |
| /// storage which may cost O(*n*) time and space, where *n* is the length of |
| /// the string's encoded representation (or more, if the underlying `NSString` |
| /// has unusual performance characteristics). |
| /// |
| /// For more information about the Unicode terms used in this discussion, see |
| /// the [Unicode.org glossary][glossary]. In particular, this discussion |
| /// mentions [extended grapheme clusters][clusters], [Unicode scalar |
| /// values][scalars], and [canonical equivalence][equivalence]. |
| /// |
| /// [glossary]: http://www.unicode.org/glossary/ |
| /// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster |
| /// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value |
| /// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent |
@_fixed_layout
public struct String {
  // The single stored property; the string's storage representation.
  public var _guts: _StringGuts // SPI(Foundation)

  /// Creates an empty string.
  ///
  /// Using this initializer is equivalent to initializing a string with an
  /// empty string literal.
  ///
  ///     let empty = ""
  ///     let alsoEmpty = String()
  @inlinable // FIXME(sil-serialize-all)
  public init() {
    _guts = _StringGuts()
  }

  /// Wraps an existing `_StringGuts` value without any conversion.
  @inlinable // FIXME(sil-serialize-all)
  internal init(_ guts: _StringGuts) {
    _guts = guts
  }
}
| |
extension String {
  /// Debugging aid that forwards to `_StringGuts._dump()`.
  public func _dump() { // FIXME: remove
    _guts._dump()
  }
}
| |
/// Returns `true` when every byte in `input` is at most `0x7F`.
///
/// An empty buffer yields `true`.
internal func _isAllASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
  return !input.contains(where: { $0 > 0x7F })
}
| |
| // TODO: re-organize a bit before merging... |
| |
/// A type that can expose its elements to a closure as one contiguous run of
/// raw bytes.
///
/// `String.init(decoding:as:)` casts its input to this protocol to select a
/// fast path for UTF-8 data.
@usableFromInline
internal protocol _HasContiguousBytes {
  /// Calls `body` with a raw-byte view of the receiver's storage and returns
  /// the closure's result. The buffer's base address may be `nil` when the
  /// receiver is empty.
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R
}

// `Array` already provides a `withUnsafeBytes` method with this signature.
extension Array: _HasContiguousBytes {}

extension UnsafeBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a raw view covering `count * stride` bytes starting at
  /// `baseAddress`.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` is nil for a buffer created with `start: nil`, so keep it
    // optional instead of unwrapping it unchecked (undefined behavior on nil).
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}

extension UnsafeMutableBufferPointer: _HasContiguousBytes {
  /// Calls `body` with a read-only raw view covering `count * stride` bytes
  /// starting at `baseAddress`.
  @inlinable
  @inline(__always)
  func withUnsafeBytes<R>(
    _ body: (UnsafeRawBufferPointer) throws -> R
  ) rethrows -> R {
    // `baseAddress` is nil for a buffer created with `start: nil`, so keep it
    // optional instead of unwrapping it unchecked (undefined behavior on nil).
    let ptr = UnsafeRawPointer(self.baseAddress)
    let len = self.count &* MemoryLayout<Element>.stride
    return try body(UnsafeRawBufferPointer(start: ptr, count: len))
  }
}
| |
extension String : _ExpressibleByBuiltinUnicodeScalarLiteral {
  /// Creates a one-scalar string from a builtin scalar literal value by
  /// wrapping it in a `Unicode.Scalar` and delegating to `init(_:)`.
  @inlinable // FIXME(sil-serialize-all)
  @_effects(readonly)
  public // @testable
  init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
    let scalar = Unicode.Scalar(_value: UInt32(value))
    self.init(scalar)
  }

  /// Creates a string containing only the given Unicode scalar.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ scalar: Unicode.Scalar) {
    // Until we have UTF-8 support in small string, need to be large
    //
    // TODO: All scalars are small
    if scalar.value <= 0x7f {
      if let small = _SmallUTF8String(scalar) {
        self = String(_StringGuts(small))
        return
      }
      // On i386 and arm a failed small-string construction falls through to
      // the general path below; everywhere else it is treated as a failure.
#if !(arch(i386) || arch(arm))
      _sanityCheckFailure("Couldn't fit ASCII scalar into small string?")
#endif
    }

    let decoded = String._fromCodeUnits(
      CollectionOfOne(scalar.value),
      encoding: UTF32.self,
      repairIllFormedSequences: false)
    self = decoded._unsafelyUnwrappedUnchecked
  }
}
| |
extension String : _ExpressibleByBuiltinExtendedGraphemeClusterLiteral {
  /// Creates a string from the UTF-8 data of an extended-grapheme-cluster
  /// literal by forwarding every argument unchanged to
  /// `init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:)`.
  @inlinable
  @_effects(readonly)
  @_semantics("string.makeUTF8")
  public init(
    _builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
    utf8CodeUnitCount: Builtin.Word,
    isASCII: Builtin.Int1
  ) {
    self.init(
      _builtinStringLiteral: start,
      utf8CodeUnitCount: utf8CodeUnitCount, isASCII: isASCII)
  }
}
| |
extension String : _ExpressibleByBuiltinUTF16StringLiteral {
  /// Creates a string from `utf16CodeUnitCount` UTF-16 code units starting at
  /// `start`.
  ///
  /// A small-string representation is tried first; otherwise the string
  /// refers to the literal's memory through an unmanaged large
  /// representation.
  @inlinable
  @_effects(readonly)
  @_semantics("string.makeUTF16")
  public init(
    _builtinUTF16StringLiteral start: Builtin.RawPointer,
    utf16CodeUnitCount: Builtin.Word
  ) {
    let base = UnsafeRawPointer(start).assumingMemoryBound(to: UInt16.self)
    let codeUnits = UnsafeBufferPointer(
      start: base, count: Int(utf16CodeUnitCount))

    guard let small = _SmallUTF8String(codeUnits) else {
      self = String(_StringGuts(_large: _UnmanagedString(codeUnits)))
      return
    }
    self = String(_StringGuts(small))
  }
}
| |
extension String : _ExpressibleByBuiltinStringLiteral {
  /// Creates a string from `utf8CodeUnitCount` UTF-8 code units starting at
  /// `start`.
  ///
  /// Representations are tried in this order: the empty string, a small
  /// string, an unmanaged large string when `isASCII` is set, and finally a
  /// string built by `String._fromWellFormedUTF8`.
  @inline(__always)
  @inlinable
  @_effects(readonly)
  @_semantics("string.makeUTF8")
  public init(
    _builtinStringLiteral start: Builtin.RawPointer,
    utf8CodeUnitCount: Builtin.Word,
    isASCII: Builtin.Int1
  ) {
    let base = UnsafeRawPointer(start).assumingMemoryBound(to: UInt8.self)
    let utf8 = UnsafeBufferPointer(start: base, count: Int(utf8CodeUnitCount))

    guard !utf8.isEmpty else {
      self.init()
      return
    }

    if let small = _SmallUTF8String(utf8) {
      self = String(_StringGuts(small))
      return
    }

    guard _fastPath(Bool(isASCII)) else {
      self = String._fromWellFormedUTF8(utf8)
      return
    }
    self = String(_StringGuts(_large: _UnmanagedString(utf8)))
  }
}
| |
extension String : ExpressibleByStringLiteral {
  /// Creates an instance initialized to the given string value.
  ///
  /// Do not call this initializer directly. It is used by the compiler when you
  /// initialize a string using a string literal. For example:
  ///
  ///     let nextStop = "Clark & Lake"
  ///
  /// This assignment to the `nextStop` constant calls this string literal
  /// initializer behind the scenes.
  @inlinable // FIXME(sil-serialize-all)
  public init(stringLiteral value: String) {
    // `_guts` is the only stored property, so copying it copies the value.
    self._guts = value._guts
  }
}
| |
extension String : CustomDebugStringConvertible {
  /// A representation of the string that is suitable for debugging.
  ///
  /// The result is the string's Unicode scalars, each passed through
  /// `escaped(asASCII: false)`, enclosed in double quotation marks.
  public var debugDescription: String {
    var quoted = unicodeScalars.reduce(into: "\"") { partial, scalar in
      partial += scalar.escaped(asASCII: false)
    }
    quoted += "\""
    return quoted
  }
}
| |
extension String {
  /// Returns the number of code units occupied by this string
  /// in the given encoding.
  @inlinable // FIXME(sil-serialize-all)
  internal func _encodedLength<
    Encoding: Unicode.Encoding
  >(_ encoding: Encoding.Type) -> Int {
    // Encode the string and count the emitted code units.
    var total = 0
    _encode(encoding) { _ in total += 1 }
    return total
  }

  //
  // TODO (TODO: JIRA): This needs to be completely rewritten. It's about 12KB
  // of code, most of which are MOV instructions. Keeping the by-hand opaque
  // visitation pattern for now.
  //

  // FIXME: this function may not handle the case when a wrapped NSString
  // contains unpaired surrogates.  Fix this before exposing this function as a
  // public API.  But it is unclear if it is valid to have such an NSString in
  // the first place.  If it is not, we should not be crashing in an obscure
  // way -- add a test for that.
  // Related: <rdar://problem/17340917> Please document how NSString interacts
  // with unpaired surrogates

  /// Feeds this string's contents, encoded as `encoding`, to
  /// `processCodeUnit` one code unit at a time.
  @inlinable // FIXME(sil-serialize-all)
  internal func _encode<Encoding: Unicode.Encoding>(
    _ encoding: Encoding.Type,
    into processCodeUnit: (Encoding.CodeUnit) -> Void
  ) {
    if _slowPath(_guts._isOpaque) {
      _opaqueEncode(encoding, into: processCodeUnit)
      return
    }

    defer { _fixLifetime(self) }

    guard _guts.isASCII else {
      // Non-ASCII contiguous storage: parse UTF-16 and transcode each scalar.
      let utf16 = _guts._unmanagedUTF16View
      var units = utf16.makeIterator()
      Unicode.UTF16.ForwardParser._parse(&units) {
        Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
      }
      return
    }

    let ascii = _guts._unmanagedASCIIView
    // For these encodings each ASCII byte is emitted as a single code unit
    // of the same value, so no per-scalar encoding step is needed.
    let emitsBytesDirectly =
      encoding == Unicode.ASCII.self
      || encoding == Unicode.UTF8.self
      || encoding == Unicode.UTF16.self
      || encoding == Unicode.UTF32.self

    if emitsBytesDirectly {
      for byte in ascii {
        processCodeUnit(Encoding.CodeUnit(truncatingIfNeeded: byte))
      }
    } else {
      // TODO: be sure tests exercise this code path.
      for byte in ascii {
        let scalar = Unicode.Scalar(_unchecked: UInt32(byte))
        Encoding._encode(scalar).forEach(processCodeUnit)
      }
    }
  }

  /// Out-of-line counterpart of `_encode` for small and opaque strings; both
  /// are parsed as UTF-16 and transcoded scalar by scalar.
  @usableFromInline // @opaque
  internal func _opaqueEncode<Encoding: Unicode.Encoding>(
    _ encoding: Encoding.Type,
    into processCodeUnit: (Encoding.CodeUnit) -> Void
  ) {
    // TODO: ASCII fast path, and probably adjust this interface too.
    guard _guts._isSmall else {
      _sanityCheck(_guts._isOpaque)
      defer { _fixLifetime(self) }
      let opaque = _guts._asOpaque()
      var units = opaque.makeIterator()
      Unicode.UTF16.ForwardParser._parse(&units) {
        Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
      }
      return
    }

    _guts._smallUTF8String.withUnmanagedUTF16 { utf16 in
      var units = utf16.makeIterator()
      Unicode.UTF16.ForwardParser._parse(&units) {
        Encoding._transcode($0, from: UTF16.self).forEach(processCodeUnit)
      }
    }
  }
}
| |
| // Support for copy-on-write |
extension String {

  /// Appends the given string to this string.
  ///
  /// The following example builds a customized greeting by using the
  /// `append(_:)` method:
  ///
  ///     var greeting = "Hello, "
  ///     if let name = getUserName() {
  ///         greeting.append(name)
  ///     } else {
  ///         greeting.append("friend")
  ///     }
  ///     print(greeting)
  ///     // Prints "Hello, friend"
  ///
  /// - Parameter other: Another string.
  public mutating func append(_ other: String) {
    _guts.append(other._guts)
  }

  /// Appends the given Unicode scalar to the string.
  ///
  /// - Parameter x: A Unicode scalar value.
  ///
  /// - Complexity: Appending a Unicode scalar to a string averages to O(1)
  ///   over many additions.
  @available(*, unavailable, message: "Replaced by append(_: String)")
  public mutating func append(_ x: Unicode.Scalar) {
    // Marked unavailable, so this body can never be reached by a caller.
    Builtin.unreachable()
  }

  // TODO(SSO): Consider small-checking version

  /// Wraps the given storage object as a large string without checking
  /// whether a small representation would fit.
  @inlinable // FIXME(sil-serialize-all)
  init<CodeUnit>(_largeStorage storage: _SwiftStringStorage<CodeUnit>)
  where CodeUnit : FixedWidthInteger & UnsignedInteger {
    self._guts = _StringGuts(_large: storage)
  }
}
| |
extension String {
  /// Returns the string formed by appending `rhs` to a copy of `lhs`.
  ///
  /// - Parameters:
  ///   - lhs: The leading string; it is not modified.
  ///   - rhs: The string placed after `lhs`.
  /// - Returns: The concatenation of `lhs` and `rhs`.
  @inlinable // FIXME(sil-serialize-all)
  @_effects(readonly)
  @_semantics("string.concat")
  public static func + (lhs: String, rhs: String) -> String {
    var concatenation = lhs
    concatenation.append(rhs)
    return concatenation
  }

  /// Appends `rhs` to `lhs` in place.
  ///
  /// - Parameters:
  ///   - lhs: The string to extend.
  ///   - rhs: The string whose contents are appended.
  @inlinable // FIXME(sil-serialize-all)
  public static func += (lhs: inout String, rhs: String) {
    lhs.append(rhs)
  }
}
| |
extension Sequence where Element: StringProtocol {

  /// Returns a new string by concatenating the elements of the sequence,
  /// adding the given separator between each element.
  ///
  /// The following example shows how an array of strings can be joined to a
  /// single, comma-separated string:
  ///
  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
  ///     let list = cast.joined(separator: ", ")
  ///     print(list)
  ///     // Prints "Vivien, Marlon, Kim, Karl"
  ///
  /// - Parameter separator: A string to insert between each of the elements
  ///   in this sequence. The default separator is an empty string.
  /// - Returns: A single, concatenated string.
  @_specialize(where Self == Array<Substring>)
  @_specialize(where Self == Array<String>)
  public func joined(separator: String = "") -> String {
    return _joined(separator: separator)
  }

  /// Shared implementation behind the public `joined(separator:)` overloads.
  ///
  /// Attempts a sizing pass through `_preprocessingPass` so that capacity can
  /// be reserved up front, then appends every element, inserting `separator`
  /// between consecutive elements.
  ///
  /// - Parameter separator: A string to insert between each of the elements
  ///   in this sequence. The default separator is an empty string.
  /// - Returns: A single, concatenated string.
  internal func _joined(separator: String = "") -> String {
    let separatorSize = separator._guts.count

    // `reservation` is optional: when the sizing pass produces no estimate,
    // the fallback below reserves just `separatorSize`.
    let reservation = self._preprocessingPass {
      () -> Int in
      var r = 0
      for chunk in self {
        r += separatorSize + chunk._encodedOffsetRange.count
      }
      // One separator was counted per element, but only one fewer is
      // actually written, so drop the surplus one.
      return r > 0 ? r - separatorSize : 0
    }

    let capacity = reservation ?? separatorSize
    var result = ""
    result.reserveCapacity(capacity)

    // With an empty separator there is nothing to interleave.
    if separator.isEmpty {
      for x in self {
        result._guts.append(x)
      }
      return result
    }

    // Append the first element on its own so that a separator only ever
    // appears between two elements.
    var iter = makeIterator()
    if let first = iter.next() {
      result._guts.append(first)
      while let next = iter.next() {
        result.append(separator)
        result._guts.append(next)
      }
    }
    return result
  }
}
| |
| |
| // This overload is necessary because String now conforms to |
| // BidirectionalCollection, and there are other `joined` overloads that are |
| // considered more specific. See Flatten.swift.gyb. |
extension BidirectionalCollection where Element == String {
  /// Concatenates the strings in this collection into one string, placing
  /// `separator` between each pair of neighboring elements.
  ///
  /// For example, an array of names can be turned into a comma-separated
  /// list:
  ///
  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
  ///     let list = cast.joined(separator: ", ")
  ///     print(list)
  ///     // Prints "Vivien, Marlon, Kim, Karl"
  ///
  /// - Parameter separator: The string inserted between elements. Defaults to
  ///   the empty string.
  /// - Returns: The concatenated string.
  @_specialize(where Self == Array<String>)
  public func joined(separator: String = "") -> String {
    return self._joined(separator: separator)
  }
}
| |
| #if _runtime(_ObjC) |
/// Declaration of the entry point bound to the symbol
/// `swift_stdlib_NSStringLowercaseString`; no body is provided in this file.
///
/// Compiled only when `_runtime(_ObjC)` holds. `String.lowercased()` calls it
/// for strings that are not ASCII, passing the result of
/// `_bridgeToObjectiveCImpl()` and wrapping the returned value with
/// `String(_cocoaString:)`.
///
/// NOTE(review): presumably forwards to `-[NSString lowercaseString]`; the
/// implementation is not visible from here, so confirm against the shims.
@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringLowercaseString")
internal func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _CocoaString
| |
/// Declaration of the entry point bound to the symbol
/// `swift_stdlib_NSStringUppercaseString`; no body is provided in this file.
///
/// Compiled only when `_runtime(_ObjC)` holds. `String.uppercased()` calls it
/// for strings that are not ASCII, passing the result of
/// `_bridgeToObjectiveCImpl()` and wrapping the returned value with
/// `String(_cocoaString:)`.
///
/// NOTE(review): presumably forwards to `-[NSString uppercaseString]`; the
/// implementation is not visible from here, so confirm against the shims.
@usableFromInline // FIXME(sil-serialize-all)
@_silgen_name("swift_stdlib_NSStringUppercaseString")
internal func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
| #else |
/// Returns a lowercased copy of `str` produced with
/// `_swift_stdlib_unicode_strToLower`; used when the Objective-C runtime is
/// not available.
///
/// The input is obtained as contiguous UTF-16 and converted into newly
/// created UTF-16 storage sized to the input. If the conversion reports that
/// more room is required than that storage offers, new storage of the
/// reported size is created and the conversion is run a second time.
///
/// - Parameter str: The string to convert.
/// - Returns: A string backed by the newly created storage.
@usableFromInline // FIXME(sil-serialize-all)
internal func _nativeUnicodeLowercaseString(_ str: String) -> String {

  // TODO (TODO: JIRA): check for small

  let guts = str._guts._extractContiguousUTF16()
  // `utf16` below is an unmanaged view, so `guts` must stay alive until the
  // function is done with it.
  defer { _fixLifetime(guts) }
  let utf16 = guts._unmanagedUTF16View
  var storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
    capacity: utf16.count,
    count: utf16.count)

  // Try to write it out to the same length.
  let z = _swift_stdlib_unicode_strToLower(
    storage.start, Int32(storage.capacity), // FIXME: handle overflow case
    utf16.start, Int32(utf16.count))
  let correctSize = Int(z)

  // If more space is needed, do it again with the correct buffer size.
  if correctSize > storage.capacity {
    storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
      capacity: correctSize,
      count: correctSize)
    _swift_stdlib_unicode_strToLower(
      storage.start, Int32(storage.capacity), // FIXME: handle overflow case
      utf16.start, Int32(utf16.count))
  }
  // The result may also be shorter than the input, so always record the
  // reported length.
  storage.count = correctSize
  return String(_largeStorage: storage)
}
| |
/// Returns an uppercased copy of `str` produced with
/// `_swift_stdlib_unicode_strToUpper`; used when the Objective-C runtime is
/// not available.
///
/// The conversion first targets storage sized to the input. When the
/// reported length exceeds that storage's capacity, storage of the reported
/// size is created and the conversion is repeated.
///
/// - Parameter str: The string to convert.
/// - Returns: A string backed by the newly created storage.
@usableFromInline // FIXME(sil-serialize-all)
internal func _nativeUnicodeUppercaseString(_ str: String) -> String {

  // TODO (TODO: JIRA): check for small

  let contiguousGuts = str._guts._extractContiguousUTF16()
  defer { _fixLifetime(contiguousGuts) }
  let source = contiguousGuts._unmanagedUTF16View
  var destination = _SwiftStringStorage<UTF16.CodeUnit>.create(
    capacity: source.count,
    count: source.count)

  // First attempt: assume the result fits in storage sized to the input.
  let requiredCount = Int(
    _swift_stdlib_unicode_strToUpper(
      destination.start,
      Int32(destination.capacity), // FIXME: handle overflow case
      source.start,
      Int32(source.count)))

  // Second attempt, only when the first one reported a larger length.
  if requiredCount > destination.capacity {
    destination = _SwiftStringStorage<UTF16.CodeUnit>.create(
      capacity: requiredCount,
      count: requiredCount)
    _swift_stdlib_unicode_strToUpper(
      destination.start,
      Int32(destination.capacity), // FIXME: handle overflow case
      source.start,
      Int32(source.count))
  }
  destination.count = requiredCount
  return String(_largeStorage: destination)
}
| #endif |
| |
| // Unicode algorithms |
extension String {
  // FIXME: implement case folding without relying on Foundation.
  // <rdar://problem/17550602> [unicode] Implement case folding

  /// Bit set marking the lowercase ASCII letters, i.e. the characters that
  /// `uppercased()` has to shift.
  ///
  /// The bit for a character sits at index `(asciiValue - 1) / 2`, so every
  /// bit stands for two adjacent code points. Bits 48 through 60 are set,
  /// which covers `a` (0x61) through `z` (0x7A).
  @inlinable // FIXME(sil-serialize-all)
  internal var _asciiLowerCaseTable: UInt64 {
    @inline(__always)
    get {
      return 0x1FFF_0000_0000_0000
    }
  }

  /// The counterpart of `_asciiLowerCaseTable` for uppercase ASCII letters.
  ///
  /// Bits 32 through 44 are set, which covers `A` (0x41) through `Z` (0x5A).
  @inlinable // FIXME(sil-serialize-all)
  internal var _asciiUpperCaseTable: UInt64 {
    @inline(__always)
    get {
      return 0x0000_1FFF_0000_0000
    }
  }

  /// Returns a copy of the string with its letters converted to lowercase.
  ///
  /// For example:
  ///
  ///     let cafe = "BBQ Café 🍵"
  ///     print(cafe.lowercased())
  ///     // Prints "bbq café 🍵"
  ///
  /// - Returns: A lowercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  public func lowercased() -> String {
    guard _guts.isASCII else {
      // Non-ASCII contents are handed to the platform's conversion routine.
      #if _runtime(_ObjC)
      return String(_cocoaString:
        _stdlib_NSStringLowercaseString(self._bridgeToObjectiveCImpl()))
      #else
      return _nativeUnicodeLowercaseString(self)
      #endif
    }

    let upperCaseBits = _asciiUpperCaseTable
    var loweredGuts = _guts
    loweredGuts.withMutableASCIIStorage(unusedCapacity: 0) { storage in
      for offset in 0..<storage._value.count {
        // Branch-free form of:
        //   switch source[offset] {
        //   case let x where (x >= 0x41 && x <= 0x5a):
        //     dest[offset] = x &+ 0x20
        //   case let x:
        //     dest[offset] = x
        //   }
        // The table lookup yields 1 for an uppercase letter and 0 otherwise;
        // shifting that left by 5 gives the 0x20 (or 0x0) to add.
        let unit = storage._value.start[offset]
        let bitIndex = UInt64(((unit &- 1) & 0x7F) &>> 1)
        let delta = ((upperCaseBits &>> bitIndex) & 0x1) &<< 5
        // `delta` is 0x0 or 0x20, so truncating to UInt8 loses nothing and
        // the addition stays within the ASCII range.
        storage._value.start[offset] = unit &+ UInt8(truncatingIfNeeded: delta)
      }
    }
    return String(loweredGuts)
  }

  /// Returns a copy of the string with its letters converted to uppercase.
  ///
  /// For example:
  ///
  ///     let cafe = "Café 🍵"
  ///     print(cafe.uppercased())
  ///     // Prints "CAFÉ 🍵"
  ///
  /// - Returns: An uppercase copy of the string.
  ///
  /// - Complexity: O(*n*)
  public func uppercased() -> String {
    guard _guts.isASCII else {
      // Non-ASCII contents are handed to the platform's conversion routine.
      #if _runtime(_ObjC)
      return String(_cocoaString:
        _stdlib_NSStringUppercaseString(self._bridgeToObjectiveCImpl()))
      #else
      return _nativeUnicodeUppercaseString(self)
      #endif
    }

    let lowerCaseBits = _asciiLowerCaseTable
    var raisedGuts = _guts
    raisedGuts.withMutableASCIIStorage(unusedCapacity: 0) { storage in
      for offset in 0..<storage._value.count {
        // Same table technique as in `lowercased()`, except that the 0x20 is
        // subtracted from lowercase letters instead of added to uppercase
        // ones.
        let unit = storage._value.start[offset]
        let bitIndex = UInt64(((unit &- 1) & 0x7F) &>> 1)
        let delta = ((lowerCaseBits &>> bitIndex) & 0x1) &<< 5
        storage._value.start[offset] = unit &- UInt8(truncatingIfNeeded: delta)
      }
    }
    return String(raisedGuts)
  }

  /// Creates a string from the `description` of the given
  /// `LosslessStringConvertible` value.
  ///
  /// - Parameter value: The value whose description becomes the new string.
  @inlinable // FIXME(sil-serialize-all)
  public init<T : LosslessStringConvertible>(_ value: T) {
    let text = value.description
    self = text
  }
}
| |
extension String : CustomStringConvertible {
  /// The string itself.
  ///
  /// Reading this property directly is discouraged; plain assignment to a new
  /// constant or variable gives an equal string.
  @inlinable // FIXME(sil-serialize-all)
  public var description: String {
    get {
      return self
    }
  }
}