Revert "[stdlib] String index interchange, etc." (#10812)
rdar://33186295
diff --git a/stdlib/public/SDK/Foundation/ExtraStringAPIs.swift b/stdlib/public/SDK/Foundation/ExtraStringAPIs.swift
index ed9629b..17c4d63 100644
--- a/stdlib/public/SDK/Foundation/ExtraStringAPIs.swift
+++ b/stdlib/public/SDK/Foundation/ExtraStringAPIs.swift
@@ -10,24 +10,25 @@
//
//===----------------------------------------------------------------------===//
-extension String.UTF16View.Index {
+// Random access for String.UTF16View, only when Foundation is
+// imported. Making this API dependent on Foundation decouples the
+// Swift core from a UTF16 representation.
+extension String.UTF16View.Index : Strideable {
/// Construct from an integer offset.
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
public init(_ offset: Int) {
_precondition(offset >= 0, "Negative UTF16 index offset not allowed")
self.init(_offset: offset)
}
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
public func distance(to other: String.UTF16View.Index) -> Int {
return _offset.distance(to: other._offset)
}
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
public func advanced(by n: Int) -> String.UTF16View.Index {
return String.UTF16View.Index(_offset.advanced(by: n))
}
}
+
+extension String.UTF16View : RandomAccessCollection {}
+extension String.UTF16View.Indices : RandomAccessCollection {}
+
diff --git a/stdlib/public/SDK/Foundation/NSRange.swift b/stdlib/public/SDK/Foundation/NSRange.swift
index 37cf760..1342d5c 100644
--- a/stdlib/public/SDK/Foundation/NSRange.swift
+++ b/stdlib/public/SDK/Foundation/NSRange.swift
@@ -151,8 +151,8 @@
where R.Bound == S.Index, S.Index == String.Index {
let r = region.relative(to: target)
self = NSRange(
- location: r.lowerBound.encodedOffset - target.startIndex.encodedOffset,
- length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset
+ location: r.lowerBound._utf16Index - target.startIndex._utf16Index,
+ length: r.upperBound._utf16Index - r.lowerBound._utf16Index
)
}
diff --git a/stdlib/public/SDK/Foundation/NSStringAPI.swift b/stdlib/public/SDK/Foundation/NSStringAPI.swift
index 2ce6624..585b6e2 100644
--- a/stdlib/public/SDK/Foundation/NSStringAPI.swift
+++ b/stdlib/public/SDK/Foundation/NSStringAPI.swift
@@ -32,8 +32,8 @@
func _toNSRange(_ r: Range<String.Index>) -> NSRange {
return NSRange(
- location: r.lowerBound.encodedOffset,
- length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset)
+ location: r.lowerBound._utf16Index,
+ length: r.upperBound._utf16Index - r.lowerBound._utf16Index)
}
// We only need this for UnsafeMutablePointer, but there's not currently a way
@@ -72,7 +72,10 @@
/// Return an `Index` corresponding to the given offset in our UTF-16
/// representation.
func _index(_ utf16Index: Int) -> Index {
- return Index(encodedOffset: utf16Index)
+ return Index(
+ _base: String.UnicodeScalarView.Index(_position: utf16Index),
+ in: characters
+ )
}
/// Return a `Range<Index>` corresponding to the given `NSRange` of
@@ -1257,7 +1260,7 @@
public
func rangeOfComposedCharacterSequence(at anIndex: Index) -> Range<Index> {
return _range(
- _ns.rangeOfComposedCharacterSequence(at: anIndex.encodedOffset))
+ _ns.rangeOfComposedCharacterSequence(at: anIndex._utf16Index))
}
// - (NSRange)rangeOfComposedCharacterSequencesForRange:(NSRange)range
@@ -1607,7 +1610,7 @@
/// Returns a new string containing the characters of the
/// `String` from the one at a given index to the end.
public func substring(from index: Index) -> String {
- return _ns.substring(from: index.encodedOffset)
+ return _ns.substring(from: index._utf16Index)
}
// - (NSString *)substringToIndex:(NSUInteger)anIndex
@@ -1615,7 +1618,7 @@
/// Returns a new string containing the characters of the
/// `String` up to, but not including, the one at a given index.
public func substring(to index: Index) -> String {
- return _ns.substring(to: index.encodedOffset)
+ return _ns.substring(to: index._utf16Index)
}
// - (NSString *)substringWithRange:(NSRange)aRange
diff --git a/stdlib/public/SDK/Foundation/URLComponents.swift b/stdlib/public/SDK/Foundation/URLComponents.swift
index f4feba0..652c63a 100644
--- a/stdlib/public/SDK/Foundation/URLComponents.swift
+++ b/stdlib/public/SDK/Foundation/URLComponents.swift
@@ -194,8 +194,8 @@
private func _toStringRange(_ r : NSRange) -> Range<String.Index>? {
guard r.location != NSNotFound else { return nil }
- let utf16Start = String.UTF16View.Index(encodedOffset: r.location)
- let utf16End = String.UTF16View.Index(encodedOffset: r.location + r.length)
+ let utf16Start = String.UTF16View.Index(_offset: r.location)
+ let utf16End = String.UTF16View.Index(_offset: r.location + r.length)
guard let s = self.string else { return nil }
guard let start = String.Index(utf16Start, within: s) else { return nil }
diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt
index 07c5e40..005e0bf 100644
--- a/stdlib/public/core/CMakeLists.txt
+++ b/stdlib/public/core/CMakeLists.txt
@@ -126,7 +126,6 @@
StringBuffer.swift
StringComparable.swift
StringCore.swift
- StringIndex.swift
StringInterpolation.swift
StringLegacy.swift
StringRangeReplaceableCollection.swift.gyb
diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json
index 117b000..f816522 100644
--- a/stdlib/public/core/GroupInfo.json
+++ b/stdlib/public/core/GroupInfo.json
@@ -17,7 +17,6 @@
"StringComparable.swift",
"StringCore.swift",
"StringHashable.swift",
- "StringIndex.swift",
"StringIndexConversions.swift",
"StringInterpolation.swift",
"StringLegacy.swift",
diff --git a/stdlib/public/core/StringCharacterView.swift b/stdlib/public/core/StringCharacterView.swift
index d4185a4..13169af 100644
--- a/stdlib/public/core/StringCharacterView.swift
+++ b/stdlib/public/core/StringCharacterView.swift
@@ -63,7 +63,7 @@
/// The offset of this view's `_core` from an original core. This works
/// around the fact that `_StringCore` is always zero-indexed.
- /// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
+ /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
/// before that value is used as a `_core` index.
@_versioned
internal var _coreOffset: Int
@@ -178,14 +178,61 @@
return UnicodeScalarView(_core, coreOffset: _coreOffset)
}
- public typealias Index = String.Index
+ /// A position in a string's `CharacterView` instance.
+ ///
+ /// You can convert between indices of the different string views by using
+ /// conversion initializers and the `samePosition(in:)` method overloads.
+ /// The following example finds the index of the first space in the string's
+ /// character view and then converts that to the same position in the UTF-8
+ /// view:
+ ///
+ /// let hearts = "Hearts <3 ♥︎ 💘"
+ /// if let i = hearts.characters.index(of: " ") {
+ /// let j = i.samePosition(in: hearts.utf8)
+ /// print(Array(hearts.utf8[..<j]))
+ /// }
+ /// // Prints "[72, 101, 97, 114, 116, 115]"
+ public struct Index : Comparable, CustomPlaygroundQuickLookable {
+ public // SPI(Foundation)
+ init(_base: String.UnicodeScalarView.Index, in c: String.CharacterView) {
+ self._base = _base
+ self._countUTF16 = c._measureExtendedGraphemeClusterForward(from: _base)
+ }
+
+ internal init(_base: UnicodeScalarView.Index, _countUTF16: Int) {
+ self._base = _base
+ self._countUTF16 = _countUTF16
+ }
+
+ internal let _base: UnicodeScalarView.Index
+
+ /// The count of this extended grapheme cluster in UTF-16 code units.
+ internal let _countUTF16: Int
+
+ /// The integer offset of this index in UTF-16 code units.
+ public // SPI(Foundation)
+ var _utf16Index: Int {
+ return _base._position
+ }
+
+ /// The one-past-the-end index for this extended grapheme cluster in Unicode
+ /// scalars.
+ internal var _endBase: UnicodeScalarView.Index {
+ return UnicodeScalarView.Index(_position: _utf16Index + _countUTF16)
+ }
+
+ public var customPlaygroundQuickLook: PlaygroundQuickLook {
+ return .int(Int64(_utf16Index))
+ }
+ }
+
public typealias IndexDistance = Int
/// The position of the first character in a nonempty character view.
///
/// In an empty character view, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
- return unicodeScalars.startIndex
+ return Index(_base: unicodeScalars.startIndex, in: self)
}
/// A character view's "past the end" position---that is, the position one
@@ -193,51 +240,35 @@
///
/// In an empty character view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
- return unicodeScalars.endIndex
+ return Index(_base: unicodeScalars.endIndex, in: self)
}
- internal func _index(atEncodedOffset n: Int) -> Index {
- let stride = _measureExtendedGraphemeClusterForward(
- from: Index(encodedOffset: n))
- return Index(encodedOffset: n, .character(stride: UInt16(stride)))
- }
-
/// Returns the next consecutive position after `i`.
///
/// - Precondition: The next position is valid.
public func index(after i: Index) -> Index {
- _precondition(
- i < unicodeScalars.endIndex,
+ _precondition(i._base < unicodeScalars.endIndex,
"cannot increment beyond endIndex")
-
- _precondition(
- i >= unicodeScalars.startIndex,
+ _precondition(i._base >= unicodeScalars.startIndex,
"cannot increment invalid index")
-
- var j = i
- while true {
- if case .character(let oldStride) = j._cache {
- return _index(atEncodedOffset: j.encodedOffset + Int(oldStride))
- }
- j = _index(atEncodedOffset: j.encodedOffset)
- }
+ return Index(_base: i._endBase, in: self)
}
/// Returns the previous consecutive position before `i`.
///
/// - Precondition: The previous position is valid.
public func index(before i: Index) -> Index {
- _precondition(i > unicodeScalars.startIndex,
+ _precondition(i._base > unicodeScalars.startIndex,
"cannot decrement before startIndex")
- _precondition(i <= unicodeScalars.endIndex,
+ _precondition(i._base <= unicodeScalars.endIndex,
"cannot decrement invalid index")
-
- let stride = _measureExtendedGraphemeClusterBackward(
- from: Index(encodedOffset: i.encodedOffset))
-
+ let predecessorLengthUTF16 =
+ _measureExtendedGraphemeClusterBackward(from: i._base)
return Index(
- encodedOffset: i.encodedOffset &- stride,
- .character(stride: numericCast(stride))
+ _base: UnicodeScalarView.Index(
+ _position: i._utf16Index - predecessorLengthUTF16
+ ),
+ in: self
)
}
@@ -334,8 +365,8 @@
internal func _measureExtendedGraphemeClusterForward(
from start: UnicodeScalarView.Index
) -> Int {
- let startPosition = start.encodedOffset
- let endPosition = unicodeScalars.endIndex.encodedOffset
+ let startPosition = start._position
+ let endPosition = unicodeScalars.endIndex._position
// No more graphemes
if startPosition == endPosition {
@@ -348,7 +379,7 @@
}
// Our relative offset from the _StringCore's baseAddress pointer. If our
- // _core is not a substring, this is the same as start.encodedOffset. Otherwise,
+ // _core is not a substring, this is the same as start._position. Otherwise,
// it is the code unit relative offset into the substring and not the
// absolute offset into the outer string.
let startOffset = startPosition - _coreOffset
@@ -388,7 +419,7 @@
func _measureExtendedGraphemeClusterForwardSlow(
startOffset: Int
) -> Int {
- let endOffset = unicodeScalars.endIndex.encodedOffset - _coreOffset
+ let endOffset = unicodeScalars.endIndex._position - _coreOffset
let numCodeUnits = endOffset - startOffset
_sanityCheck(numCodeUnits >= 2, "should have at least two code units")
@@ -470,8 +501,8 @@
internal func _measureExtendedGraphemeClusterBackward(
from end: UnicodeScalarView.Index
) -> Int {
- let startPosition = unicodeScalars.startIndex.encodedOffset
- let endPosition = end.encodedOffset
+ let startPosition = unicodeScalars.startIndex._position
+ let endPosition = end._position
// No more graphemes
if startPosition == endPosition {
@@ -528,7 +559,7 @@
) -> Int {
let startOffset = 0
let numCodeUnits = endOffset - startOffset
- _sanityCheck(unicodeScalars.startIndex.encodedOffset - _coreOffset == 0,
+ _sanityCheck(unicodeScalars.startIndex._position - _coreOffset == 0,
"position/offset mismatch in _StringCore as a substring")
_sanityCheck(numCodeUnits >= 2,
"should have at least two code units")
@@ -612,38 +643,31 @@
///
/// - Parameter position: A valid index of the character view. `position`
/// must be less than the view's end index.
- public subscript(i_: Index) -> Character {
- var i = i_
- while true {
- if case .character(let stride) = i._cache {
- if _fastPath(stride == 1) {
- // For single-code-unit graphemes, we can construct a Character directly
- // from a single unicode scalar (if sub-surrogate).
- let relativeOffset = i.encodedOffset - _coreOffset
- if _core.isASCII {
- let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
- // Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
- // performed in Debug mode, so they need to be duplicated here.
- // Falling back to the non-optimal behavior in the case they don't
- // pass.
- if relativeOffset >= asciiBuffer.startIndex &&
- relativeOffset < asciiBuffer.endIndex {
- return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
- }
- } else if _core._baseAddress != nil {
- let cu = _core._nthContiguous(relativeOffset)
- // Only constructible if sub-surrogate
- if (cu < 0xd800) {
- return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
- }
- }
+ public subscript(i: Index) -> Character {
+ if i._countUTF16 == 1 {
+ // For single-code-unit graphemes, we can construct a Character directly
+ // from a single unicode scalar (if sub-surrogate).
+ let relativeOffset = i._base._position - _coreOffset
+ if _core.isASCII {
+ let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
+ // Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
+ // performed in Debug mode, so they need to be duplicated here.
+ // Falling back to the non-optimal behavior in the case they don't
+ // pass.
+ if relativeOffset >= asciiBuffer.startIndex &&
+ relativeOffset < asciiBuffer.endIndex {
+ return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
}
-
- let s = self[i..<Index(encodedOffset: i.encodedOffset + Int(stride))]
- return Character(s._ephemeralContent)
+ } else if _core._baseAddress != nil {
+ let cu = _core._nthContiguous(relativeOffset)
+ // Only constructible if sub-surrogate
+ if (cu < 0xd800) {
+ return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
+ }
}
- i = _index(atEncodedOffset: i.encodedOffset)
}
+
+ return Character(String(unicodeScalars[i._base..<i._endBase]))
}
}
@@ -672,8 +696,8 @@
with newElements: C
) where C : Collection, C.Element == Character {
let rawSubRange: Range<Int> =
- bounds.lowerBound.encodedOffset - _coreOffset
- ..< bounds.upperBound.encodedOffset - _coreOffset
+ bounds.lowerBound._base._position - _coreOffset
+ ..< bounds.upperBound._base._position - _coreOffset
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
}
@@ -740,9 +764,9 @@
/// - Complexity: O(*n*) if the underlying string is bridged from
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range<Index>) -> String.CharacterView {
- return String.CharacterView(
- unicodeScalars[bounds]._core,
- coreOffset: bounds.lowerBound.encodedOffset)
+ let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
+ return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
+ coreOffset: unicodeScalarRange.lowerBound._position)
}
}
diff --git a/stdlib/public/core/StringCore.swift b/stdlib/public/core/StringCore.swift
index d2cd6b4..2ad24dc 100644
--- a/stdlib/public/core/StringCore.swift
+++ b/stdlib/public/core/StringCore.swift
@@ -428,6 +428,7 @@
// In order to grow the substring in place, this _StringCore should point
// at the substring at the end of a _StringBuffer. Otherwise, some other
// String is using parts of the buffer beyond our last byte.
+ let usedStart = _pointer(toElementAt:0)
let usedEnd = _pointer(toElementAt:count)
// Attempt to claim unused capacity in the buffer
diff --git a/stdlib/public/core/StringIndex.swift b/stdlib/public/core/StringIndex.swift
deleted file mode 100644
index be5ac07..0000000
--- a/stdlib/public/core/StringIndex.swift
+++ /dev/null
@@ -1,162 +0,0 @@
-//===--- StringIndex.swift ------------------------------------------------===//
-//
-// This source file is part of the Swift.org open source project
-//
-// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
-// Licensed under Apache License v2.0 with Runtime Library Exception
-//
-// See https://swift.org/LICENSE.txt for license information
-// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
-//
-//===----------------------------------------------------------------------===//
-extension String {
- /// A position of a character or code unit in a string.
- public struct Index {
- internal var _compoundOffset : UInt64
- @_versioned
- internal var _cache: _Cache
-
- internal typealias _UTF8Buffer = _ValidUTF8Buffer<UInt64>
- @_versioned
- internal enum _Cache {
- case utf16
- case utf8(buffer: _UTF8Buffer)
- case character(stride: UInt16)
- case unicodeScalar(value: Unicode.Scalar)
- }
- }
-}
-
-/// Convenience accessors
-extension String.Index._Cache {
- var utf16: Void? {
- if case .utf16 = self { return () } else { return nil }
- }
- var utf8: String.Index._UTF8Buffer? {
- if case .utf8(let r) = self { return r } else { return nil }
- }
- var character: UInt16? {
- if case .character(let r) = self { return r } else { return nil }
- }
- var unicodeScalar: UnicodeScalar? {
- if case .unicodeScalar(let r) = self { return r } else { return nil }
- }
-}
-
-extension String.Index : Equatable {
- public static func == (lhs: String.Index, rhs: String.Index) -> Bool {
- return lhs._compoundOffset == rhs._compoundOffset
- }
-}
-
-extension String.Index : Comparable {
- public static func < (lhs: String.Index, rhs: String.Index) -> Bool {
- return lhs._compoundOffset < rhs._compoundOffset
- }
-}
-
-extension String.Index {
- internal typealias _Self = String.Index
-
- /// Creates a new index at the specified UTF-16 offset.
- ///
- /// - Parameter offset: An offset in UTF-16 code units.
- public init(encodedOffset offset: Int) {
- _compoundOffset = UInt64(offset << _Self._strideBits)
- _cache = .utf16
- }
-
- @_versioned
- internal init(encodedOffset o: Int, transcodedOffset: Int = 0, _ c: _Cache) {
- _compoundOffset = UInt64(o << _Self._strideBits | transcodedOffset)
- _cache = c
- }
-
- internal static var _strideBits : Int { return 16 }
- internal static var _mask : UInt64 { return (1 &<< _Self._strideBits) &- 1 }
-
- internal mutating func _setEncodedOffset(_ x: Int) {
- _compoundOffset = UInt64(x << _Self._strideBits)
- }
-
- /// The offset into a string's UTF-16 encoding for this index.
- public var encodedOffset : Int {
- return Int(_compoundOffset >> numericCast(_Self._strideBits))
- }
-
- /// The offset of this index within whatever encoding this is being viewed as
- @_versioned
- internal var _transcodedOffset : Int {
- get {
- return Int(_compoundOffset & _Self._mask)
- }
- set {
- let extended = UInt64(newValue)
- _sanityCheck(extended <= _Self._mask)
- _compoundOffset &= ~_Self._mask
- _compoundOffset |= extended
- }
- }
-}
-
-// SPI for Foundation
-extension String.Index {
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
- public // SPI(Foundation)
- init(_position: Int) {
- self.init(encodedOffset: _position)
- }
-
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
- public // SPI(Foundation)
- init(_offset: Int) {
- self.init(encodedOffset: _offset)
- }
-
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
- public // SPI(Foundation)
- init(_base: String.Index, in c: String.CharacterView) {
- self = _base
- }
-
- /// The integer offset of this index in UTF-16 code units.
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
- public // SPI(Foundation)
- var _utf16Index: Int {
- return self.encodedOffset
- }
-
- /// The integer offset of this index in UTF-16 code units.
- @available(swift, deprecated: 3.2)
- @available(swift, obsoleted: 4.0)
- public // SPI(Foundation)
- var _offset: Int {
- return self.encodedOffset
- }
-}
-
-
-// backward compatibility for index interchange.
-extension Optional where Wrapped == String.Index {
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
- public static func ..<(
- lhs: String.Index?, rhs: String.Index?
- ) -> Range<String.Index> {
- return lhs! ..< rhs!
- }
-
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
- public static func ...(
- lhs: String.Index?, rhs: String.Index?
- ) -> ClosedRange<String.Index> {
- return lhs! ... rhs!
- }
-}
diff --git a/stdlib/public/core/StringIndexConversions.swift b/stdlib/public/core/StringIndexConversions.swift
index f000f8f..f9365ef 100644
--- a/stdlib/public/core/StringIndexConversions.swift
+++ b/stdlib/public/core/StringIndexConversions.swift
@@ -40,18 +40,95 @@
/// // Prints "nil"
///
/// - Parameters:
- /// - sourcePosition: A position in (a view of) the `other` parameter.
- /// - target: The string referenced by both `unicodeScalarIndex` and the
+ /// - unicodeScalarIndex: A position in the `unicodeScalars` view of the
+ /// `other` parameter.
+ /// - other: The string referenced by both `unicodeScalarIndex` and the
/// resulting index.
public init?(
- _ sourcePosition: String.Index,
- within target: String
+ _ unicodeScalarIndex: String.UnicodeScalarIndex,
+ within other: String
) {
- guard target.unicodeScalars._isOnGraphemeClusterBoundary(sourcePosition)
- else { return nil }
+ if !other.unicodeScalars._isOnGraphemeClusterBoundary(unicodeScalarIndex) {
+ return nil
+ }
+ self.init(_base: unicodeScalarIndex, in: other.characters)
+ }
- self = target.characters._index(
- atEncodedOffset: sourcePosition.encodedOffset)
+ /// Creates an index in the given string that corresponds exactly to the
+ /// specified `UTF16View` position.
+ ///
+ /// The following example finds the position of a space in a string's `utf16`
+ /// view and then converts that position to an index in the string. The
+ /// value `32` is the UTF-16 encoded value of a space character.
+ ///
+ /// let cafe = "Café 🍵"
+ ///
+ /// let utf16Index = cafe.utf16.index(of: 32)!
+ /// let stringIndex = String.Index(utf16Index, within: cafe)!
+ ///
+ /// print(cafe[..<stringIndex])
+ /// // Prints "Café"
+ ///
+ /// If the position passed in `utf16Index` doesn't have an exact
+ /// corresponding position in `other`, the result of the initializer is
+ /// `nil`. For example, an attempt to convert the position of the trailing
+ /// surrogate of a UTF-16 surrogate pair fails.
+ ///
+ /// The next example attempts to convert the indices of the two UTF-16 code
+ /// units that represent the teacup emoji (`"🍵"`). The index of the lead
+ /// surrogate is successfully converted to a position in `other`, but the
+ /// index of the trailing surrogate is not.
+ ///
+ /// let emojiHigh = cafe.utf16.index(after: utf16Index)
+ /// print(String.Index(emojiHigh, within: cafe))
+ /// // Prints "Optional(String.Index(...))"
+ ///
+ /// let emojiLow = cafe.utf16.index(after: emojiHigh)
+ /// print(String.Index(emojiLow, within: cafe))
+ /// // Prints "nil"
+ ///
+ /// - Parameters:
+ /// - utf16Index: A position in the `utf16` view of the `other` parameter.
+ /// - other: The string referenced by both `utf16Index` and the resulting
+ /// index.
+ public init?(
+ _ utf16Index: String.UTF16Index,
+ within other: String
+ ) {
+ if let me = utf16Index.samePosition(
+ in: other.unicodeScalars
+ )?.samePosition(in: other) {
+ self = me
+ }
+ else {
+ return nil
+ }
+ }
+
+ /// Creates an index in the given string that corresponds exactly to the
+ /// specified `UTF8View` position.
+ ///
+ /// If the position passed in `utf8Index` doesn't have an exact corresponding
+ /// position in `other`, the result of the initializer is `nil`. For
+ /// example, an attempt to convert the position of a UTF-8 continuation byte
+ /// returns `nil`.
+ ///
+ /// - Parameters:
+ /// - utf8Index: A position in the `utf8` view of the `other` parameter.
+ /// - other: The string referenced by both `utf8Index` and the resulting
+ /// index.
+ public init?(
+ _ utf8Index: String.UTF8Index,
+ within other: String
+ ) {
+ if let me = utf8Index.samePosition(
+ in: other.unicodeScalars
+ )?.samePosition(in: other) {
+ self = me
+ }
+ else {
+ return nil
+ }
}
/// Returns the position in the given UTF-8 view that corresponds exactly to
@@ -73,7 +150,7 @@
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition(
in utf8: String.UTF8View
- ) -> String.UTF8View.Index? {
+ ) -> String.UTF8View.Index {
return String.UTF8View.Index(self, within: utf8)
}
@@ -96,8 +173,33 @@
/// - Returns: The position in `utf16` that corresponds exactly to this index.
public func samePosition(
in utf16: String.UTF16View
- ) -> String.UTF16View.Index? {
+ ) -> String.UTF16View.Index {
return String.UTF16View.Index(self, within: utf16)
}
+
+ /// Returns the position in the given view of Unicode scalars that
+ /// corresponds exactly to this index.
+ ///
+ /// The index must be a valid index of `String(unicodeScalars)`.
+ ///
+ /// This example first finds the position of the character `"é"` and then uses
+ /// this method to find the same position in the string's `unicodeScalars`
+ /// view.
+ ///
+ /// let cafe = "Café"
+ /// if let i = cafe.index(of: "é") {
+ /// let j = i.samePosition(in: cafe.unicodeScalars)
+ /// print(cafe.unicodeScalars[j])
+ /// }
+ /// // Prints "é"
+ ///
+ /// - Parameter unicodeScalars: The view to use for the index conversion.
+ /// - Returns: The position in `unicodeScalars` that corresponds exactly to
+ /// this index.
+ public func samePosition(
+ in unicodeScalars: String.UnicodeScalarView
+ ) -> String.UnicodeScalarView.Index {
+ return String.UnicodeScalarView.Index(self, within: unicodeScalars)
+ }
}
diff --git a/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb b/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb
index 128cb59..2e02475 100644
--- a/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb
+++ b/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb
@@ -10,7 +10,10 @@
//
//===----------------------------------------------------------------------===//
-extension String : StringProtocol, RangeReplaceableCollection {
+extension String : StringProtocol, RangeReplaceableCollection {
+ /// The index type for subscripting a string.
+ public typealias Index = CharacterView.Index
+
/// A type that represents the number of steps between two `String.Index`
/// values, where one value is reachable from the other.
///
@@ -171,6 +174,16 @@
public subscript(i: Index) -> Character { return characters[i] }
}
+extension String.Index {
+ public static func == (lhs: String.Index, rhs: String.Index) -> Bool {
+ return lhs._base == rhs._base
+ }
+
+ public static func < (lhs: String.Index, rhs: String.Index) -> Bool {
+ return lhs._base < rhs._base
+ }
+}
+
extension String {
/// Creates a new string containing the characters in the given sequence.
///
@@ -187,7 +200,7 @@
///
/// - Parameter characters: A sequence of characters.
public init<S : Sequence>(_ characters: S)
- where S.Iterator.Element == Character {
+ where S.Element == Character {
self._core = CharacterView(characters)._core
}
@@ -229,7 +242,7 @@
///
/// - Parameter newElements: A sequence of characters.
public mutating func append<S : Sequence>(contentsOf newElements: S)
- where S.Iterator.Element == Character {
+ where S.Element == Character {
withMutableCharacters {
(v: inout CharacterView) in v.append(contentsOf: newElements)
}
@@ -252,7 +265,7 @@
public mutating func replaceSubrange<C>(
_ bounds: Range<Index>,
with newElements: C
- ) where C : Collection, C.Iterator.Element == Character {
+ ) where C : Collection, C.Element == Character {
withMutableCharacters {
(v: inout CharacterView)
in v.replaceSubrange(bounds, with: newElements)
@@ -292,7 +305,7 @@
/// `newElements`.
public mutating func insert<S : Collection>(
contentsOf newElements: S, at i: Index
- ) where S.Iterator.Element == Character {
+ ) where S.Element == Character {
withMutableCharacters {
(v: inout CharacterView) in v.insert(contentsOf: newElements, at: i)
}
diff --git a/stdlib/public/core/StringUTF16.swift b/stdlib/public/core/StringUTF16.swift
index 19cf4d3..17673f5 100644
--- a/stdlib/public/core/StringUTF16.swift
+++ b/stdlib/public/core/StringUTF16.swift
@@ -115,13 +115,37 @@
CustomStringConvertible,
CustomDebugStringConvertible {
- public typealias Index = String.Index
+ /// A position in a string's collection of UTF-16 code units.
+ ///
+ /// You can convert between indices of the different string views by using
+ /// conversion initializers and the `samePosition(in:)` method overloads.
+ /// For example, the following code sample finds the index of the first
+ /// space in a string and then converts that to the same
+ /// position in the UTF-16 view.
+ ///
+ /// let hearts = "Hearts <3 ♥︎ 💘"
+ /// if let i = hearts.index(of: " ") {
+ /// let j = i.samePosition(in: hearts.utf16)
+ /// print(Array(hearts.utf16[j...]))
+ /// print(hearts.utf16[j...])
+ /// }
+ /// // Prints "[32, 60, 51, 32, 9829, 65038, 32, 55357, 56472]"
+ /// // Prints " <3 ♥︎ 💘"
+ public struct Index {
+ // Foundation needs access to these fields so it can expose
+ // random access
+ public // SPI(Foundation)
+ init(_offset: Int) { self._offset = _offset }
+
+ public let _offset: Int
+ }
+
public typealias IndexDistance = Int
/// The position of the first code unit if the `String` is
/// nonempty; identical to `endIndex` otherwise.
public var startIndex: Index {
- return Index(encodedOffset: _offset)
+ return Index(_offset: _offset)
}
/// The "past the end" position---that is, the position one greater than
@@ -129,7 +153,7 @@
///
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
- return Index(encodedOffset: _offset + _length)
+ return Index(_offset: _offset + _length)
}
public struct Indices {
@@ -146,19 +170,19 @@
// TODO: swift-3-indexing-model - add docs
public func index(after i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
- return Index(encodedOffset: _unsafePlus(i.encodedOffset, 1))
+ return Index(_offset: _unsafePlus(i._offset, 1))
}
// TODO: swift-3-indexing-model - add docs
public func index(before i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
- return Index(encodedOffset: _unsafeMinus(i.encodedOffset, 1))
+ return Index(_offset: _unsafeMinus(i._offset, 1))
}
// TODO: swift-3-indexing-model - add docs
public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
// FIXME: swift-3-indexing-model: range check i?
- return Index(encodedOffset: i.encodedOffset.advanced(by: n))
+ return Index(_offset: i._offset.advanced(by: n))
}
// TODO: swift-3-indexing-model - add docs
@@ -166,17 +190,17 @@
_ i: Index, offsetBy n: IndexDistance, limitedBy limit: Index
) -> Index? {
// FIXME: swift-3-indexing-model: range check i?
- let d = i.encodedOffset.distance(to: limit.encodedOffset)
+ let d = i._offset.distance(to: limit._offset)
if (d > 0) ? (d < n) : (d > n) {
return nil
}
- return Index(encodedOffset: i.encodedOffset.advanced(by: n))
+ return Index(_offset: i._offset.advanced(by: n))
}
// TODO: swift-3-indexing-model - add docs
public func distance(from start: Index, to end: Index) -> IndexDistance {
// FIXME: swift-3-indexing-model: range check start and end?
- return start.encodedOffset.distance(to: end.encodedOffset)
+ return start._offset.distance(to: end._offset)
}
func _internalIndex(at i: Int) -> Int {
@@ -199,7 +223,7 @@
_precondition(i >= startIndex && i < endIndex,
"out-of-range access on a UTF16View")
- let index = _internalIndex(at: i.encodedOffset)
+ let index = _internalIndex(at: i._offset)
let u = _core[index]
if _fastPath((u &>> 11) != 0b1101_1) {
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
@@ -252,8 +276,8 @@
public subscript(bounds: Range<Index>) -> UTF16View {
return UTF16View(
_core,
- offset: _internalIndex(at: bounds.lowerBound.encodedOffset),
- length: bounds.upperBound.encodedOffset - bounds.lowerBound.encodedOffset)
+ offset: _internalIndex(at: bounds.lowerBound._offset),
+ length: bounds.upperBound._offset - bounds.lowerBound._offset)
}
internal init(_ _core: _StringCore) {
@@ -313,9 +337,9 @@
let wholeString = String(utf16._core)
guard
- let start = UTF16Index(encodedOffset: utf16._offset)
+ let start = UTF16Index(_offset: utf16._offset)
.samePosition(in: wholeString),
- let end = UTF16Index(encodedOffset: utf16._offset + utf16._length)
+ let end = UTF16Index(_offset: utf16._offset + utf16._length)
.samePosition(in: wholeString)
else
{
@@ -333,9 +357,94 @@
var _persistentContent : String { return String(self._core) }
}
+extension String.UTF16View.Index : Comparable {
+ // FIXME: swift-3-indexing-model: add complete set of forwards for Comparable
+ // assuming String.UTF8View.Index continues to exist
+ public static func == (
+ lhs: String.UTF16View.Index,
+ rhs: String.UTF16View.Index
+ ) -> Bool {
+ return lhs._offset == rhs._offset
+ }
+
+ public static func < (
+ lhs: String.UTF16View.Index,
+ rhs: String.UTF16View.Index
+ ) -> Bool {
+ return lhs._offset < rhs._offset
+ }
+}
+
// Index conversions
extension String.UTF16View.Index {
/// Creates an index in the given UTF-16 view that corresponds exactly to the
+ /// specified `UTF8View` position.
+ ///
+ /// The following example finds the position of a space in a string's `utf8`
+ /// view and then converts that position to an index in the string's
+ /// `utf16` view.
+ ///
+ /// let cafe = "Café 🍵"
+ ///
+ /// let utf8Index = cafe.utf8.index(of: 32)!
+ /// let utf16Index = String.UTF16View.Index(utf8Index, within: cafe.utf16)!
+ ///
+ /// print(cafe.utf16[..<utf16Index])
+ /// // Prints "Café"
+ ///
+ /// If the position passed as `utf8Index` doesn't have an exact corresponding
+ /// position in `utf16`, the result of the initializer is `nil`. For
+ /// example, because UTF-8 and UTF-16 represent high Unicode code points
+ /// differently, an attempt to convert the position of a UTF-8 continuation
+ /// byte fails.
+ ///
+ /// - Parameters:
+ /// - utf8Index: A position in a `UTF8View` instance. `utf8Index` must be
+ /// an element in `String(utf16).utf8.indices`.
+ /// - utf16: The `UTF16View` in which to find the new position.
+ public init?(
+ _ utf8Index: String.UTF8Index, within utf16: String.UTF16View
+ ) {
+ let core = utf16._core
+
+ _precondition(
+ utf8Index._coreIndex >= 0 && utf8Index._coreIndex <= core.endIndex,
+ "Invalid String.UTF8Index for this UTF-16 view")
+
+ // Detect positions that have no corresponding index.
+ if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
+ return nil
+ }
+ _offset = utf8Index._coreIndex
+ }
+
+ /// Creates an index in the given UTF-16 view that corresponds exactly to the
+ /// specified `UnicodeScalarView` position.
+ ///
+ /// The following example finds the position of the scalar `"é"` in a
+ /// string's `unicodeScalars` view and then converts that position to an
+ /// index in the string's `utf16` view.
+ ///
+ /// let cafe = "Café 🍵"
+ ///
+ /// let scalarIndex = cafe.unicodeScalars.index(of: "é")!
+ /// let utf16Index = String.UTF16View.Index(scalarIndex, within: cafe.utf16)
+ ///
+ /// print(cafe.utf16[...utf16Index])
+ /// // Prints "Café"
+ ///
+ /// - Parameters:
+ /// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
+ /// `unicodeScalarIndex` must be an element in
+ /// `String(utf16).unicodeScalars.indices`.
+ /// - utf16: The `UTF16View` in which to find the new position.
+ public init(
+ _ unicodeScalarIndex: String.UnicodeScalarIndex,
+ within utf16: String.UTF16View) {
+ _offset = unicodeScalarIndex._position
+ }
+
+ /// Creates an index in the given UTF-16 view that corresponds exactly to the
/// specified string position.
///
/// The following example finds the position of a space in a string and then
@@ -350,13 +459,37 @@
/// // Prints "Café"
///
/// - Parameters:
- /// - sourcePosition: A position in a string or one of its views
- /// - target: The `UTF16View` in which to find the new position.
- public init?(
- _ sourcePosition: String.Index, within target: String.UTF16View
- ) {
- guard sourcePosition._transcodedOffset == 0 else { return nil }
- self.init(encodedOffset: sourcePosition.encodedOffset)
+ /// - index: A position in a string. `index` must be an element in
+ /// `String(utf16).indices`.
+ /// - utf16: The `UTF16View` in which to find the new position.
+ public init(_ index: String.Index, within utf16: String.UTF16View) {
+ _offset = index._utf16Index
+ }
+
+ /// Returns the position in the given UTF-8 view that corresponds exactly to
+ /// this index.
+ ///
+ /// The index must be a valid index of `String(utf8).utf16`.
+ ///
+ /// This example first finds the position of a space (UTF-16 code point `32`)
+ /// in a string's `utf16` view and then uses this method to find the same
+ /// position in the string's `utf8` view.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let i = cafe.utf16.index(of: 32)!
+ /// let j = i.samePosition(in: cafe.utf8)!
+ /// print(Array(cafe.utf8[..<j]))
+ /// // Prints "[67, 97, 102, 195, 169]"
+ ///
+ /// - Parameter utf8: The view to use for the index conversion.
+ /// - Returns: The position in `utf8` that corresponds exactly to this index.
+ /// If this index does not have an exact corresponding position in `utf8`,
+ /// this method returns `nil`. For example, an attempt to convert the
+ /// position of a UTF-16 trailing surrogate returns `nil`.
+ public func samePosition(
+ in utf8: String.UTF8View
+ ) -> String.UTF8View.Index? {
+ return String.UTF8View.Index(self, within: utf8)
}
/// Returns the position in the given view of Unicode scalars that
@@ -385,6 +518,32 @@
) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars)
}
+
+ /// Returns the position in the given string that corresponds exactly to this
+ /// index.
+ ///
+ /// This index must be a valid index of `characters.utf16`.
+ ///
+ /// This example first finds the position of a space (UTF-16 code point `32`)
+ /// in a string's `utf16` view and then uses this method to find the same
+ /// position in the string.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let i = cafe.utf16.index(of: 32)!
+ /// let j = i.samePosition(in: cafe)!
+ /// print(cafe[..<j])
+ /// // Prints "Café"
+ ///
+ /// - Parameter characters: The string to use for the index conversion.
+ /// - Returns: The position in `characters` that corresponds exactly to this
+ /// index. If this index does not have an exact corresponding position in
+ /// `characters`, this method returns `nil`. For example, an attempt to
+ /// convert the position of a UTF-16 trailing surrogate returns `nil`.
+ public func samePosition(
+ in characters: String
+ ) -> String.Index? {
+ return String.Index(self, within: characters)
+ }
}
// Reflection
@@ -481,31 +640,3 @@
}
}
-// backward compatibility for index interchange.
-extension String.UTF16View {
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(after i: Index?) -> Index {
- return index(after: i)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(
- _ i: Index?, offsetBy n: IndexDistance) -> Index {
- return index(i!, offsetBy: n)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
- public func distance(from i: Index?, to j: Index?) -> IndexDistance {
- return distance(from: i!, to: j!)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public subscript(i: Index?) -> Unicode.UTF16.CodeUnit {
- return self[i!]
- }
-}
diff --git a/stdlib/public/core/StringUTF8.swift b/stdlib/public/core/StringUTF8.swift
index 8c5ae5f..4d41412 100644
--- a/stdlib/public/core/StringUTF8.swift
+++ b/stdlib/public/core/StringUTF8.swift
@@ -22,6 +22,75 @@
// FIXME(ABI)#73 : The UTF-8 string view should have a custom iterator type to
// allow performance optimizations of linear traversals.
+extension _StringCore {
+ /// An integral type that holds a sequence of UTF-8 code units, starting in
+ /// its low byte.
+ internal typealias _UTF8Chunk = UInt64
+
+ /// Encode text starting at `i` as UTF-8. Returns a pair whose first
+ /// element is the index of the text following whatever got encoded,
+ /// and the second element contains the encoded UTF-8 starting in its
+ /// low byte. Any unused high bytes in the result will be set to
+ /// 0xFF.
+ @inline(__always)
+ func _encodeSomeUTF8(from i: Int) -> (Int, _UTF8Chunk) {
+ _sanityCheck(i <= count)
+
+ if let asciiBuffer = self.asciiBuffer {
+ // How many UTF-16 code units might we use before we've filled up
+ // our _UTF8Chunk with UTF-8 code units?
+ let utf16Count =
+ Swift.min(MemoryLayout<_UTF8Chunk>.size, asciiBuffer.count - i)
+
+ var result: _UTF8Chunk = ~0 // Start with all bits set
+
+ _memcpy(
+ dest: UnsafeMutableRawPointer(Builtin.addressof(&result)),
+ src: asciiBuffer.baseAddress! + i,
+ size: numericCast(utf16Count))
+
+ // Convert the _UTF8Chunk into host endianness.
+ return (i + utf16Count, _UTF8Chunk(littleEndian: result))
+ } else if _fastPath(_baseAddress != nil) {
+ // Transcoding should return a _UTF8Chunk in host endianness.
+ return _encodeSomeContiguousUTF16AsUTF8(from: i)
+ } else {
+#if _runtime(_ObjC)
+ return _encodeSomeNonContiguousUTF16AsUTF8(from: i)
+#else
+ _sanityCheckFailure("_encodeSomeUTF8: Unexpected cocoa string")
+#endif
+ }
+ }
+
+ /// Helper for `_encodeSomeUTF8`, above. Handles the case where the
+ /// storage is contiguous UTF-16.
+ func _encodeSomeContiguousUTF16AsUTF8(from i: Int) -> (Int, _UTF8Chunk) {
+ _sanityCheck(elementWidth == 2)
+ _sanityCheck(_baseAddress != nil)
+
+ let storage = UnsafeBufferPointer(start: startUTF16, count: self.count)
+ return _transcodeSomeUTF16AsUTF8(storage, i)
+ }
+
+#if _runtime(_ObjC)
+ /// Helper for `_encodeSomeUTF8`, above. Handles the case where the
+ /// storage is non-contiguous UTF-16.
+ func _encodeSomeNonContiguousUTF16AsUTF8(from i: Int) -> (Int, _UTF8Chunk) {
+ _sanityCheck(elementWidth == 2)
+ _sanityCheck(_baseAddress == nil)
+
+ let storage = _CollectionOf<Int, UInt16>(
+ _startIndex: 0, endIndex: self.count
+ ) {
+ (i: Int) -> UInt16 in
+ return _cocoaStringSubscript(self, i)
+ }
+ return _transcodeSomeUTF16AsUTF8(storage, i)
+ }
+#endif
+}
+
extension String {
/// A view of a string's contents as a collection of UTF-8 code units.
///
@@ -101,14 +170,109 @@
: Collection,
CustomStringConvertible,
CustomDebugStringConvertible {
- @_versioned
internal let _core: _StringCore
+ internal let _startIndex: Index
+ internal let _endIndex: Index
init(_ _core: _StringCore) {
self._core = _core
+ self._endIndex = Index(_coreIndex: _core.endIndex, Index._emptyBuffer)
+ if _fastPath(_core.count != 0) {
+ let (_, buffer) = _core._encodeSomeUTF8(from: 0)
+ self._startIndex = Index(_coreIndex: 0, buffer)
+ } else {
+ self._startIndex = self._endIndex
+ }
}
- public typealias Index = String.Index
+ init(_ _core: _StringCore, _ s: Index, _ e: Index) {
+ self._core = _core
+ self._startIndex = s
+ self._endIndex = e
+ }
+
+ /// A position in a string's `UTF8View` instance.
+ ///
+ /// You can convert between indices of the different string views by using
+ /// conversion initializers and the `samePosition(in:)` method overloads.
+ /// For example, the following code sample finds the index of the first
+ /// space in the string's character view and then converts that to the same
+ /// position in the UTF-8 view.
+ ///
+ /// let hearts = "Hearts <3 ♥︎ 💘"
+ /// if let i = hearts.index(of: " ") {
+ /// let j = i.samePosition(in: hearts.utf8)
+ /// print(Array(hearts.utf8[..<j]))
+ /// print(hearts.utf8[..<j])
+ /// }
+ /// // Prints "[72, 101, 97, 114, 116, 115]"
+ /// // Prints "Hearts"
+ public struct Index {
+ internal typealias Buffer = _StringCore._UTF8Chunk
+
+ init(_coreIndex: Int, _ _buffer: Buffer) {
+ self._coreIndex = _coreIndex
+ self._buffer = _buffer
+ }
+
+ /// True iff the index is at the end of its view or if the next
+ /// byte begins a new Unicode.Scalar.
+ internal func _isOnUnicodeScalarBoundary(in core: _StringCore) -> Bool {
+ let buffer = UInt32(extendingOrTruncating: _buffer)
+ let (codePoint, _) = UTF8._decodeOne(buffer)
+ return codePoint != nil || _isEndIndex(of: core)
+ }
+
+ /// True iff the index is at the end of its view
+ internal func _isEndIndex(of core: _StringCore) -> Bool {
+ return _buffer == Index._emptyBuffer
+ && _coreIndex == core.endIndex
+ }
+
+ /// The number of UTF-8 code units remaining in the buffer before the
+ /// next unicode scalar value is reached. This simulates calling
+ /// `index(after: i)` until `i._coreIndex` is incremented, but doesn't
+ /// need a `_core` reference.
+ internal var _utf8ContinuationBytesUntilNextUnicodeScalar: Int {
+ var buffer = _buffer
+ var count = 0
+
+ while true {
+ let currentUnit = UTF8.CodeUnit(extendingOrTruncating: buffer)
+ if currentUnit & 0b1100_0000 != 0b1000_0000 {
+ break
+ }
+ count += 1
+ buffer = Index._nextBuffer(after: buffer)
+ }
+ return count
+ }
+
+ /// The value of the buffer when it is empty
+ internal static var _emptyBuffer: Buffer {
+ return ~0
+ }
+
+ /// A Buffer value with the high byte set
+ internal static var _bufferHiByte: Buffer {
+ return 0xFF &<< ((MemoryLayout<Buffer>.size &- 1) &* 8)
+ }
+
+ /// Consume a byte of the given buffer: shift out the low byte
+ /// and put FF in the high byte
+ internal static func _nextBuffer(after thisBuffer: Buffer) -> Buffer {
+ return (thisBuffer &>> (8 as Buffer)) | _bufferHiByte
+ }
+
+ /// The position of `self`, rounded up to the nearest unicode
+ /// scalar boundary, in the underlying UTF-16.
+ internal let _coreIndex: Int
+ /// If `self` is at the end of its `_core`, has the value `_emptyBuffer`.
+ /// Otherwise, the low byte contains the value of the UTF-8 code unit
+ /// at this position.
+ internal let _buffer: Buffer
+ }
+
public typealias IndexDistance = Int
/// The position of the first code unit if the UTF-8 view is
@@ -116,7 +280,7 @@
///
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
public var startIndex: Index {
- return _index(atEncodedOffset: _core.startIndex)
+ return self._startIndex
}
/// The "past the end" position---that is, the position one
@@ -124,100 +288,54 @@
///
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
- return Index(encodedOffset: _core.endIndex)
+ return self._endIndex
}
- @_versioned
- internal func _index(atEncodedOffset n: Int) -> Index {
- if _fastPath(_core.isASCII) { return Index(encodedOffset: n) }
- if n == _core.endIndex { return endIndex }
-
- var p = UTF16.ForwardParser()
- var i = _core[n...].makeIterator()
- var buffer = Index._UTF8Buffer()
- Loop:
- while true {
- switch p.parseScalar(from: &i) {
- case .valid(let u16):
- let u8 = Unicode.UTF8.transcode(u16, from: Unicode.UTF16.self)
- ._unsafelyUnwrappedUnchecked
- if buffer.count + u8.count > buffer.capacity { break Loop }
- buffer.append(contentsOf: u8)
- case .error:
- let u8 = Unicode.UTF8.encodedReplacementCharacter
- if buffer.count + u8.count > buffer.capacity { break Loop }
- buffer.append(contentsOf: u8)
- case .emptyInput:
- break Loop
- }
- }
- return Index(encodedOffset: n, .utf8(buffer: buffer))
- }
-
/// Returns the next consecutive position after `i`.
///
/// - Precondition: The next position is representable.
- @inline(__always)
public func index(after i: Index) -> Index {
- if _fastPath(_core.isASCII) {
- precondition(i.encodedOffset < _core.count)
- return Index(encodedOffset: i.encodedOffset + 1)
- }
+ // FIXME: swift-3-indexing-model: range check i?
+ let currentUnit = UTF8.CodeUnit(extendingOrTruncating: i._buffer)
+ let hiNibble = currentUnit &>> (4 as UTF8.CodeUnit)
+
+ // Amounts to increment the UTF-16 index based on the high nibble of a
+ // UTF-8 code unit. If the high nibble is:
+ //
+ // - 0b0000-0b0111: U+0000...U+007F: increment the UTF-16 pointer by 1
+ // - 0b1000-0b1011: UTF-8 continuation byte, do not increment
+ // the UTF-16 pointer
+ // - 0b1100-0b1110: U+0080...U+FFFF: increment the UTF-16 pointer by 1
+ // - 0b1111: U+10000...U+1FFFFF: increment the UTF-16 pointer by 2
+ let u16Increments = Int(bitPattern:
+ // 1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
+ 0b10___01___01___01___00___00___00___00___01___01___01___01___01___01___01___01)
- var j = i
- while true {
- if case .utf8(let buffer) = j._cache {
- _onFastPath()
- var scalarLength16 = 1
- let b0 = buffer.first._unsafelyUnwrappedUnchecked
- var nextBuffer = buffer
-
- let leading1s = (~b0).leadingZeroBitCount
- if leading1s == 0 {
- nextBuffer.removeFirst()
- }
- else {
- let n8 = j._transcodedOffset + 1
- // If we haven't reached a scalar boundary...
- if _fastPath(n8 < leading1s) {
- return Index(
- encodedOffset: j.encodedOffset,
- transcodedOffset: n8, .utf8(buffer: nextBuffer))
- }
- scalarLength16 = n8 >> 2 + 1
- nextBuffer.removeFirst(n8)
- }
- if _fastPath(!nextBuffer.isEmpty) {
- return Index(
- encodedOffset: j.encodedOffset + scalarLength16,
- .utf8(buffer: nextBuffer))
- }
- return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
- }
- j = _index(atEncodedOffset: j.encodedOffset)
- precondition(j != endIndex, "index out of bounds")
+ // Map the high nibble of the current code unit into the
+ // amount by which to increment the UTF-16 index.
+ let increment = (u16Increments &>>
+ Int(extendingOrTruncating: hiNibble &<< (1 as UTF8.CodeUnit))) & 0x3
+ let nextCoreIndex = i._coreIndex &+ increment
+ let nextBuffer = Index._nextBuffer(after: i._buffer)
+
+ // If the nextBuffer is nonempty, we have all we need
+ if _fastPath(nextBuffer != Index._emptyBuffer) {
+ return Index(_coreIndex: nextCoreIndex, nextBuffer)
+ }
+ // If the underlying UTF16 isn't exhausted, fill a new buffer
+ else if _fastPath(nextCoreIndex < _core.endIndex) {
+ let (_, freshBuffer) = _core._encodeSomeUTF8(from: nextCoreIndex)
+ return Index(_coreIndex: nextCoreIndex, freshBuffer)
+ }
+ else {
+ // Produce the endIndex
+ _precondition(
+ nextCoreIndex == _core.endIndex,
+ "Can't increment past endIndex of String.UTF8View")
+ return Index(_coreIndex: nextCoreIndex, nextBuffer)
}
}
- public func distance(from i: Index, to j: Index) -> IndexDistance {
- if _fastPath(_core.isASCII) {
- return j.encodedOffset - i.encodedOffset
- }
- return j >= i
- ? _forwardDistance(from: i, to: j) : -_forwardDistance(from: j, to: i)
- }
-
- @_versioned
- @inline(__always)
- internal func _forwardDistance(from i: Index, to j: Index) -> IndexDistance {
- var r: IndexDistance = j._transcodedOffset - i._transcodedOffset
- UTF8._transcode(
- _core[i.encodedOffset..<j.encodedOffset], from: UTF16.self) {
- r += $0.count
- }
- return r
- }
-
/// Accesses the code unit at the given position.
///
/// The following example uses the subscript to print the value of a
@@ -231,27 +349,22 @@
/// - Parameter position: A valid index of the view. `position`
/// must be less than the view's end index.
public subscript(position: Index) -> UTF8.CodeUnit {
- @inline(__always)
- get {
- if _fastPath(_core.asciiBuffer != nil), let ascii = _core.asciiBuffer {
- _precondition(position < endIndex, "index out of bounds")
- return ascii[position.encodedOffset]
- }
- var j = position
- while true {
- if case .utf8(let buffer) = j._cache {
- _onFastPath()
- return buffer[
- buffer.index(buffer.startIndex, offsetBy: j._transcodedOffset)]
- }
- j = _index(atEncodedOffset: j.encodedOffset)
- precondition(j < endIndex, "index out of bounds")
- }
- }
+ let result = UTF8.CodeUnit(extendingOrTruncating: position._buffer & 0xFF)
+ _precondition(result != 0xFF, "cannot subscript using endIndex")
+ return result
+ }
+
+ /// Accesses the contiguous subrange of elements enclosed by the specified
+ /// range.
+ ///
+ /// - Complexity: O(*n*) if the underlying string is bridged from
+ /// Objective-C, where *n* is the length of the string; otherwise, O(1).
+ public subscript(bounds: Range<Index>) -> UTF8View {
+ return UTF8View(_core, bounds.lowerBound, bounds.upperBound)
}
public var description: String {
- return String(_core)
+ return String._fromCodeUnitSequenceWithRepair(UTF8.self, input: self).0
}
public var debugDescription: String {
@@ -312,28 +425,6 @@
///
/// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`.
///
- /// You can use this initializer to create a new string from
- /// another string's `utf8` view.
- ///
- /// let picnicGuest = "Deserving porcupine"
- /// if let i = picnicGuest.utf8.index(of: 32) {
- /// let adjective = String(picnicGuest.utf8[..<i])
- /// print(adjective)
- /// }
- /// // Prints "Optional(Deserving)"
- ///
- /// The `adjective` constant is created by calling this initializer with a
- /// slice of the `picnicGuest.utf8` view.
- ///
- /// - Parameter utf8: A UTF-8 code sequence.
- public init(_ utf8: UTF8View) {
- self = String(utf8._core)
- }
-
- /// Creates a string corresponding to the given sequence of UTF-8 code units.
- ///
- /// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`.
- ///
/// You can use this initializer to create a new string from a slice of
/// another string's `utf8` view.
///
@@ -348,8 +439,9 @@
/// slice of the `picnicGuest.utf8` view.
///
/// - Parameter utf8: A UTF-8 code sequence.
- public init?(_ utf8: UTF8View.SubSequence) {
- let wholeString = String(utf8.base._core)
+ public init?(_ utf8: UTF8View) {
+ let wholeString = String(utf8._core)
+
if let start = utf8.startIndex.samePosition(in: wholeString),
let end = utf8.endIndex.samePosition(in: wholeString) {
self = wholeString[start..<end]
@@ -366,133 +458,65 @@
var _persistentContent : String { return String(self._core) }
}
-extension String.UTF8View {
- public struct Iterator {
- typealias _OutputBuffer = UInt64
- internal let _source: _StringCore
- internal var _sourceIndex: Int
- internal var _buffer: _OutputBuffer
- }
- public func makeIterator() -> Iterator {
- return Iterator(_core)
- }
-}
-extension String.UTF8View.Iterator : IteratorProtocol {
- internal init(_ source: _StringCore) {
- _source = source
- _sourceIndex = 0
- _buffer = 0
- }
-
- public mutating func next() -> Unicode.UTF8.CodeUnit? {
- if _fastPath(_buffer != 0) {
- let r = UInt8(extendingOrTruncating: _buffer) &- 1
- _buffer >>= 8
- return r
+extension String.UTF8View.Index : Comparable {
+ // FIXME: swift-3-indexing-model: add complete set of forwards for Comparable
+ // assuming String.UTF8View.Index continues to exist
+ public static func == (
+ lhs: String.UTF8View.Index,
+ rhs: String.UTF8View.Index
+ ) -> Bool {
+ // If the underlying UTF16 index differs, they're unequal
+ if lhs._coreIndex != rhs._coreIndex {
+ return false
}
- if _slowPath(_sourceIndex == _source.count) { return nil }
- defer { _fixLifetime(_source) }
-
- if _fastPath(_source._unmanagedASCII != nil),
- let ascii = _source._unmanagedASCII {
- let result = ascii[_sourceIndex]
- _sourceIndex += 1
- for i in 0 ..< _OutputBuffer.bitWidth>>3 {
- if _sourceIndex == _source.count { break }
- _buffer |= _OutputBuffer(ascii[_sourceIndex] &+ 1) &<< (i << 3)
- _sourceIndex += 1
- }
- return result
- }
-
- if _fastPath(_source._unmanagedUTF16 != nil),
- let utf16 = _source._unmanagedUTF16 {
- return _next(refillingFrom: utf16)
- }
- return _next(refillingFrom: _source)
- }
-
- internal mutating func _next<Source: Collection>(
- refillingFrom source: Source
- ) -> Unicode.UTF8.CodeUnit?
- where Source.Element == Unicode.UTF16.CodeUnit,
- Source.Index == Int
- {
- _sanityCheck(_buffer == 0)
- var shift = 0
-
- // ASCII fastpath
- while _sourceIndex != _source.endIndex && shift < _OutputBuffer.bitWidth {
- let u = _source[_sourceIndex]
- if u >= 0x80 { break }
- _buffer |= _OutputBuffer(UInt8(extendingOrTruncating: u &+ 1)) &<< shift
- _sourceIndex += 1
- shift = shift &+ 8
- }
-
- var i = IndexingIterator(_elements: source, _position: _sourceIndex)
- var parser = Unicode.UTF16.ForwardParser()
- Loop:
+ // Match up bytes in the buffer
+ var buffer = (lhs._buffer, rhs._buffer)
+ var isContinuation: Bool
while true {
- let u8: UTF8.EncodedScalar
- switch parser.parseScalar(from: &i) {
- case .valid(let s):
- u8 = UTF8.transcode(s, from: UTF16.self)._unsafelyUnwrappedUnchecked
- case .error(_):
- u8 = UTF8.encodedReplacementCharacter
- case .emptyInput:
- break Loop
- }
- var newBuffer = _buffer
- for x in u8 {
- newBuffer |= _OutputBuffer(x &+ 1) &<< shift
- shift = shift &+ 8
- }
- guard _fastPath(shift <= _OutputBuffer.bitWidth) else { break Loop }
- _buffer = newBuffer
- _sourceIndex = i._position &- parser._buffer.count
- }
- guard _fastPath(_buffer != 0) else { return nil }
- let result = UInt8(extendingOrTruncating: _buffer) &- 1
- _buffer >>= 8
- return result
- }
-}
+ let unit = (
+ UTF8.CodeUnit(extendingOrTruncating: buffer.0),
+ UTF8.CodeUnit(extendingOrTruncating: buffer.1))
-extension String.UTF8View {
- public var count: Int {
- if _fastPath(_core.isASCII) { return _core.count }
- let b = _core._unmanagedUTF16
- if _fastPath(b != nil) {
- defer { _fixLifetime(_core) }
- return _count(fromUTF16: b!)
+ isContinuation = UTF8.isContinuation(unit.0)
+ if !isContinuation {
+ // We don't check for unit equality in this case because one of
+ // the units might be an 0xFF read from the end of the buffer.
+ return !UTF8.isContinuation(unit.1)
+ }
+ // Continuation bytes must match exactly
+ else if unit.0 != unit.1 {
+ return false
+ }
+
+ // Move the buffers along.
+ buffer = (
+ String.UTF8Index._nextBuffer(after: buffer.0),
+ String.UTF8Index._nextBuffer(after: buffer.1))
}
- return _count(fromUTF16: self._core)
}
- internal func _count<Source: Sequence>(fromUTF16 source: Source) -> Int
- where Source.Element == Unicode.UTF16.CodeUnit
- {
- var result = 0
- var prev: Unicode.UTF16.CodeUnit = 0
- for u in source {
- switch u {
- case 0..<0x80: result += 1
- case 0x80..<0x800: result += 2
- case 0x800..<0xDC00: result += 3
- case 0xDC00..<0xE000: result += UTF16.isLeadSurrogate(prev) ? 1 : 3
- default: result += 3
- }
- prev = u
+ public static func < (
+ lhs: String.UTF8View.Index,
+ rhs: String.UTF8View.Index
+ ) -> Bool {
+ if lhs._coreIndex == rhs._coreIndex && lhs._buffer != rhs._buffer {
+ // More continuation bytes left before the next scalar means an earlier index.
+ return lhs._utf8ContinuationBytesUntilNextUnicodeScalar >
+ rhs._utf8ContinuationBytesUntilNextUnicodeScalar
}
- return result
+ return lhs._coreIndex < rhs._coreIndex
}
}
// Index conversions
extension String.UTF8View.Index {
+ internal init(_ core: _StringCore, _utf16Offset: Int) {
+ let (_, buffer) = core._encodeSomeUTF8(from: _utf16Offset)
+ self.init(_coreIndex: _utf16Offset, buffer)
+ }
+
/// Creates an index in the given UTF-8 view that corresponds exactly to the
/// specified `UTF16View` position.
///
@@ -528,12 +552,156 @@
/// // Prints "nil"
///
/// - Parameters:
- /// - sourcePosition: A position in a `String` or one of its views.
- /// - target: The `UTF8View` in which to find the new position.
- public init?(_ sourcePosition: String.Index, within target: String.UTF8View) {
- guard String.UnicodeScalarView(target._core)._isOnUnicodeScalarBoundary(
- sourcePosition) else { return nil }
- self.init(encodedOffset: sourcePosition.encodedOffset)
+ /// - utf16Index: A position in a `UTF16View` instance. `utf16Index` must
+ /// be an element in `String(utf8).utf16.indices`.
+ /// - utf8: The `UTF8View` in which to find the new position.
+ public init?(_ utf16Index: String.UTF16Index, within utf8: String.UTF8View) {
+ let utf16 = String.UTF16View(utf8._core)
+
+ if utf16Index != utf16.startIndex
+ && utf16Index != utf16.endIndex {
+ _precondition(
+ utf16Index >= utf16.startIndex
+ && utf16Index <= utf16.endIndex,
+ "Invalid String.UTF16Index for this UTF-8 view")
+
+ // Detect positions that have no corresponding index. Note that
+ // we have to check before and after, because an unpaired
+ // surrogate will be decoded as a single replacement character,
+ // thus making the corresponding position valid in UTF8.
+ if UTF16.isTrailSurrogate(utf16[utf16Index])
+ && UTF16.isLeadSurrogate(utf16[utf16.index(before: utf16Index)]) {
+ return nil
+ }
+ }
+ self.init(utf8._core, _utf16Offset: utf16Index._offset)
+ }
+
+ /// Creates an index in the given UTF-8 view that corresponds exactly to the
+ /// specified `UnicodeScalarView` position.
+ ///
+ /// The following example converts the position of the Unicode scalar `"e"`
+ /// into its corresponding position in the string's `utf8` view.
+ ///
+ /// let cafe = "Cafe\u{0301}"
+ /// let scalarsIndex = cafe.unicodeScalars.index(of: "e")!
+ /// let utf8Index = String.UTF8View.Index(scalarsIndex, within: cafe.utf8)
+ ///
+ /// print(Array(cafe.utf8[...utf8Index]))
+ /// // Prints "[67, 97, 102, 101]"
+ ///
+ /// - Parameters:
+ /// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance.
+ /// `unicodeScalarIndex` must be an element of
+ /// `String(utf8).unicodeScalars.indices`.
+ /// - utf8: The `UTF8View` in which to find the new position.
+ public init(
+ _ unicodeScalarIndex: String.UnicodeScalarIndex,
+ within utf8: String.UTF8View
+ ) {
+ self.init(utf8._core, _utf16Offset: unicodeScalarIndex._position)
+ }
+
+ /// Creates an index in the given UTF-8 view that corresponds exactly to the
+ /// specified string position.
+ ///
+ /// The following example converts the position of the teacup emoji (`"🍵"`)
+ /// into its corresponding position in the string's `utf8` view.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let stringIndex = cafe.index(of: "🍵")!
+ /// let utf8Index = String.UTF8View.Index(stringIndex, within: cafe.utf8)
+ ///
+ /// print(Array(cafe.utf8[utf8Index...]))
+ /// // Prints "[240, 159, 141, 181]"
+ ///
+ /// - Parameters:
+ /// - index: A position in a string instance.
+ /// `index` must be an element of
+ /// `String(utf8).indices`.
+ /// - utf8: The `UTF8View` in which to find the new position.
+ public init(_ index: String.Index, within utf8: String.UTF8View) {
+ self.init(utf8._core, _utf16Offset: index._base._position)
+ }
+
+ /// Returns the position in the given UTF-16 view that corresponds exactly to
+ /// this index.
+ ///
+ /// The index must be a valid index of `String(utf16).utf8`.
+ ///
+ /// This example first finds the position of a space (UTF-8 code point `32`)
+ /// in a string's `utf8` view and then uses this method to find the same
+ /// position in the string's `utf16` view.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let i = cafe.utf8.index(of: 32)!
+ /// let j = i.samePosition(in: cafe.utf16)!
+ /// print(cafe.utf16[..<j])
+ /// // Prints "Café"
+ ///
+ /// - Parameter utf16: The view to use for the index conversion.
+ /// - Returns: The position in `utf16` that corresponds exactly to this
+ /// index. If this index does not have an exact corresponding position in
+ /// `utf16`, this method returns `nil`. For example, an attempt to convert
+ /// the position of a UTF-8 continuation byte returns `nil`.
+ public func samePosition(
+ in utf16: String.UTF16View
+ ) -> String.UTF16View.Index? {
+ return String.UTF16View.Index(self, within: utf16)
+ }
+
+ /// Returns the position in the given view of Unicode scalars that
+ /// corresponds exactly to this index.
+ ///
+ /// This index must be a valid index of `String(unicodeScalars).utf8`.
+ ///
+ /// This example first finds the position of a space (UTF-8 code point `32`)
+ /// in a string's `utf8` view and then uses this method to find the same position
+ /// in the string's `unicodeScalars` view.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let i = cafe.utf8.index(of: 32)!
+ /// let j = i.samePosition(in: cafe.unicodeScalars)!
+ /// print(cafe.unicodeScalars[..<j])
+ /// // Prints "Café"
+ ///
+ /// - Parameter unicodeScalars: The view to use for the index conversion.
+ /// - Returns: The position in `unicodeScalars` that corresponds exactly to
+ /// this index. If this index does not have an exact corresponding
+ /// position in `unicodeScalars`, this method returns `nil`. For example,
+ /// an attempt to convert the position of a UTF-8 continuation byte
+ /// returns `nil`.
+ public func samePosition(
+ in unicodeScalars: String.UnicodeScalarView
+ ) -> String.UnicodeScalarIndex? {
+ return String.UnicodeScalarIndex(self, within: unicodeScalars)
+ }
+
+ /// Returns the position in the given string that corresponds exactly to this
+ /// index.
+ ///
+ /// This index must be a valid index of `characters.utf8`.
+ ///
+ /// This example first finds the position of a space (UTF-8 code point `32`)
+ /// in a string's `utf8` view and then uses this method to find the same
+ /// position in the string.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let i = cafe.utf8.index(of: 32)!
+ /// let j = i.samePosition(in: cafe)!
+ /// print(cafe[..<j])
+ /// // Prints "Café"
+ ///
+ /// - Parameter characters: The string to use for the index conversion.
+ /// - Returns: The position in `characters` that corresponds exactly to
+ /// this index. If this index does not have an exact corresponding
+ /// position in `characters`, this method returns `nil`. For example,
+ /// an attempt to convert the position of a UTF-8 continuation byte
+ /// returns `nil`.
+ public func samePosition(
+ in characters: String
+ ) -> String.Index? {
+ return String.Index(self, within: characters)
}
}
@@ -557,32 +725,3 @@
Builtin.unreachable()
}
}
-
-// backward compatibility for index interchange.
-extension String.UTF8View {
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(after i: Index?) -> Index {
- return index(after: i!)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(_ i: Index?, offsetBy n: IndexDistance) -> Index {
- return index(i!, offsetBy: n)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
- public func distance(
- from i: Index?, to j: Index?) -> IndexDistance {
- return distance(from: i!, to: j!)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public subscript(i: Index?) -> Unicode.UTF8.CodeUnit {
- return self[i!]
- }
-}
diff --git a/stdlib/public/core/StringUnicodeScalarView.swift b/stdlib/public/core/StringUnicodeScalarView.swift
index e23556b..09c5633 100644
--- a/stdlib/public/core/StringUnicodeScalarView.swift
+++ b/stdlib/public/core/StringUnicodeScalarView.swift
@@ -85,19 +85,41 @@
}
}
- public typealias Index = String.Index
- public typealias IndexDistance = Int
-
+ /// A position in a string's `unicodeScalars` view.
+ ///
+ /// You can convert between indices of the different string views by using
+ /// conversion initializers and the `samePosition(in:)` method overloads.
+ /// The following example finds the index of the solid heart pictograph in
+ /// the string's character view and then converts that to the same
+ /// position in the Unicode scalars view:
+ ///
+ /// let hearts = "Hearts <3 ♥︎ 💘"
+ /// let i = hearts.index(of: "♥︎")!
+ ///
+ /// let j = i.samePosition(in: hearts.unicodeScalars)
+ /// print(hearts.unicodeScalars[j...])
+ /// // Prints "♥︎ 💘"
+ /// print(hearts.unicodeScalars[j].value)
+ /// // Prints "9829"
+ public struct Index {
+ public // SPI(Foundation)
+ init(_position: Int) {
+ self._position = _position
+ }
+
+ @_versioned internal var _position: Int
+ }
+
/// Translates a `_core` index into a `UnicodeScalarIndex` using this view's
/// `_coreOffset`.
internal func _fromCoreIndex(_ i: Int) -> Index {
- return Index(encodedOffset: i + _coreOffset)
+ return Index(_position: i + _coreOffset)
}
/// Translates a `UnicodeScalarIndex` into a `_core` index using this view's
/// `_coreOffset`.
internal func _toCoreIndex(_ i: Index) -> Int {
- return i.encodedOffset - _coreOffset
+ return i._position - _coreOffset
}
/// The position of the first Unicode scalar value if the string is
@@ -186,7 +208,7 @@
public subscript(r: Range<Index>) -> UnicodeScalarView {
let rawSubRange = _toCoreIndex(r.lowerBound)..<_toCoreIndex(r.upperBound)
return UnicodeScalarView(_core[rawSubRange],
- coreOffset: r.lowerBound.encodedOffset)
+ coreOffset: r.lowerBound._position)
}
/// An iterator over the Unicode scalars that make up a `UnicodeScalarView`
@@ -274,7 +296,7 @@
/// The offset of this view's `_core` from an original core. This works
/// around the fact that `_StringCore` is always zero-indexed.
- /// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
+ /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
/// before that value is used as a `_core` index.
internal var _coreOffset: Int
}
@@ -320,6 +342,22 @@
}
}
+extension String.UnicodeScalarView.Index : Comparable {
+ public static func == (
+ lhs: String.UnicodeScalarView.Index,
+ rhs: String.UnicodeScalarView.Index
+ ) -> Bool {
+ return lhs._position == rhs._position
+ }
+
+ public static func < (
+ lhs: String.UnicodeScalarView.Index,
+ rhs: String.UnicodeScalarView.Index
+ ) -> Bool {
+ return lhs._position < rhs._position
+ }
+}
+
extension String.UnicodeScalarView : RangeReplaceableCollection {
/// Creates an empty view instance.
public init() {
@@ -416,8 +454,124 @@
_ utf16Index: String.UTF16Index,
within unicodeScalars: String.UnicodeScalarView
) {
- if !unicodeScalars._isOnUnicodeScalarBoundary(utf16Index) { return nil }
- self = utf16Index
+ let utf16 = String.UTF16View(unicodeScalars._core)
+
+ if utf16Index != utf16.startIndex
+ && utf16Index != utf16.endIndex {
+ _precondition(
+ utf16Index >= utf16.startIndex
+ && utf16Index <= utf16.endIndex,
+ "Invalid String.UTF16Index for this Unicode.Scalar view")
+
+ // Detect positions that have no corresponding index. Note that
+ // we have to check before and after, because an unpaired
+ // surrogate will be decoded as a single replacement character,
+ // thus making the corresponding position valid.
+ if UTF16.isTrailSurrogate(utf16[utf16Index])
+ && UTF16.isLeadSurrogate(utf16[utf16.index(before: utf16Index)]) {
+ return nil
+ }
+ }
+ self.init(_position: utf16Index._offset)
+ }
+
+ /// Creates an index in the given Unicode scalars view that corresponds
+ /// exactly to the specified `UTF8View` position.
+ ///
+ /// If the position passed as `utf8Index` doesn't have an exact corresponding
+ /// position in `unicodeScalars`, the result of the initializer is `nil`.
+ /// For example, an attempt to convert the position of a UTF-8 continuation
+ /// byte returns `nil`.
+ ///
+ /// - Parameters:
+ /// - utf8Index: A position in the `utf8` view of a string. `utf8Index`
+ /// must be an element of `String(unicodeScalars).utf8.indices`.
+ /// - unicodeScalars: The `UnicodeScalarView` in which to find the new
+ /// position.
+ public init?(
+ _ utf8Index: String.UTF8Index,
+ within unicodeScalars: String.UnicodeScalarView
+ ) {
+ let core = unicodeScalars._core
+
+ _precondition(
+ utf8Index._coreIndex >= 0 && utf8Index._coreIndex <= core.endIndex,
+ "Invalid String.UTF8Index for this Unicode.Scalar view")
+
+ // Detect positions that have no corresponding index.
+ if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
+ return nil
+ }
+ self.init(_position: utf8Index._coreIndex)
+ }
+
+ /// Creates an index in the given Unicode scalars view that corresponds
+ /// exactly to the specified string position.
+ ///
+ /// The following example converts the position of the teacup emoji (`"🍵"`)
+ /// into its corresponding position in the string's `unicodeScalars` view.
+ ///
+ /// let cafe = "Café 🍵"
+ /// let stringIndex = cafe.index(of: "🍵")!
+ /// let scalarIndex = String.UnicodeScalarView.Index(stringIndex, within: cafe.unicodeScalars)
+ ///
+ /// print(cafe.unicodeScalars[scalarIndex...])
+ /// // Prints "🍵"
+ ///
+ /// - Parameters:
+ /// - index: A position in a string. `index` must be an element of
+ /// `String(unicodeScalars).indices`.
+ /// - unicodeScalars: The `UnicodeScalarView` in which to find the new
+ /// position.
+ public init(
+ _ index: String.Index,
+ within unicodeScalars: String.UnicodeScalarView
+ ) {
+ self.init(_position: index._base._position)
+ }
+
+ /// Returns the position in the given UTF-8 view that corresponds exactly to
+ /// this index.
+ ///
+ /// The index must be a valid index of `String(utf8).unicodeScalars`.
+ ///
+ /// This example first finds the position of the character `"é"` and then uses
+ /// this method to find the same position in the string's `utf8` view.
+ ///
+ /// let cafe = "Café"
+ /// if let i = cafe.unicodeScalars.index(of: "é") {
+ /// let j = i.samePosition(in: cafe.utf8)
+ /// print(Array(cafe.utf8[j...]))
+ /// }
+ /// // Prints "[195, 169]"
+ ///
+ /// - Parameter utf8: The view to use for the index conversion.
+ /// - Returns: The position in `utf8` that corresponds exactly to this index.
+ public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index {
+ return String.UTF8View.Index(self, within: utf8)
+ }
+
+ /// Returns the position in the given UTF-16 view that corresponds exactly to
+ /// this index.
+ ///
+ /// The index must be a valid index of `String(utf16).unicodeScalars`.
+ ///
+ /// This example first finds the position of the character `"é"` and then uses
+ /// this method to find the same position in the string's `utf16` view.
+ ///
+ /// let cafe = "Café"
+ /// if let i = cafe.unicodeScalars.index(of: "é") {
+ /// let j = i.samePosition(in: cafe.utf16)
+ /// print(cafe.utf16[j])
+ /// }
+ /// // Prints "233"
+ ///
+ /// - Parameter utf16: The view to use for the index conversion.
+ /// - Returns: The position in `utf16` that corresponds exactly to this index.
+ public func samePosition(
+ in utf16: String.UTF16View
+ ) -> String.UTF16View.Index {
+ return String.UTF16View.Index(self, within: utf16)
}
/// Returns the position in the given string that corresponds exactly to this
@@ -447,21 +601,9 @@
}
extension String.UnicodeScalarView {
- internal func _isOnUnicodeScalarBoundary(_ i: Index) -> Bool {
- if _fastPath(_core.isASCII) { return true }
- if i == startIndex || i == endIndex {
- return true
- }
- if i._transcodedOffset != 0 { return false }
- let i2 = _toCoreIndex(i)
- if _fastPath(_core[i2] & 0xFC00 != 0xDC00) { return true }
- return _core[i2 &- 1] & 0xFC00 != 0xD800
- }
-
// NOTE: Don't make this function inlineable. Grapheme cluster
// segmentation uses a completely different algorithm in Unicode 9.0.
internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
- if !_isOnUnicodeScalarBoundary(i) { return false }
if i == startIndex || i == endIndex {
return true
}
@@ -497,31 +639,3 @@
return .text(description)
}
}
-
-// backward compatibility for index interchange.
-extension String.UnicodeScalarView {
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(after i: Index?) -> Index {
- return index(after: i)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public func index(_ i: Index?, offsetBy n: IndexDistance) -> Index {
- return index(i!, offsetBy: n)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional indices")
- public func distance(from i: Index?, to j: Index?) -> IndexDistance {
- return distance(from: i!, to: j!)
- }
- @available(
- swift, obsoleted: 4.0,
- message: "Any String view index conversion can fail in Swift 4; please unwrap the optional index")
- public subscript(i: Index?) -> Unicode.Scalar {
- return self[i!]
- }
-}
diff --git a/stdlib/public/core/Substring.swift.gyb b/stdlib/public/core/Substring.swift.gyb
index cbb232c..8bb704f 100644
--- a/stdlib/public/core/Substring.swift.gyb
+++ b/stdlib/public/core/Substring.swift.gyb
@@ -228,7 +228,7 @@
public func withCString<Result>(
_ body: (UnsafePointer<CChar>) throws -> Result) rethrows -> Result {
return try _slice._base._core._withCSubstringAndLength(
- in: startIndex.encodedOffset..<endIndex.encodedOffset,
+ in: startIndex._base._position..<endIndex._base._position,
encoding: UTF8.self) {
p, length in try p.withMemoryRebound(to: CChar.self, capacity: length) {
try body($0)
@@ -257,7 +257,7 @@
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result {
return try _slice._base._core._withCSubstring(
- in: startIndex.encodedOffset..<endIndex.encodedOffset,
+ in: startIndex._base._position..<endIndex._base._position,
encoding: targetEncoding, body)
}
}
@@ -282,7 +282,7 @@
var _ephemeralContent : String {
let wholeCore = _slice._base._core
let subCore : _StringCore = wholeCore[
- startIndex.encodedOffset..<endIndex.encodedOffset]
+ startIndex._base._position..<endIndex._base._position]
// Check that we haven't allocated a new buffer for the result, if we have
// contiguous storage.
_sanityCheck(
diff --git a/stdlib/public/core/UIntBuffer.swift b/stdlib/public/core/UIntBuffer.swift
index f04677d..2e79821 100644
--- a/stdlib/public/core/UIntBuffer.swift
+++ b/stdlib/public/core/UIntBuffer.swift
@@ -172,13 +172,6 @@
_storage |= Storage(newElement) &<< _bitCount
_bitCount = _bitCount &+ _elementWidth
}
-
- @inline(__always)
- public mutating func removeFirst() {
- _debugPrecondition(!isEmpty)
- _bitCount = _bitCount &- _elementWidth
- _storage = _storage._fullShiftRight(_elementWidth)
- }
@inline(__always)
public mutating func replaceSubrange<C: Collection>(
diff --git a/stdlib/public/core/UnavailableStringAPIs.swift.gyb b/stdlib/public/core/UnavailableStringAPIs.swift.gyb
index 7f04c97..efd9bf4 100644
--- a/stdlib/public/core/UnavailableStringAPIs.swift.gyb
+++ b/stdlib/public/core/UnavailableStringAPIs.swift.gyb
@@ -71,3 +71,42 @@
% end
}
+% for View in ['UTF8View', 'UTF16View', 'UnicodeScalarView', 'CharacterView']:
+% Index = 'String.%s.Index' % View
+% Distance = 'String.%s.IndexDistance' % View
+extension ${Index} {
+ @available(
+ *, unavailable,
+ message: "To get the next index call 'index(after:)' on the ${View} instance that produced the index.")
+ public func successor() -> ${Index} {
+ Builtin.unreachable()
+ }
+% if View != 'UTF8View':
+ @available(
+ *, unavailable,
+ message: "To get the previous index call 'index(before:)' on the ${View} instance that produced the index.")
+ public func predecessor() -> ${Index} {
+ Builtin.unreachable()
+ }
+% end
+ @available(
+ *, unavailable,
+ message: "To advance an index by n steps call 'index(_:offsetBy:)' on the ${View} instance that produced the index.")
+ public func advancedBy(_ n: ${Distance}) -> ${Index} {
+ Builtin.unreachable()
+ }
+ @available(
+ *, unavailable,
+ message: "To advance an index by n steps stopping at a given limit call 'index(_:offsetBy:limitedBy:)' on ${View} instance that produced the index. Note that the Swift 3 API returns 'nil' when trying to advance past the limit; the Swift 2 API returned the limit.")
+ public func advancedBy(_ n: ${Distance}, limit: ${Index}) -> ${Index} {
+ Builtin.unreachable()
+ }
+ @available(
+ *, unavailable,
+ message: "To find the distance between two indices call 'distance(from:to:)' on the ${View} instance that produced the index.")
+ public func distanceTo(_ end: ${Index}) -> ${Distance} {
+ Builtin.unreachable()
+ }
+}
+% end
+
diff --git a/stdlib/public/core/Unicode.swift b/stdlib/public/core/Unicode.swift
index aad15ae..97b8ee3 100644
--- a/stdlib/public/core/Unicode.swift
+++ b/stdlib/public/core/Unicode.swift
@@ -585,6 +585,99 @@
}
}
+/// Transcode UTF-16 to UTF-8, replacing ill-formed sequences with U+FFFD.
+///
+/// Returns the index of the first unhandled code unit and the UTF-8 data
+/// that was encoded.
+internal func _transcodeSomeUTF16AsUTF8<Input : Collection>(
+ _ input: Input, _ startIndex: Input.Index
+) -> (Input.Index, _StringCore._UTF8Chunk)
+ where Input.Element == UInt16 {
+
+ typealias _UTF8Chunk = _StringCore._UTF8Chunk
+
+ let endIndex = input.endIndex
+ let utf8Max = MemoryLayout<_UTF8Chunk>.size
+ var result: _UTF8Chunk = 0
+ var utf8Count = 0
+ var nextIndex = startIndex
+ while nextIndex != input.endIndex && utf8Count != utf8Max {
+ let u = UInt(input[nextIndex])
+ let shift = _UTF8Chunk(utf8Count * 8)
+ var utf16Length: Input.IndexDistance = 1
+
+ if _fastPath(u <= 0x7f) {
+ result |= _UTF8Chunk(u) &<< shift
+ utf8Count += 1
+ } else {
+ var scalarUtf8Length: Int
+ var r: UInt
+ if _fastPath((u &>> 11) != 0b1101_1) {
+ // Neither high-surrogate, nor low-surrogate -- well-formed sequence
+ // of 1 code unit, decoding is trivial.
+ if u < 0x800 {
+ r = 0b10__00_0000__110__0_0000
+ r |= u &>> 6
+ r |= (u & 0b11_1111) &<< 8
+ scalarUtf8Length = 2
+ }
+ else {
+ r = 0b10__00_0000__10__00_0000__1110__0000
+ r |= u &>> 12
+ r |= ((u &>> 6) & 0b11_1111) &<< 8
+ r |= (u & 0b11_1111) &<< 16
+ scalarUtf8Length = 3
+ }
+ } else {
+ let unit0 = u
+ if _slowPath((unit0 &>> 10) == 0b1101_11) {
+ // `unit0` is a low-surrogate. We have an ill-formed sequence.
+ // Replace it with U+FFFD.
+ r = 0xbdbfef
+ scalarUtf8Length = 3
+ } else if _slowPath(input.index(nextIndex, offsetBy: 1) == endIndex) {
+ // We have seen a high-surrogate and EOF, so we have an ill-formed
+ // sequence. Replace it with U+FFFD.
+ r = 0xbdbfef
+ scalarUtf8Length = 3
+ } else {
+ let unit1 = UInt(input[input.index(nextIndex, offsetBy: 1)])
+ if _fastPath((unit1 &>> 10) == 0b1101_11) {
+ // `unit1` is a low-surrogate. We have a well-formed surrogate
+ // pair.
+ let v = 0x10000 + (((unit0 & 0x03ff) &<< 10) | (unit1 & 0x03ff))
+
+ r = 0b10__00_0000__10__00_0000__10__00_0000__1111_0__000
+ r |= v &>> 18
+ r |= ((v &>> 12) & 0b11_1111) &<< 8
+ r |= ((v &>> 6) & 0b11_1111) &<< 16
+ r |= (v & 0b11_1111) &<< 24
+ scalarUtf8Length = 4
+ utf16Length = 2
+ } else {
+ // Otherwise, we have an ill-formed sequence. Replace it with
+ // U+FFFD.
+ r = 0xbdbfef
+ scalarUtf8Length = 3
+ }
+ }
+ }
+ // Don't overrun the buffer
+ if utf8Count + scalarUtf8Length > utf8Max {
+ break
+ }
+ result |= numericCast(r) &<< shift
+ utf8Count += scalarUtf8Length
+ }
+ nextIndex = input.index(nextIndex, offsetBy: utf16Length)
+ }
+ // FIXME: Annoying check, courtesy of <rdar://problem/16740169>
+ if utf8Count < MemoryLayout.size(ofValue: result) {
+ result |= ~0 &<< numericCast(utf8Count * 8)
+ }
+ return (nextIndex, result)
+}
+
/// Instances of conforming types are used in internal `String`
/// representation.
public // @testable
diff --git a/stdlib/public/core/UnicodeEncoding.swift b/stdlib/public/core/UnicodeEncoding.swift
index 2bde99e..c2555f3 100644
--- a/stdlib/public/core/UnicodeEncoding.swift
+++ b/stdlib/public/core/UnicodeEncoding.swift
@@ -78,7 +78,6 @@
/// Converts from encoding-independent to encoded representation, returning
/// `encodedReplacementCharacter` if the scalar can't be represented in this
/// encoding.
- @_versioned
internal static func _encode(_ content: Unicode.Scalar) -> EncodedScalar {
return encode(content) ?? encodedReplacementCharacter
}
@@ -86,31 +85,12 @@
/// Converts a scalar from another encoding's representation, returning
/// `encodedReplacementCharacter` if the scalar can't be represented in this
/// encoding.
- @_versioned
internal static func _transcode<FromEncoding : Unicode.Encoding>(
_ content: FromEncoding.EncodedScalar, from _: FromEncoding.Type
) -> EncodedScalar {
return transcode(content, from: FromEncoding.self)
?? encodedReplacementCharacter
}
-
- @_versioned
- internal static func _transcode<
- Source: Sequence, SourceEncoding: Unicode.Encoding>(
- _ source: Source,
- from sourceEncoding: SourceEncoding.Type,
- into processScalar: (EncodedScalar)->Void)
- where Source.Element == SourceEncoding.CodeUnit {
- var p = SourceEncoding.ForwardParser()
- var i = source.makeIterator()
- while true {
- switch p.parseScalar(from: &i) {
- case .valid(let e): processScalar(_transcode(e, from: sourceEncoding))
- case .error(_): processScalar(encodedReplacementCharacter)
- case .emptyInput: return
- }
- }
- }
}
extension Unicode {
diff --git a/stdlib/public/core/ValidUTF8Buffer.swift b/stdlib/public/core/ValidUTF8Buffer.swift
index 0690e2f..b105376 100644
--- a/stdlib/public/core/ValidUTF8Buffer.swift
+++ b/stdlib/public/core/ValidUTF8Buffer.swift
@@ -21,8 +21,6 @@
Storage: UnsignedInteger & FixedWidthInteger
> {
public typealias Element = Unicode.UTF8.CodeUnit
- internal typealias _Storage = Storage
-
@_versioned
internal var _biasedBits: Storage
@@ -62,10 +60,8 @@
public typealias IndexDistance = Int
public struct Index : Comparable {
- @_versioned
internal var _biasedBits: Storage
- @_versioned
internal init(_biasedBits: Storage) { self._biasedBits = _biasedBits }
public static func == (lhs: Index, rhs: Index) -> Bool {
@@ -108,17 +104,15 @@
extension _ValidUTF8Buffer : RandomAccessCollection {
public typealias Indices = DefaultRandomAccessIndices<_ValidUTF8Buffer>
-
- @inline(__always)
+
public func distance(from i: Index, to j: Index) -> IndexDistance {
- _debugPrecondition(_isValid(i))
- _debugPrecondition(_isValid(j))
+ _debugPrecondition(indices.contains(i))
+ _debugPrecondition(indices.contains(j))
return (
i._biasedBits.leadingZeroBitCount - j._biasedBits.leadingZeroBitCount
) &>> 3
}
- @inline(__always)
public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
let startOffset = distance(from: startIndex, to: i)
let newOffset = startOffset + n
@@ -134,38 +128,22 @@
}
public var capacity: IndexDistance {
- return _ValidUTF8Buffer.capacity
- }
-
- public static var capacity: IndexDistance {
return Storage.bitWidth / Element.bitWidth
}
- @inline(__always)
public mutating func append(_ e: Element) {
_debugPrecondition(count + 1 <= capacity)
_sanityCheck(
e != 192 && e != 193 && !(245...255).contains(e), "invalid UTF8 byte")
_biasedBits |= Storage(e &+ 1) &<< (count &<< 3)
}
-
- @inline(__always)
- public mutating func removeFirst() {
- _debugPrecondition(!isEmpty)
- _biasedBits = _biasedBits._fullShiftRight(8)
- }
-
- @_versioned
- internal func _isValid(_ i: Index) -> Bool {
- return i == endIndex || indices.contains(i)
- }
@inline(__always)
public mutating func replaceSubrange<C: Collection>(
_ target: Range<Index>, with replacement: C
) where C.Element == Element {
- _debugPrecondition(_isValid(target.lowerBound))
- _debugPrecondition(_isValid(target.upperBound))
+ _debugPrecondition(indices.contains(target.lowerBound))
+ _debugPrecondition(indices.contains(target.upperBound))
var r = _ValidUTF8Buffer()
for x in self[..<target.lowerBound] { r.append(x) }
for x in replacement { r.append(x) }
@@ -173,11 +151,9 @@
self = r
}
- @inline(__always)
public mutating func append<T>(contentsOf other: _ValidUTF8Buffer<T>) {
_debugPrecondition(count + other.count <= capacity)
- _biasedBits |= Storage(
- extendingOrTruncating: other._biasedBits) &<< (count &<< 3)
+ _biasedBits |= Storage(extendingOrTruncating: other._biasedBits) &<< (count &<< 3)
}
}
diff --git a/test/Constraints/diagnostics.swift b/test/Constraints/diagnostics.swift
index 839d03e..b8a5d9e 100644
--- a/test/Constraints/diagnostics.swift
+++ b/test/Constraints/diagnostics.swift
@@ -635,7 +635,7 @@
// <rdar://problem/23641896> QoI: Strings in Swift cannot be indexed directly with integer offsets
func r23641896() {
var g = "Hello World"
- g.replaceSubrange(0...2, with: "ce") // expected-error {{cannot convert value of type 'CountableClosedRange<Int>' to expected argument type 'Range<String.Index>'}}
+ g.replaceSubrange(0...2, with: "ce") // expected-error {{cannot convert value of type 'CountableClosedRange<Int>' to expected argument type 'Range<String.Index>' (aka 'Range<String.CharacterView.Index>')}}
_ = g[12] // expected-error {{'subscript' is unavailable: cannot subscript String with an Int, see the documentation comment for discussion}}
diff --git a/test/SourceKit/InterfaceGen/gen_stdlib.swift b/test/SourceKit/InterfaceGen/gen_stdlib.swift
index a085dbb..3204099 100644
--- a/test/SourceKit/InterfaceGen/gen_stdlib.swift
+++ b/test/SourceKit/InterfaceGen/gen_stdlib.swift
@@ -14,10 +14,10 @@
// CHECK-STDLIB: }
// Check that extensions of nested decls are showing up.
-// CHECK-STDLIB-LABEL: extension String.Index {
+// CHECK-STDLIB-LABEL: extension String.UTF16View.Index {
// CHECK-STDLIB: func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index?
-// CHECK-STDLIB: func samePosition(in characters: String) -> String.Index?
// CHECK-STDLIB: func samePosition(in unicodeScalars: String.UnicodeScalarView) -> String.UnicodeScalarIndex?
+// CHECK-STDLIB: func samePosition(in characters: String) -> String.Index?
// CHECK-STDLIB-NEXT: }
// CHECK-MUTATING-ATTR: mutating func
diff --git a/test/stdlib/StringDiagnostics.swift b/test/stdlib/StringDiagnostics.swift
index 49abc1d..dd327ed 100644
--- a/test/stdlib/StringDiagnostics.swift
+++ b/test/stdlib/StringDiagnostics.swift
@@ -74,9 +74,10 @@
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
- acceptsCollection(s.utf16)
+ // String.UTF16View is a RandomAccessCollection with Foundation, bidirectional without
+ acceptsCollection(s.utf16)
acceptsBidirectionalCollection(s.utf16)
- acceptsRandomAccessCollection(s.utf16) // expected-error{{argument type 'String.UTF16View' does not conform to expected type 'RandomAccessCollection'}}
+ acceptsRandomAccessCollection(s.utf16)
acceptsCollection(s.unicodeScalars)
acceptsBidirectionalCollection(s.unicodeScalars)
diff --git a/test/stdlib/StringTraps.swift b/test/stdlib/StringTraps.swift
index 92be21b..8eaa9bb 100644
--- a/test/stdlib/StringTraps.swift
+++ b/test/stdlib/StringTraps.swift
@@ -88,8 +88,8 @@
.code {
var s = "abc"
var i = s.utf16.startIndex
- expectCrashLater()
i = s.utf16.index(before: i)
+ expectCrashLater()
s.utf16[i]
}
diff --git a/test/stdlib/UnavailableStringAPIs.swift.gyb b/test/stdlib/UnavailableStringAPIs.swift.gyb
index 11a0595..eefe1c0 100644
--- a/test/stdlib/UnavailableStringAPIs.swift.gyb
+++ b/test/stdlib/UnavailableStringAPIs.swift.gyb
@@ -25,14 +25,26 @@
% end
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
+ _ = i.successor() // expected-error {{'successor()' is unavailable: To get the next index call 'index(after:)' on the UTF8View instance that produced the index.}} {{none}}
+ _ = i.predecessor() // expected-error {{value of type 'String.UTF8View.Index' has no member 'predecessor'}} {{none}}
+ _ = i.advancedBy(d) // expected-error {{'advancedBy' is unavailable: To advance an index by n steps call 'index(_:offsetBy:)' on the UTF8View instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d, limit: i) // expected-error {{'advancedBy(_:limit:)' is unavailable: To advance an index by n steps stopping at a given limit call 'index(_:offsetBy:limitedBy:)' on UTF8View instance that produced the index. Note that the Swift 3 API returns 'nil' when trying to advance past the limit; the Swift 2 API returned the limit.}} {{none}}
+ _ = i.distanceTo(i) // expected-error {{'distanceTo' is unavailable: To find the distance between two indices call 'distance(from:to:)' on the UTF8View instance that produced the index.}} {{none}}
+
_ = s.index(after: i) // OK
- _ = s.index(before: i) // expected-error {{before:}} expected-note {{overloads}}
+ _ = s.index(before: i) // expected-error {{before:}}
_ = s.index(i, offsetBy: d) // OK
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
_ = s.distance(from: i, to: i) // OK
}
func test_UTF16View(s: String.UTF16View, i: String.UTF16View.Index, d: Int) {
+ _ = i.successor() // expected-error {{'successor()' is unavailable: To get the next index call 'index(after:)' on the UTF16View instance that produced the index.}} {{none}}
+ _ = i.predecessor() // expected-error {{'predecessor()' is unavailable: To get the previous index call 'index(before:)' on the UTF16View instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d) // expected-error {{'advancedBy' is unavailable: To advance an index by n steps call 'index(_:offsetBy:)' on the UTF16View instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d, limit: i) // expected-error {{'advancedBy(_:limit:)' is unavailable: To advance an index by n steps stopping at a given limit call 'index(_:offsetBy:limitedBy:)' on UTF16View instance that produced the index. Note that the Swift 3 API returns 'nil' when trying to advance past the limit; the Swift 2 API returned the limit.}} {{none}}
+ _ = i.distanceTo(i) // expected-error {{'distanceTo' is unavailable: To find the distance between two indices call 'distance(from:to:)' on the UTF16View instance that produced the index.}} {{none}}
+
_ = s.index(after: i) // OK
_ = s.index(before: i) // OK
_ = s.index(i, offsetBy: d) // OK
@@ -41,6 +53,12 @@
}
func test_UnicodeScalarView(s: String.UnicodeScalarView, i: String.UnicodeScalarView.Index, d: Int) {
+ _ = i.successor() // expected-error {{'successor()' is unavailable: To get the next index call 'index(after:)' on the UnicodeScalarView instance that produced the index.}} {{none}}
+ _ = i.predecessor() // expected-error {{'predecessor()' is unavailable: To get the previous index call 'index(before:)' on the UnicodeScalarView instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d) // expected-error {{'advancedBy' is unavailable: To advance an index by n steps call 'index(_:offsetBy:)' on the UnicodeScalarView instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d, limit: i) // expected-error {{'advancedBy(_:limit:)' is unavailable: To advance an index by n steps stopping at a given limit call 'index(_:offsetBy:limitedBy:)' on UnicodeScalarView instance that produced the index. Note that the Swift 3 API returns 'nil' when trying to advance past the limit; the Swift 2 API returned the limit.}} {{none}}
+ _ = i.distanceTo(i) // expected-error {{'distanceTo' is unavailable: To find the distance between two indices call 'distance(from:to:)' on the UnicodeScalarView instance that produced the index.}} {{none}}
+
_ = s.index(after: i) // OK
_ = s.index(before: i) // OK
_ = s.index(i, offsetBy: d) // OK
@@ -49,6 +67,12 @@
}
func test_CharacterView(s: String.CharacterView, i: String.CharacterView.Index, d: Int) {
+ _ = i.successor() // expected-error {{'successor()' is unavailable: To get the next index call 'index(after:)' on the CharacterView instance that produced the index.}} {{none}}
+ _ = i.predecessor() // expected-error {{'predecessor()' is unavailable: To get the previous index call 'index(before:)' on the CharacterView instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d) // expected-error {{'advancedBy' is unavailable: To advance an index by n steps call 'index(_:offsetBy:)' on the CharacterView instance that produced the index.}} {{none}}
+ _ = i.advancedBy(d, limit: i) // expected-error {{'advancedBy(_:limit:)' is unavailable: To advance an index by n steps stopping at a given limit call 'index(_:offsetBy:limitedBy:)' on CharacterView instance that produced the index. Note that the Swift 3 API returns 'nil' when trying to advance past the limit; the Swift 2 API returned the limit.}} {{none}}
+ _ = i.distanceTo(i) // expected-error {{'distanceTo' is unavailable: To find the distance between two indices call 'distance(from:to:)' on the CharacterView instance that produced the index.}} {{none}}
+
_ = s.index(after: i) // OK
_ = s.index(before: i) // OK
_ = s.index(i, offsetBy: d) // OK
diff --git a/validation-test/stdlib/String.swift b/validation-test/stdlib/String.swift
index 2b7f2b5..8ae6336 100644
--- a/validation-test/stdlib/String.swift
+++ b/validation-test/stdlib/String.swift
@@ -58,8 +58,8 @@
typealias View = String.UTF8View
expectCollectionAssociatedTypes(
collectionType: View.self,
- iteratorType: View.Iterator.self,
- subSequenceType: Slice<View>.self,
+ iteratorType: IndexingIterator<View>.self,
+ subSequenceType: View.self,
indexType: View.Index.self,
indexDistanceType: Int.self,
indicesType: DefaultIndices<View>.self)
@@ -177,7 +177,7 @@
do {
let donor = "abcdef"
let acceptor = "\u{1f601}\u{1f602}\u{1f603}"
- expectEqual("\u{1f601}", acceptor[donor.startIndex])
+ expectEqual("\u{fffd}", acceptor[donor.startIndex])
expectEqual("\u{fffd}", acceptor[donor.index(after: donor.startIndex)])
expectEqualUnicodeScalars([ 0xfffd, 0x1f602, 0xfffd ],
acceptor[donor.index(_nth: 1)..<donor.index(_nth: 5)])
@@ -194,12 +194,8 @@
let donor = "\u{1f601}\u{1f602}\u{1f603}"
let acceptor = "abcdef"
-
- // Adjust donor.startIndex to ensure it caches a stride
- let start = donor.index(before: donor.index(after: donor.startIndex))
-
// FIXME: this traps right now when trying to construct Character("ab").
- expectEqual("a", acceptor[start])
+ expectEqual("a", acceptor[donor.startIndex])
}
StringTests.test("ForeignIndexes/subscript(Index)/OutOfBoundsTrap") {
@@ -1110,25 +1106,14 @@
let winter = "\u{1F3C2}\u{2603}"
// slices
- // First scalar is 4 bytes long, so this should be invalid
- expectNil(
- String(winter.utf8[
- winter.utf8.startIndex
- ..<
- winter.utf8.index(after: winter.utf8.index(after: winter.utf8.startIndex))
- ]))
-
- /*
- // FIXME: note changed String(describing:) results
+ // The first scalar is 4 bytes long in UTF-8, so a slice of only its first two bytes should yield a replacement character.
expectEqual(
- "\u{FFFD}",
- String(describing:
+ "\u{FFFD}", String(describing:
winter.utf8[
winter.utf8.startIndex
..<
winter.utf8.index(after: winter.utf8.index(after: winter.utf8.startIndex))
]))
- */
expectEqual(
"\u{1F3C2}", String(
@@ -1185,8 +1170,8 @@
result, flags, stop
in
let r = result!.rangeAt(1)
- let start = String.UTF16Index(encodedOffset: r.location)
- let end = String.UTF16Index(encodedOffset: r.location + r.length)
+ let start = String.UTF16Index(_offset: r.location)
+ let end = String.UTF16Index(_offset: r.location + r.length)
matches.append(String(s.utf16[start..<end])!)
}
@@ -1282,7 +1267,8 @@
internal func decodeCString<
C : UnicodeCodec
>(_ s: String, as codec: C.Type)
--> (result: String, repairsMade: Bool)? {
+-> (result: String, repairsMade: Bool)?
+where C.CodeUnit : FixedWidthInteger {
let units = s.unicodeScalars.map({ $0.value }) + [0]
return units.map({ C.CodeUnit($0) }).withUnsafeBufferPointer {
String.decodeCString($0.baseAddress, as: C.self)
diff --git a/validation-test/stdlib/StringViews.swift b/validation-test/stdlib/StringViews.swift
index bc9ae68..a91c762 100644
--- a/validation-test/stdlib/StringViews.swift
+++ b/validation-test/stdlib/StringViews.swift
@@ -80,12 +80,7 @@
// [e2 9b 84 | ef b8 8f] [e2 9d 84 | ef b8 8f]
//===--- To UTF8 ----------------------------------------------------------===//
-func checkToUTF8(
- _ id: String,
- mapIndex: @escaping (String.Index, String.UTF8View)->String.Index?
-) {
-
-tests.test("index-mapping/character-to-utf8/\(id)") {
+tests.test("index-mapping/character-to-utf8") {
// the first three utf8 code units at the start of each grapheme
// cluster
expectEqualSequence(
@@ -107,25 +102,21 @@
i in (0..<3).map {
winter.utf8[
winter.utf8.index(
- mapIndex(i, winter.utf8), offsetBy: $0)]
+ i.samePosition(in: winter.utf8), offsetBy: $0)]
}
}, sameValue: ==)
- expectEqual(
- winter.utf8.endIndex, mapIndex(winter.endIndex, winter.utf8))
-
+ expectEqual(winter.utf8.endIndex, winter.endIndex.samePosition(in: winter.utf8))
+
expectEqualSequence(
summerBytes,
- summer.characters.indices.map {
- summer.utf8[mapIndex($0, summer.utf8)]
- }
+ summer.characters.indices.map { summer.utf8[$0.samePosition(in: summer.utf8)] }
)
-
- expectEqual(
- summer.utf8.endIndex, mapIndex(summer.endIndex, summer.utf8))
+
+ expectEqual(summer.utf8.endIndex, summer.endIndex.samePosition(in: summer.utf8))
}
-tests.test("index-mapping/unicode-scalar-to-utf8/\(id)") {
+tests.test("index-mapping/unicode-scalar-to-utf8") {
// the first three utf8 code units at the start of each unicode
// scalar
expectEqualSequence(
@@ -149,34 +140,33 @@
winter.unicodeScalars.indices.map {
i in (0..<3).map {
winter.utf8[
- winter.utf8.index(mapIndex(i, winter.utf8), offsetBy: $0)]
+ winter.utf8.index(i.samePosition(in: winter.utf8), offsetBy: $0)]
}
}, sameValue: ==)
expectEqual(
winter.utf8.endIndex,
- mapIndex(winter.unicodeScalars.endIndex, winter.utf8))
-
+ winter.unicodeScalars.endIndex.samePosition(in: winter.utf8))
+
expectEqualSequence(
summerBytes,
summer.unicodeScalars.indices.map {
- summer.utf8[mapIndex($0, summer.utf8)]
+ summer.utf8[$0.samePosition(in: summer.utf8)]
}
)
expectEqual(
summer.utf8.endIndex,
- mapIndex(summer.unicodeScalars.endIndex, summer.utf8))
+ summer.unicodeScalars.endIndex.samePosition(in: summer.utf8))
}
-tests.test("index-mapping/utf16-to-utf8/\(id)") {
+tests.test("index-mapping/utf16-to-utf8") {
// check the first three utf8 code units at the start of each utf16
// code unit
expectEqualSequence(
[
[0xf0, 0x9f, 0x8f],
- // does not align with any utf8 code unit
- id == "legacy" ? [] : replacementUTF8,
+ [], // does not align with any utf8 code unit
[0xe2, 0x98, 0x83],
[0xe2, 0x9d, 0x85],
[0xe2, 0x9d, 0x86],
@@ -192,46 +182,115 @@
replacementUTF8
] as [[UTF8.CodeUnit]],
winter.utf16.indices.map {
- i16 in mapIndex(i16, winter.utf8).map {
+ i16 in i16.samePosition(in: winter.utf8).map {
i8 in (0..<3).map {
winter.utf8[winter.utf8.index(i8, offsetBy: $0)]
}
} ?? []
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf16.endIndex, winter.utf8))
+ expectNotNil(winter.utf16.endIndex.samePosition(in: winter.utf8))
expectEqual(
winter.utf8.endIndex,
- mapIndex(winter.utf16.endIndex, winter.utf8)!)
-
+ winter.utf16.endIndex.samePosition(in: winter.utf8)!)
+
expectEqualSequence(
summerBytes,
summer.utf16.indices.map {
- summer.utf8[mapIndex($0, summer.utf8)!]
+ summer.utf8[$0.samePosition(in: summer.utf8)!]
}
)
-
- expectNotNil(mapIndex(summer.utf16.endIndex, summer.utf8))
+
+ expectNotNil(summer.utf16.endIndex.samePosition(in: summer.utf8))
expectEqual(
summer.utf8.endIndex,
- mapIndex(summer.utf16.endIndex, summer.utf8)!)
+ summer.utf16.endIndex.samePosition(in: summer.utf8)!)
}
+
+tests.test("index-mapping/utf8-to-unicode-scalar") {
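+ // Define the expectation separately to help the type-checker, which otherwise runs out of time solving.
+ // NOTE(review): this test name (and "index-mapping/utf16-to-unicode-scalar" below) is registered again under "To UnicodeScalar" with a longer expected list — confirm the duplicate is intended.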
+ let winterUtf8UnicodeScalars: [UnicodeScalar?] = [
+ UnicodeScalar(0x1f3c2), nil, nil, nil,
+ UnicodeScalar(0x2603), nil, nil,
+ UnicodeScalar(0x2745), nil, nil,
+ UnicodeScalar(0x2746), nil, nil,
+ UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0e), nil, nil,
+ UnicodeScalar(0x26c4), nil, nil, UnicodeScalar(0xfe0f), nil, nil,
+ UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0f), nil, nil
+ ]
+
+ expectEqualSequence(
+ winterUtf8UnicodeScalars,
+ winter.utf8.indices.map {
+ i in i.samePosition(in: winter.unicodeScalars).map {
+ winter.unicodeScalars[$0]
+ }
+ }, sameValue: ==
+ )
+
+ expectNotNil(winter.utf8.endIndex.samePosition(in: winter.unicodeScalars))
+ expectEqual(
+ winter.unicodeScalars.endIndex,
+ winter.utf8.endIndex.samePosition(in: winter.unicodeScalars)!)
+
+ expectEqualSequence(
+ summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
+ summer.utf8.indices.map {
+ i in i.samePosition(in: summer.unicodeScalars).map {
+ summer.unicodeScalars[$0]
+ }
+ }, sameValue: ==
+ )
+
+ expectNotNil(summer.utf8.endIndex.samePosition(in: summer.unicodeScalars))
+ expectEqual(
+ summer.unicodeScalars.endIndex,
+ summer.utf8.endIndex.samePosition(in: summer.unicodeScalars)!)
}
-checkToUTF8("legacy") { $0.samePosition(in: $1) }
-checkToUTF8("interchange") { i, _ in i }
+
+tests.test("index-mapping/utf16-to-unicode-scalar") {
+ let winterUtf16UnicodeScalars: [UnicodeScalar?] = [
+ UnicodeScalar(0x1f3c2), nil,
+ UnicodeScalar(0x2603),
+ UnicodeScalar(0x2745),
+ UnicodeScalar(0x2746),
+ UnicodeScalar(0x2744), UnicodeScalar(0xfe0e),
+ UnicodeScalar(0x26c4), UnicodeScalar(0xfe0f),
+ UnicodeScalar(0x2744), UnicodeScalar(0xfe0f)
+ ]
+
+ expectEqualSequence(
+ winterUtf16UnicodeScalars,
+ winter.utf16.indices.map {
+ i in i.samePosition(in: winter.unicodeScalars).map {
+ winter.unicodeScalars[$0]
+ }
+ }, sameValue: ==
+ )
+
+ expectNotNil(winter.utf16.endIndex.samePosition(in: winter.unicodeScalars))
+ expectEqual(
+ winter.unicodeScalars.endIndex,
+ winter.utf16.endIndex.samePosition(in: winter.unicodeScalars)!)
+
+ expectEqualSequence(
+ summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
+ summer.utf16.indices.map {
+ i in i.samePosition(in: summer.unicodeScalars).map {
+ summer.unicodeScalars[$0]
+ }
+ }, sameValue: ==
+ )
+
+ expectNotNil(summer.utf16.endIndex.samePosition(in: summer.unicodeScalars))
+ expectEqual(
+ summer.unicodeScalars.endIndex,
+ summer.utf16.endIndex.samePosition(in: summer.unicodeScalars)!)
+}
//===--- To UTF16 ---------------------------------------------------------===//
-func checkToUTF16(
- _ id: String,
- mapIndex: @escaping (String.Index, String.UTF16View)->String.Index?
-) {
-
-
-func err(_ codeUnit: Unicode.UTF16.CodeUnit) -> Unicode.UTF16.CodeUnit? {
- return id == "legacy" ? nil : codeUnit
-}
-
-tests.test("index-mapping/character-to-utf16/\(id)") {
+tests.test("index-mapping/character-to-utf16") {
expectEqualSequence(
[
0xd83c, // 0xdfc2,
@@ -245,21 +304,21 @@
] as [UTF16.CodeUnit],
winter.characters.indices.map {
- winter.utf16[mapIndex($0, winter.utf16)]
+ winter.utf16[$0.samePosition(in: winter.utf16)]
},
sameValue: ==)
- expectEqual(winter.utf16.endIndex, mapIndex(winter.endIndex, winter.utf16))
+ expectEqual(winter.utf16.endIndex, winter.endIndex.samePosition(in: winter.utf16))
expectEqualSequence(
summerBytes.map { UTF16.CodeUnit($0) },
- summer.characters.indices.map { summer.utf16[mapIndex($0, summer.utf16)] }
+ summer.characters.indices.map { summer.utf16[$0.samePosition(in: summer.utf16)] }
)
- expectEqual(summer.utf16.endIndex, mapIndex(summer.endIndex, summer.utf16))
+ expectEqual(summer.utf16.endIndex, summer.endIndex.samePosition(in: summer.utf16))
}
-tests.test("index-mapping/unicode-scalar-to-utf16/\(id)") {
+tests.test("index-mapping/unicode-scalar-to-utf16") {
expectEqualSequence(
[
0xd83c, // 0xdfc2,
@@ -273,80 +332,68 @@
] as [UTF16.CodeUnit],
winter.unicodeScalars.indices.map {
- winter.utf16[mapIndex($0, winter.utf16)]
+ winter.utf16[$0.samePosition(in: winter.utf16)]
})
expectEqual(
winter.utf16.endIndex,
- mapIndex(winter.unicodeScalars.endIndex, winter.utf16))
+ winter.unicodeScalars.endIndex.samePosition(in: winter.utf16))
expectEqualSequence(
summerBytes.map { UTF16.CodeUnit($0) },
summer.unicodeScalars.indices.map {
- summer.utf16[mapIndex($0, summer.utf16)]
+ summer.utf16[$0.samePosition(in: summer.utf16)]
}
)
expectEqual(
summer.utf16.endIndex,
- mapIndex(summer.unicodeScalars.endIndex, summer.utf16))
+ summer.unicodeScalars.endIndex.samePosition(in: summer.utf16))
}
-tests.test("index-mapping/utf8-to-utf16/\(id)") {
+tests.test("index-mapping/utf8-to-utf16") {
expectEqualSequence(
[
- 0xd83c, err(0xd83c), err(0xd83c), err(0xd83c),
- 0x2603, err(0x2603), err(0x2603),
- 0x2745, err(0x2745), err(0x2745),
- 0x2746, err(0x2746), err(0x2746),
- 0x2744, err(0x2744), err(0x2744),
- 0xfe0e, err(0xfe0e), err(0xfe0e),
- 0x26c4, err(0x26c4), err(0x26c4),
- 0xfe0f, err(0xfe0f), err(0xfe0f),
- 0x2744, err(0x2744), err(0x2744),
- 0xfe0f, err(0xfe0f), err(0xfe0f),
- replacementUTF16, err(replacementUTF16), err(replacementUTF16),
+ 0xd83c, nil, nil, nil,
+ 0x2603, nil, nil,
+ 0x2745, nil, nil,
+ 0x2746, nil, nil,
+ 0x2744, nil, nil,
+ 0xfe0e, nil, nil,
+ 0x26c4, nil, nil,
+ 0xfe0f, nil, nil,
+ 0x2744, nil, nil,
+ 0xfe0f, nil, nil,
+ replacementUTF16, nil, nil,
0x20,
- replacementUTF16, err(replacementUTF16), err(replacementUTF16),
- replacementUTF16, err(replacementUTF16), err(replacementUTF16)
+ replacementUTF16, nil, nil,
+ replacementUTF16, nil, nil
] as [UTF16.CodeUnit?],
winter.utf8.indices.map {
- mapIndex($0, winter.utf16).map {
+ $0.samePosition(in: winter.utf16).map {
winter.utf16[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf8.endIndex, winter.utf16))
+ expectNotNil(winter.utf8.endIndex.samePosition(in: winter.utf16))
expectEqual(
winter.utf16.endIndex,
- mapIndex(winter.utf8.endIndex, winter.utf16)!)
+ winter.utf8.endIndex.samePosition(in: winter.utf16)!)
expectEqualSequence(
summerBytes.map { UTF16.CodeUnit($0) },
- summer.utf8.indices.map { summer.utf16[mapIndex($0, summer.utf16)!] }
+ summer.utf8.indices.map { summer.utf16[$0.samePosition(in: summer.utf16)!] }
)
- expectNotNil(mapIndex(summer.utf8.endIndex, summer.utf16))
+ expectNotNil(summer.utf8.endIndex.samePosition(in: summer.utf16))
expectEqual(
summer.utf16.endIndex,
- mapIndex(summer.utf8.endIndex, summer.utf16)!)
+ summer.utf8.endIndex.samePosition(in: summer.utf16)!)
}
-}
-checkToUTF16("legacy") { $0.samePosition(in: $1) }
-checkToUTF16("interchange") { i, _ in i }
//===--- To UnicodeScalar -------------------------------------------------===//
-func checkToUnicodeScalar(
- _ id: String,
- mapIndex: @escaping (String.Index, String.UnicodeScalarView)->String.Index?
-) {
-
-func err(_ scalarValue: UInt32) -> UnicodeScalar? {
- return id == "legacy" ? nil : UnicodeScalar(scalarValue)
-}
-
-tests.test("index-mapping/character-to-unicode-scalar/\(id)") {
+tests.test("index-mapping/character-to-unicode-scalar") {
let winterCharacterUnicodeScalars: [UnicodeScalar] = [
UnicodeScalar(0x1f3c2)!,
UnicodeScalar(0x2603)!,
@@ -361,72 +408,66 @@
expectEqualSequence(
winterCharacterUnicodeScalars,
winter.characters.indices.map {
- winter.unicodeScalars[mapIndex($0, winter.unicodeScalars)]
+ winter.unicodeScalars[$0.samePosition(in: winter.unicodeScalars)]
})
- expectEqual(winter.unicodeScalars.endIndex, mapIndex(winter.endIndex, winter.unicodeScalars))
+ expectEqual(winter.unicodeScalars.endIndex, winter.endIndex.samePosition(in: winter.unicodeScalars))
expectEqualSequence(
summerBytes.map { UnicodeScalar($0) },
- summer.characters.indices.map { summer.unicodeScalars[mapIndex($0, summer.unicodeScalars)] }
+ summer.characters.indices.map { summer.unicodeScalars[$0.samePosition(in: summer.unicodeScalars)] }
)
- expectEqual(summer.unicodeScalars.endIndex, mapIndex(summer.endIndex, summer.unicodeScalars))
+ expectEqual(summer.unicodeScalars.endIndex, summer.endIndex.samePosition(in: summer.unicodeScalars))
}
-tests.test("index-mapping/utf8-to-unicode-scalar/\(id)") {
+tests.test("index-mapping/utf8-to-unicode-scalar") {
// Define expectation separately to help the type-checker, which
// otherwise runs out of time solving.
let winterUtf8UnicodeScalars: [UnicodeScalar?] = [
- UnicodeScalar(0x1f3c2), err(0x1f3c2), err(0x1f3c2), err(0x1f3c2),
- UnicodeScalar(0x2603), err(0x2603), err(0x2603),
- UnicodeScalar(0x2745), err(0x2745), err(0x2745),
- UnicodeScalar(0x2746), err(0x2746), err(0x2746),
- UnicodeScalar(0x2744), err(0x2744), err(0x2744),
- UnicodeScalar(0xfe0e), err(0xfe0e), err(0xfe0e),
- UnicodeScalar(0x26c4), err(0x26c4), err(0x26c4),
- UnicodeScalar(0xfe0f), err(0xfe0f), err(0xfe0f),
- UnicodeScalar(0x2744), err(0x2744), err(0x2744),
- UnicodeScalar(0xfe0f), err(0xfe0f), err(0xfe0f),
- replacementScalar,
- err(replacementScalar.value), err(replacementScalar.value),
+ UnicodeScalar(0x1f3c2), nil, nil, nil,
+ UnicodeScalar(0x2603), nil, nil,
+ UnicodeScalar(0x2745), nil, nil,
+ UnicodeScalar(0x2746), nil, nil,
+ UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0e), nil, nil,
+ UnicodeScalar(0x26c4), nil, nil, UnicodeScalar(0xfe0f), nil, nil,
+ UnicodeScalar(0x2744), nil, nil, UnicodeScalar(0xfe0f), nil, nil,
+ replacementScalar, nil, nil,
UnicodeScalar(0x20),
- replacementScalar,
- err(replacementScalar.value), err(replacementScalar.value),
- replacementScalar,
- err(replacementScalar.value), err(replacementScalar.value)
+ replacementScalar, nil, nil,
+ replacementScalar, nil, nil
]
expectEqualSequence(
winterUtf8UnicodeScalars,
winter.utf8.indices.map {
- i in mapIndex(i, winter.unicodeScalars).map {
+ i in i.samePosition(in: winter.unicodeScalars).map {
winter.unicodeScalars[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf8.endIndex, winter.unicodeScalars))
+ expectNotNil(winter.utf8.endIndex.samePosition(in: winter.unicodeScalars))
expectEqual(
winter.unicodeScalars.endIndex,
- mapIndex(winter.utf8.endIndex, winter.unicodeScalars)!)
+ winter.utf8.endIndex.samePosition(in: winter.unicodeScalars)!)
expectEqualSequence(
summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
summer.utf8.indices.map {
- i in mapIndex(i, summer.unicodeScalars).map {
+ i in i.samePosition(in: summer.unicodeScalars).map {
summer.unicodeScalars[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(summer.utf8.endIndex, summer.unicodeScalars))
+ expectNotNil(summer.utf8.endIndex.samePosition(in: summer.unicodeScalars))
expectEqual(
summer.unicodeScalars.endIndex,
- mapIndex(summer.utf8.endIndex, summer.unicodeScalars)!)
+ summer.utf8.endIndex.samePosition(in: summer.unicodeScalars)!)
}
-tests.test("index-mapping/utf16-to-unicode-scalar/\(id)") {
+tests.test("index-mapping/utf16-to-unicode-scalar") {
let winterUtf16UnicodeScalars: [UnicodeScalar?] = [
- UnicodeScalar(0x1f3c2), err(replacementScalar.value),
+ UnicodeScalar(0x1f3c2), nil,
UnicodeScalar(0x2603),
UnicodeScalar(0x2745),
UnicodeScalar(0x2746),
@@ -439,47 +480,34 @@
expectEqualSequence(
winterUtf16UnicodeScalars,
winter.utf16.indices.map {
- i in mapIndex(i, winter.unicodeScalars).map {
+ i in i.samePosition(in: winter.unicodeScalars).map {
winter.unicodeScalars[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf16.endIndex, winter.unicodeScalars))
+ expectNotNil(winter.utf16.endIndex.samePosition(in: winter.unicodeScalars))
expectEqual(
winter.unicodeScalars.endIndex,
- mapIndex(winter.utf16.endIndex, winter.unicodeScalars)!)
+ winter.utf16.endIndex.samePosition(in: winter.unicodeScalars)!)
expectEqualSequence(
summerBytes.map { UnicodeScalar($0) as UnicodeScalar? },
summer.utf16.indices.map {
- i in mapIndex(i, summer.unicodeScalars).map {
+ i in i.samePosition(in: summer.unicodeScalars).map {
summer.unicodeScalars[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(summer.utf16.endIndex, summer.unicodeScalars))
+ expectNotNil(summer.utf16.endIndex.samePosition(in: summer.unicodeScalars))
expectEqual(
summer.unicodeScalars.endIndex,
- mapIndex(summer.utf16.endIndex, summer.unicodeScalars)!)
+ summer.utf16.endIndex.samePosition(in: summer.unicodeScalars)!)
}
-}
-checkToUnicodeScalar("legacy") { $0.samePosition(in: $1) }
-checkToUnicodeScalar("interchange") { i, _ in i }
//===--- To Character -----------------------------------------------------===//
-func checkToCharacter(
- _ id: String,
- mapIndex: @escaping (String.Index, String)->String.Index?
-) {
-
-func err(_ c: Character) -> Character? {
- return id == "legacy" ? nil : c
-}
-
-tests.test("index-mapping/unicode-scalar-to-character/\(id)") {
+tests.test("index-mapping/unicode-scalar-to-character") {
let winterUnicodeScalarCharacters: [Character?] = [
- "🏂", "☃", "❅", "❆", "❄︎", err("\u{FE0E}"), "⛄️", err("\u{FE0F}"),
- "❄️", err("\u{FE0F}"),
+ "🏂", "☃", "❅", "❆", "❄︎", nil, "⛄️", nil, "❄️", nil,
replacementCharacter, "\u{20}", replacementCharacter, replacementCharacter
]
@@ -487,100 +515,93 @@
winterUnicodeScalarCharacters,
winter.unicodeScalars.indices.map {
- i in mapIndex(i, winter).map {
+ i in i.samePosition(in: winter).map {
winter[$0]
}
}, sameValue: ==)
- expectEqual(winter.endIndex, mapIndex(winter.unicodeScalars.endIndex, winter)!)
+ expectEqual(winter.endIndex, winter.unicodeScalars.endIndex.samePosition(in: winter)!)
expectEqualSequence(
summerBytes.map { Character(UnicodeScalar($0)) },
- summer.unicodeScalars.indices.map { summer[mapIndex($0, summer)!] }
+ summer.unicodeScalars.indices.map { summer[$0.samePosition(in: summer)!] }
)
- expectEqual(summer.endIndex, mapIndex(summer.unicodeScalars.endIndex, summer)!)
+ expectEqual(summer.endIndex, summer.unicodeScalars.endIndex.samePosition(in: summer)!)
}
-tests.test("index-mapping/utf8-to-character/\(id)") {
+tests.test("index-mapping/utf8-to-character") {
// Define expectation separately to help the type-checker, which
// otherwise runs out of time solving.
let winterUtf8Characters: [Character?] = [
- "🏂", err("🏂"), err("🏂"), err("🏂"),
- "☃", err("☃"), err("☃"),
- "❅", err("❅"), err("❅"),
- "❆", err("❆"), err("❆"),
- "❄︎", err("❄︎"), err("❄︎"),
- err("\u{fe0e}"), err("\u{fe0e}"), err("\u{fe0e}"),
- "⛄️", err("⛄️"), err("⛄️"),
- err("\u{fe0f}"), err("\u{fe0f}"), err("\u{fe0f}"),
- "❄️", err("❄️"), err("❄️"),
- err("\u{fe0f}"), err("\u{fe0f}"), err("\u{fe0f}"),
- replacementCharacter, err(replacementCharacter), err(replacementCharacter),
+ "🏂", nil, nil, nil,
+ "☃", nil, nil,
+ "❅", nil, nil,
+ "❆", nil, nil,
+ "❄︎", nil, nil, nil, nil, nil,
+ "⛄️", nil, nil, nil, nil, nil,
+ "❄️", nil, nil, nil, nil, nil,
+ replacementCharacter, nil, nil,
"\u{20}",
- replacementCharacter, err(replacementCharacter), err(replacementCharacter),
- replacementCharacter, err(replacementCharacter), err(replacementCharacter),
+ replacementCharacter, nil, nil,
+ replacementCharacter, nil, nil,
]
expectEqualSequence(
winterUtf8Characters,
winter.utf8.indices.map {
- (i:String.UTF8Index) -> Character? in mapIndex(i, winter).map {
+ (i:String.UTF8Index) -> Character? in i.samePosition(in: winter).map {
winter[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf8.endIndex, winter))
+ expectNotNil(winter.utf8.endIndex.samePosition(in: winter))
expectEqual(
winter.endIndex,
- mapIndex(winter.utf8.endIndex, winter)!)
+ winter.utf8.endIndex.samePosition(in: winter)!)
expectEqualSequence(
summerBytes.map { Character(UnicodeScalar($0)) },
- summer.utf8.indices.map { summer[mapIndex($0, summer)!] }
+ summer.utf8.indices.map { summer[$0.samePosition(in: summer)!] }
)
- expectNotNil(mapIndex(summer.utf8.endIndex, summer))
+ expectNotNil(summer.utf8.endIndex.samePosition(in: summer))
expectEqual(
summer.endIndex,
- mapIndex(summer.utf8.endIndex, summer)!)
+ summer.utf8.endIndex.samePosition(in: summer)!)
}
-tests.test("index-mapping/utf16-to-character/\(id)") {
+tests.test("index-mapping/utf16-to-character") {
let winterUtf16Characters: [Character?] = [
- "🏂", err(replacementCharacter), "☃", "❅", "❆", "❄︎", err("\u{fe0e}"),
- "⛄️", err("\u{fe0f}"), "❄️", err("\u{fe0f}"),
+ "🏂", nil, "☃", "❅", "❆", "❄︎", nil, "⛄️", nil, "❄️", nil,
replacementCharacter, "\u{20}", replacementCharacter, replacementCharacter
]
expectEqualSequence(
winterUtf16Characters,
winter.utf16.indices.map {
- i in mapIndex(i, winter).map {
+ i in i.samePosition(in: winter).map {
winter[$0]
}
}, sameValue: ==)
- expectNotNil(mapIndex(winter.utf16.endIndex, winter))
+ expectNotNil(winter.utf16.endIndex.samePosition(in: winter))
expectEqual(
winter.endIndex,
- mapIndex(winter.utf16.endIndex, winter)!)
+ winter.utf16.endIndex.samePosition(in: winter)!)
expectEqualSequence(
summerBytes.map { Character(UnicodeScalar($0)) },
summer.utf16.indices.map {
- summer[mapIndex($0, summer)!]
+ summer[$0.samePosition(in: summer)!]
}
)
- expectNotNil(mapIndex(summer.utf16.endIndex, summer))
+ expectNotNil(summer.utf16.endIndex.samePosition(in: summer))
expectEqual(
summer.endIndex,
- mapIndex(summer.utf16.endIndex, summer)!)
+ summer.utf16.endIndex.samePosition(in: summer)!)
}
-}
-checkToCharacter("legacy") { $0.samePosition(in: $1) }
-checkToCharacter("interchange") { i, _ in i }
//===----------------------------------------------------------------------===//
// These are rather complicated due to their internal buffers, so
@@ -618,7 +639,7 @@
//===--- while loop -------------------------------------------------===//
// Advance an index from u8i0 over ds Unicode scalars (thus
// reaching u8i1) by counting leading bytes traversed
- var u8i0a = u8i0! // <========== NOTE SOURCE COMPATIBILITY BREAKAGE
+ var u8i0a = u8i0
var dsa = 0 // number of Unicode scalars it has advanced over
while true {
@@ -721,8 +742,25 @@
}
}
-// Note: Strideable conformance for UTF16View.Index when Foundation is imported
-// has been dropped for Swift 4.
+#if _runtime(_ObjC)
+tests.test("String.UTF16View.Index/Strideable")
+ .forEach(in: utfTests) {
+ test in
+
+ func allIndices<C : Collection>(of c: C) -> [C.Index]
+ where C.Indices.Iterator.Element == C.Index
+ {
+ var result = Array(c.indices)
+ result.append(c.endIndex)
+ return result
+ }
+
+ checkStrideable(
+ instances: allIndices(of: test.string.utf16),
+ distances: Array(0..<test.string.utf16.count),
+ distanceOracle: { $1 - $0 })
+}
+#endif
tests.test("String.UTF8View/Collection")
.forEach(in: utfTests) {
@@ -732,12 +770,21 @@
checkForwardCollection(test.utf8, test.string.utf8) { $0 == $1 }
}
+#if _runtime(_Native)
tests.test("String.UTF16View/BidirectionalCollection")
.forEach(in: utfTests) {
test in
checkBidirectionalCollection(test.utf16, test.string.utf16) { $0 == $1 }
}
+#else
+tests.test("String.UTF16View/RandomAccessCollection")
+ .forEach(in: utfTests) {
+ test in
+
+ checkRandomAccessCollection(test.utf16, test.string.utf16) { $0 == $1 }
+}
+#endif
tests.test("String.UTF32View/BidirectionalCollection")
.forEach(in: utfTests) {