[String] Custom iterator for UTF16View (#20929)
Defining a custom iterator for the UTF16View avoids some redundant
computation relative to the indexing model. This speeds up iteration by
around 40% on non-ASCII strings.
diff --git a/stdlib/public/core/StringCharacterView.swift b/stdlib/public/core/StringCharacterView.swift
index 09ec819..154dc4f 100644
--- a/stdlib/public/core/StringCharacterView.swift
+++ b/stdlib/public/core/StringCharacterView.swift
@@ -234,8 +234,8 @@
@inlinable
internal init(_ guts: _StringGuts) {
- self._guts = guts
self._end = guts.count
+ self._guts = guts
}
@inlinable
diff --git a/stdlib/public/core/StringUTF16View.swift b/stdlib/public/core/StringUTF16View.swift
index 416fce4..eb00925 100644
--- a/stdlib/public/core/StringUTF16View.swift
+++ b/stdlib/public/core/StringUTF16View.swift
@@ -260,6 +260,59 @@
}
}
}
+
+extension String.UTF16View {
+ @_fixed_layout
+ public struct Iterator: IteratorProtocol {
+ @usableFromInline
+ internal var _guts: _StringGuts
+
+ @usableFromInline
+ internal var _position: Int = 0
+
+ @usableFromInline
+ internal var _end: Int
+
+ // If non-nil, return this value for `next()` (and set it to nil).
+ //
+ // This is set when visiting a non-BMP scalar: the leading surrogate is
+ // returned, this field is set with the value of the trailing surrogate, and
+ // `_position` is advanced to the start of the next scalar.
+ @usableFromInline
+ internal var _nextIsTrailingSurrogate: UInt16? = nil
+
+ @inlinable
+ internal init(_ guts: _StringGuts) {
+ self._end = guts.count
+ self._guts = guts
+ }
+
+ @inlinable
+ public mutating func next() -> UInt16? {
+ if _slowPath(_nextIsTrailingSurrogate != nil) {
+ let trailing = self._nextIsTrailingSurrogate._unsafelyUnwrappedUnchecked
+ self._nextIsTrailingSurrogate = nil
+ return trailing
+ }
+ guard _fastPath(_position < _end) else { return nil }
+
+ let (scalar, len) = _guts.errorCorrectedScalar(startingAt: _position)
+ _position &+= len
+
+ if _slowPath(scalar.value > UInt16.max) {
+ self._nextIsTrailingSurrogate = scalar.utf16[1]
+ return scalar.utf16[0]
+ }
+ return UInt16(truncatingIfNeeded: scalar.value)
+ }
+ }
+ @inlinable
+ public __consuming func makeIterator() -> Iterator {
+ return Iterator(_guts)
+ }
+}
+
+
extension String.UTF16View: CustomStringConvertible {
@inlinable
public var description: String {
@@ -464,7 +517,7 @@
if idx.encodedOffset < _shortHeuristic || !_guts.hasBreadcrumbs {
return _distance(from: startIndex, to: idx)
}
-
+
// Simple and common: endIndex aka `length`.
let breadcrumbsPtr = _guts.getBreadcrumbsPtr()
if idx == endIndex { return breadcrumbsPtr.pointee.utf16Length }
diff --git a/stdlib/public/core/StringUnicodeScalarView.swift b/stdlib/public/core/StringUnicodeScalarView.swift
index df1dd4f..3e8dc69 100644
--- a/stdlib/public/core/StringUnicodeScalarView.swift
+++ b/stdlib/public/core/StringUnicodeScalarView.swift
@@ -180,8 +180,8 @@
@inlinable
internal init(_ guts: _StringGuts) {
- self._guts = guts
self._end = guts.count
+ self._guts = guts
}
@inlinable
diff --git a/test/api-digester/Outputs/stability-stdlib-abi.swift.expected b/test/api-digester/Outputs/stability-stdlib-abi.swift.expected
index 0d928c7..f3913f0 100644
--- a/test/api-digester/Outputs/stability-stdlib-abi.swift.expected
+++ b/test/api-digester/Outputs/stability-stdlib-abi.swift.expected
@@ -499,6 +499,9 @@
Var _StringGutsSlice.range has been removed
Var _StringGutsSlice.start has been removed
+Struct String.UTF16View has type witness type for Collection.Iterator changing from IndexingIterator<String.UTF16View> to String.UTF16View.Iterator
+Struct String.UTF16View has type witness type for Sequence.Iterator changing from IndexingIterator<String.UTF16View> to String.UTF16View.Iterator
+
Func ManagedBufferPointer._sanityCheckValidBufferClass(_:creating:) has been removed
Func _sanityCheck(_:_:file:line:) has been removed
Func _sanityCheckFailure(_:file:line:) has been removed