Merge pull request #10693 from milseman/zalgorithmic_complexity
[stdlib] Update non-contiguous NSStrings to Unicode 9
diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt
index 4e8683b..005e0bf 100644
--- a/stdlib/public/core/CMakeLists.txt
+++ b/stdlib/public/core/CMakeLists.txt
@@ -55,6 +55,7 @@
ErrorType.swift
Existential.swift
Filter.swift.gyb
+ FixedArray.swift.gyb
FlatMap.swift
Flatten.swift.gyb
FloatingPoint.swift.gyb
diff --git a/stdlib/public/core/FixedArray.swift.gyb b/stdlib/public/core/FixedArray.swift.gyb
new file mode 100644
index 0000000..4fa02db
--- /dev/null
+++ b/stdlib/public/core/FixedArray.swift.gyb
@@ -0,0 +1,113 @@
+//===--- FixedArray.swift.gyb ---------------------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+//
+// A helper struct to provide fixed-sized array like functionality
+//
+//===----------------------------------------------------------------------===//
+
+%{
+ # The sizes to generate code for.
+ sizes = [16]
+}%
+
+% for N in sizes:
+
+internal struct _FixedArray${N}<T> {
+ // ABI TODO: This has assumptions about tuple layout in the ABI, namely that
+ // they are laid out contiguously and individually addressable (i.e. strided).
+ // The subscript reinterprets this tuple's bytes as ${N} consecutive T values.
+ internal var storage: (
+ // A ${N}-wide tuple of type T
+% for i in range(0, N-1):
+ T,
+% end
+ T
+ )
+ // The fixed element count; the collection's endIndex and count return it.
+ static var _arraySize : Int { return ${N} }
+}
+
+extension _FixedArray${N} : RandomAccessCollection, MutableCollection {
+  internal typealias Index = Int
+  internal typealias IndexDistance = Int
+
+  /// The position of the first element; always zero.
+  internal var startIndex : Index { return 0 }
+
+  /// The position one past the last element; always ${N}.
+  internal var endIndex : Index { return _FixedArray${N}._arraySize }
+
+  /// The number of elements; always ${N}.
+  internal var count : IndexDistance { return _FixedArray${N}._arraySize }
+
+  /// Accesses element `i` by viewing the tuple's bytes as a buffer of `T`.
+  /// `i` must be in `0..<${N}`; this is only verified by `_sanityCheck`.
+  internal subscript(i: Index) -> T {
+    @_versioned
+    @inline(__always)
+    get {
+      let count = self.count // hoisted so the closure does not touch `self`
+      _sanityCheck(i >= 0 && i < count, "index out of range")
+      var copy = storage // `withUnsafeBytes(of:)` takes its argument inout
+      return withUnsafeBytes(of: &copy) {
+        (rawPtr : UnsafeRawBufferPointer) -> T in
+        let stride = MemoryLayout<T>.stride
+        _sanityCheck(rawPtr.count == ${N}*stride, "layout mismatch?")
+        let bufPtr = UnsafeBufferPointer(
+          start: rawPtr.baseAddress!.assumingMemoryBound(to: T.self),
+          count: count)
+        return bufPtr[i]
+      }
+    }
+    @_versioned
+    @inline(__always)
+    set {
+      // Read `count` before `storage` is borrowed inout: touching `self`
+      // inside the closure would overlap with that exclusive access.
+      let count = self.count
+      _sanityCheck(i >= 0 && i < count, "index out of range")
+      withUnsafeMutableBytes(of: &storage) {
+        (rawPtr : UnsafeMutableRawBufferPointer) -> () in
+        let stride = MemoryLayout<T>.stride
+        _sanityCheck(rawPtr.count == ${N}*stride, "layout mismatch?")
+        let bufPtr = UnsafeMutableBufferPointer(
+          start: rawPtr.baseAddress!.assumingMemoryBound(to: T.self),
+          count: count)
+        bufPtr[i] = newValue
+      }
+    }
+  }
+
+  @_versioned
+  @inline(__always)
+  internal func index(after i: Index) -> Index { return i+1 }
+
+  @_versioned
+  @inline(__always)
+  internal func index(before i: Index) -> Index { return i-1 }
+
+  // TODO: Any customization hooks it's profitable to override, e.g. append?
+}
+
+extension _FixedArray${N} where T: ExpressibleByIntegerLiteral {
+  /// Creates an array whose ${N} elements are all the integer literal `0`.
+  @inline(__always)
+  internal init(allZeros:()) {
+    self.storage = (
+% for i in range(0, N-1):
+      0,
+% end
+      0)
+  }
+}
+
+% end
diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json
index c4c41db..f816522 100644
--- a/stdlib/public/core/GroupInfo.json
+++ b/stdlib/public/core/GroupInfo.json
@@ -92,6 +92,7 @@
"Arrays.swift",
"CocoaArray.swift",
"ContiguousArrayBuffer.swift",
+ "FixedArray.swift",
"SliceBuffer.swift",
"SwiftNativeNSArray.swift"],
"HashedCollections": [
diff --git a/stdlib/public/core/StringCharacterView.swift b/stdlib/public/core/StringCharacterView.swift
index debf0ba..13169af 100644
--- a/stdlib/public/core/StringCharacterView.swift
+++ b/stdlib/public/core/StringCharacterView.swift
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-// FIXME(ABI)#70 : The character string view should have a custom iterator type to
-// allow performance optimizations of linear traversals.
+// FIXME(ABI)#70 : The character string view should have a custom iterator type
+// to allow performance optimizations of linear traversals.
/// CR and LF are common special cases in grapheme breaking logic
@_versioned internal var _CR: UInt8 { return 0x0d }
@@ -102,7 +102,8 @@
/// of the string.
///
/// var str = "All this happened, more or less."
- /// let afterSpace = str.withMutableCharacters { chars -> String.CharacterView in
+ /// let afterSpace = str.withMutableCharacters {
+ /// chars -> String.CharacterView in
/// if let i = chars.index(of: " ") {
/// let result = chars[chars.index(after: i)...]
/// chars.removeSubrange(i...)
@@ -364,128 +365,130 @@
internal func _measureExtendedGraphemeClusterForward(
from start: UnicodeScalarView.Index
) -> Int {
- let end = unicodeScalars.endIndex
- if start == end {
+ let startPosition = start._position
+ let endPosition = unicodeScalars.endIndex._position
+
+ // No more graphemes
+ if startPosition == endPosition {
return 0
}
- // Our relative position (offset). If our _core is not a substring, this is
- // the same as start._position.
- let relativeOffset = start._position - _coreOffset
+ // Last code unit means final grapheme length of 1
+ if startPosition == endPosition - 1 {
+ return 1
+ }
+
+ // Our relative offset from the _StringCore's baseAddress pointer. If our
+ // _core is not a substring, this is the same as start._position. Otherwise,
+ // it is the code unit relative offset into the substring and not the
+ // absolute offset into the outer string.
+ let startOffset = startPosition - _coreOffset
// Grapheme breaking is much simpler if known ASCII
if _core.isASCII {
_onFastPath() // Please aggressively inline
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
+ _sanityCheck(startOffset+1 < asciiBuffer.endIndex,
+ "Already checked for last code unit")
// With the exception of CR-LF, ASCII graphemes are single-scalar. Check
// for that one exception.
if _slowPath(
- asciiBuffer[relativeOffset] == _CR &&
- relativeOffset+1 < asciiBuffer.endIndex &&
- asciiBuffer[relativeOffset+1] == _LF
+ asciiBuffer[startOffset] == _CR &&
+ asciiBuffer[startOffset+1] == _LF
) {
return 2
}
return 1
- } else {
- // TODO: Check for (potentially non-contiguous) ASCII NSStrings,
- // especially small tagged pointers.
}
- let startIndexUTF16 = start._position
-
- // Last scalar is its own grapheme
- if (startIndexUTF16+1 == end._position) {
+ // Perform a quick single-code-unit grapheme check.
+ if _fastPath(String.CharacterView._quickCheckGraphemeBreakBetween(
+ _core[startOffset],
+ _core[startOffset+1])
+ ) {
return 1
}
- // Perform a quick single-code-unit grapheme check
- if _core._baseAddress != nil {
- if String.CharacterView._quickCheckGraphemeBreakBetween(
- _core._nthContiguous(relativeOffset),
- _core._nthContiguous(relativeOffset+1)
- ) {
- return 1
- }
- } else {
- // TODO: Check for (potentially non-contiguous) UTF16 NSStrings,
- // especially small tagged pointers
- }
- return _measureExtendedGraphemeClusterForwardSlow(
- relativeOffset: relativeOffset,
- start: start,
- end: end,
- startIndexUTF16: startIndexUTF16
- )
+ return _measureExtendedGraphemeClusterForwardSlow(startOffset: startOffset)
}
@inline(never)
@_versioned
func _measureExtendedGraphemeClusterForwardSlow(
- relativeOffset: Int,
- start: String.UnicodeScalarView.Index,
- end: String.UnicodeScalarView.Index,
- startIndexUTF16: Int
+ startOffset: Int
) -> Int {
- if _core._baseAddress != nil {
+ let endOffset = unicodeScalars.endIndex._position - _coreOffset
+ let numCodeUnits = endOffset - startOffset
+ _sanityCheck(numCodeUnits >= 2, "should have at least two code units")
+
+ // The vast majority of time, we can get a pointer and a length directly
+ if _fastPath(_core._baseAddress != nil) {
_onFastPath() // Please aggressively inline
let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: _core)
- let ubrkFollowing = __swift_stdlib_ubrk_following(
- breakIterator, Int32(relativeOffset)
+ let ubrkFollowingOffset = __swift_stdlib_ubrk_following(
+ breakIterator, Int32(startOffset)
)
// ubrk_following may return UBRK_DONE (-1). Treat that as the rest of the
// string.
- let nextPosition =
- ubrkFollowing == -1 ? end._position : Int(ubrkFollowing)
- return nextPosition - relativeOffset
- } else {
- // TODO: See if we can get fast character contents.
+ if _slowPath(ubrkFollowingOffset == -1) {
+ return numCodeUnits
+ }
+ _sanityCheck(startOffset != Int(ubrkFollowingOffset),
+ "zero-sized grapheme?")
+ return Int(ubrkFollowingOffset) - startOffset
}
- // FIXME: Need to handle the general case correctly with Unicode 9+
- // semantics, as opposed to this legacy Unicode 8 path. This gets hit for
- // e.g. non-contiguous NSStrings. In such cases, there may be an alternative
- // CFString API available, or worst case we can map over it via UTextFuncs.
-
- return legacyGraphemeForward(
- start: start, end: end, startIndexUTF16: startIndexUTF16
- )
- }
-
- @inline(never)
- func legacyGraphemeForward(
- start: UnicodeScalarView.Index,
- end: UnicodeScalarView.Index,
- startIndexUTF16: Int
- ) -> Int {
- var start = start
- let graphemeClusterBreakProperty =
- _UnicodeGraphemeClusterBreakPropertyTrie()
- let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
-
- var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
- unicodeScalars[start].value)
- unicodeScalars.formIndex(after: &start)
-
- while start != end {
- // FIXME(performance): consider removing this "fast path". A branch
- // that is hard to predict could be worse for performance than a few
- // loads from cache to fetch the property 'gcb1'.
- if segmenter.isBoundaryAfter(gcb0) {
- break
- }
- let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
- unicodeScalars[start].value)
- if segmenter.isBoundary(gcb0, gcb1) {
- break
- }
- gcb0 = gcb1
- unicodeScalars.formIndex(after: &start)
+ // We have a non-contiguous string. Pull out some code units into a fixed
+ // array and try to perform grapheme breaking on that. If even that's not
+ // sufficient (i.e. very pathological) then copy into an Array.
+ var codeUnitBuffer = _FixedArray16<UInt16>(allZeros:())
+ let maxBufferCount = codeUnitBuffer.count
+ let bufferCount = Swift.min(maxBufferCount, numCodeUnits)
+ for i in 0..<bufferCount {
+ codeUnitBuffer[i] = _core[startOffset+i]
}
-
- return start._position - startIndexUTF16
+
+ return withUnsafeBytes(of: &codeUnitBuffer.storage) {
+ (rawPtr : UnsafeRawBufferPointer) -> Int in
+ let bufPtr = UnsafeBufferPointer(
+ start: rawPtr.baseAddress!.assumingMemoryBound(to: UInt16.self),
+ count: bufferCount)
+
+ let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: bufPtr)
+ let ubrkFollowingOffset = __swift_stdlib_ubrk_following(
+ breakIterator, Int32(0))
+
+ if _fastPath(
+ bufferCount < maxBufferCount ||
+ (ubrkFollowingOffset != -1 && ubrkFollowingOffset != maxBufferCount)
+ ) {
+ // The offset into our buffer *is* the distance.
+ _sanityCheck(ubrkFollowingOffset != 0, "zero-sized grapheme?")
+ return Int(ubrkFollowingOffset)
+ }
+
+ // Nuclear option: copy out the rest of the string into an array
+ var codeUnits = Array<UInt16>()
+ codeUnits.reserveCapacity(numCodeUnits)
+ for i in startOffset..<endOffset {
+ codeUnits.append(_core[i])
+ }
+ return codeUnits.withUnsafeBufferPointer { bufPtr -> Int in
+ let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: bufPtr)
+ let ubrkFollowingOffset = __swift_stdlib_ubrk_following(
+ breakIterator, Int32(0)
+ )
+ // ubrk_following may return UBRK_DONE (-1). Treat that as the rest of
+ // the string.
+ if _slowPath(ubrkFollowingOffset == -1) {
+ return numCodeUnits
+ }
+ _sanityCheck(ubrkFollowingOffset != 0, "zero-sized grapheme?")
+ return Int(ubrkFollowingOffset)
+ }
+ }
}
// NOTE: Because this function is inlineable, it should contain only the fast
@@ -498,14 +501,25 @@
internal func _measureExtendedGraphemeClusterBackward(
from end: UnicodeScalarView.Index
) -> Int {
- let start = unicodeScalars.startIndex
- if start == end {
+ let startPosition = unicodeScalars.startIndex._position
+ let endPosition = end._position
+
+ // No more graphemes
+ if startPosition == endPosition {
return 0
}
- // The relative position (offset) to the last code unit.
- let lastOffset = end._position - _coreOffset - 1
- // The relative position (offset) that is one-past-the-last
+ // Last code unit means final grapheme length of 1
+ if startPosition == endPosition - 1 {
+ return 1
+ }
+
+ // The relative offset from the _StringCore's baseAddress pointer for the
+ // one-past-the-end and the last code unit under consideration. If our
+ // _core is not a substring, these are the same as positions. Otherwise,
+ // these are code unit relative offsets into the substring and not the
+ // absolute positions into the outer string.
+ let lastOffset = endPosition - _coreOffset - 1
let endOffset = lastOffset + 1
// Grapheme breaking is much simpler if known ASCII
@@ -513,15 +527,14 @@
_onFastPath() // Please aggressively inline
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
_sanityCheck(
- lastOffset >= asciiBuffer.startIndex,
- "should of been caught in earlier start-of-scalars check")
+ lastOffset-1 >= asciiBuffer.startIndex,
+ "should of been caught in earlier trivially-sized checks")
// With the exception of CR-LF, ASCII graphemes are single-scalar. Check
// for that one exception.
if _slowPath(
- asciiBuffer[lastOffset] == _LF &&
- lastOffset-1 >= asciiBuffer.startIndex &&
- asciiBuffer[lastOffset-1] == _CR
+ asciiBuffer[lastOffset-1] == _CR &&
+ asciiBuffer[lastOffset] == _LF
) {
return 2
}
@@ -529,92 +542,94 @@
return 1
}
- let endIndexUTF16 = end._position
-
- // First scalar is its own grapheme
- if (endIndexUTF16-1 == start._position) {
+ // Perform a quick single-code-unit grapheme check
+ if _fastPath(String.CharacterView._quickCheckGraphemeBreakBetween(
+ _core[lastOffset-1], _core[lastOffset])
+ ) {
return 1
}
- // Perform a quick single-code-unit grapheme check
- if _core._baseAddress != nil {
- if String.CharacterView._quickCheckGraphemeBreakBetween(
- _core._nthContiguous(lastOffset-1),
- _core._nthContiguous(lastOffset)
- ) {
- return 1
- }
- }
- return _measureExtendedGraphemeClusterBackwardSlow(
- endOffset: endOffset, start: start, end: end, endIndexUTF16: endIndexUTF16
- )
+ return _measureExtendedGraphemeClusterBackwardSlow(endOffset: endOffset)
}
@inline(never)
@_versioned
func _measureExtendedGraphemeClusterBackwardSlow(
- endOffset: Int,
- start: String.UnicodeScalarView.Index,
- end: String.UnicodeScalarView.Index,
- endIndexUTF16: Int
+ endOffset: Int
) -> Int {
- if _core._baseAddress != nil {
- _onFastPath() // Please aggressively inline
- let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: _core)
- let ubrkPreceding = __swift_stdlib_ubrk_preceding(
- breakIterator, Int32(endOffset)
- )
+ let startOffset = 0
+ let numCodeUnits = endOffset - startOffset
+ _sanityCheck(unicodeScalars.startIndex._position - _coreOffset == 0,
+ "position/offset mismatch in _StringCore as a substring")
+ _sanityCheck(numCodeUnits >= 2,
+ "should have at least two code units")
+
+ func measureFromUBreakOffset(_ ubrkOffset: Int32) -> Int {
// ubrk_following may return UBRK_DONE (-1). Treat that as the rest of the
// string.
- let priorPosition =
- ubrkPreceding == -1 ? start._position : Int(ubrkPreceding)
- return endOffset - priorPosition
- } else {
- // TODO: See if we can get fast character contents.
- }
-
- // FIXME: Need to handle the general case correctly with Unicode 9+
- // semantics, as opposed to this legacy Unicode 8 path. This gets hit for
- // e.g. non-contiguous NSStrings. In such cases, there may be an alternative
- // CFString API available, or worst case we can map over it via UTextFuncs.
-
- return legacyGraphemeBackward(
- start: start, end: end, endIndexUTF16: endIndexUTF16
- )
- }
-
- @inline(never)
- func legacyGraphemeBackward(
- start: UnicodeScalarView.Index,
- end: UnicodeScalarView.Index,
- endIndexUTF16: Int
- ) -> Int {
- let graphemeClusterBreakProperty =
- _UnicodeGraphemeClusterBreakPropertyTrie()
- let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
-
- var graphemeClusterStart = end
-
- unicodeScalars.formIndex(before: &graphemeClusterStart)
- var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
- unicodeScalars[graphemeClusterStart].value)
-
- var graphemeClusterStartUTF16 = graphemeClusterStart._position
-
- while graphemeClusterStart != start {
- unicodeScalars.formIndex(before: &graphemeClusterStart)
- let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
- unicodeScalars[graphemeClusterStart].value)
- if segmenter.isBoundary(gcb1, gcb0) {
- break
+ if _slowPath(ubrkOffset == -1) {
+ return numCodeUnits
}
- gcb0 = gcb1
- graphemeClusterStartUTF16 = graphemeClusterStart._position
+ _sanityCheck(endOffset > Int(ubrkOffset), "zero-sized grapheme?")
+ return endOffset - Int(ubrkOffset)
}
-
- return endIndexUTF16 - graphemeClusterStartUTF16
+
+ // The vast majority of time, we can get a pointer and a length directly
+ if _fastPath(_core._baseAddress != nil) {
+ _onFastPath() // Please aggressively inline
+ let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: _core)
+ let ubrkPrecedingOffset = __swift_stdlib_ubrk_preceding(
+ breakIterator, Int32(endOffset)
+ )
+ return measureFromUBreakOffset(ubrkPrecedingOffset)
+ }
+
+ // We have a non-contiguous string. Pull out some code units into a fixed
+ // array and try to perform grapheme breaking on that. If even that's not
+ // sufficient (i.e. very pathological) then copy into an Array.
+ var codeUnitBuffer = _FixedArray16<UInt16>(allZeros:())
+ let maxBufferCount = codeUnitBuffer.count
+ let coreStartIdx = Swift.max(startOffset, endOffset - maxBufferCount)
+ let bufferCount = Swift.min(maxBufferCount, numCodeUnits)
+ for i in 0..<bufferCount {
+ codeUnitBuffer[i] = _core[coreStartIdx+i]
+ }
+
+ return withUnsafeBytes(of: &codeUnitBuffer.storage) {
+ (rawPtr : UnsafeRawBufferPointer) -> Int in
+ let bufPtr = UnsafeBufferPointer(
+ start: rawPtr.baseAddress!.assumingMemoryBound(to: UInt16.self),
+ count: bufferCount)
+
+ let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: bufPtr)
+ let ubrkPrecedingOffset = __swift_stdlib_ubrk_preceding(
+ breakIterator, Int32(bufferCount)
+ )
+
+ if _fastPath(numCodeUnits < maxBufferCount || ubrkPrecedingOffset > 1) {
+ // There was a grapheme break within our buffer.
+ _sanityCheck(ubrkPrecedingOffset < bufferCount, "offset mismatch")
+ return bufferCount - Int(ubrkPrecedingOffset)
+ }
+
+ // Nuclear option: copy out the prefix of the string into an array
+ var codeUnits = Array<UInt16>()
+ codeUnits.reserveCapacity(numCodeUnits)
+ for i in startOffset..<endOffset {
+ codeUnits.append(_core[i])
+ }
+ return codeUnits.withUnsafeBufferPointer { bufPtr -> Int in
+ let breakIterator = _ThreadLocalStorage.getUBreakIterator(for: bufPtr)
+ let ubrkPrecedingOffset = __swift_stdlib_ubrk_preceding(
+ breakIterator, Int32(endOffset)
+ )
+ // No need to adjust ubrkPrecedingOffset as we copied the prefix: it is
+ // the position in the original string
+ return measureFromUBreakOffset(ubrkPrecedingOffset)
+ }
+ }
}
-
+
/// Accesses the character at the given position.
///
/// The following example searches a string's character view for a capital
diff --git a/stdlib/public/core/Substring.swift.gyb b/stdlib/public/core/Substring.swift.gyb
index 6add01c..8bb704f 100644
--- a/stdlib/public/core/Substring.swift.gyb
+++ b/stdlib/public/core/Substring.swift.gyb
@@ -283,8 +283,11 @@
let wholeCore = _slice._base._core
let subCore : _StringCore = wholeCore[
startIndex._base._position..<endIndex._base._position]
- // check that we haven't allocated a new buffer for the result
- _sanityCheck(subCore._owner === wholeCore._owner)
+ // Check that we haven't allocated a new buffer for the result, if we have
+ // contiguous storage.
+ _sanityCheck(
+ subCore._owner === wholeCore._owner || !wholeCore.hasContiguousStorage)
+
return String(subCore)
}
}
diff --git a/stdlib/public/core/ThreadLocalStorage.swift b/stdlib/public/core/ThreadLocalStorage.swift
index cb213d5..2c09ac3 100644
--- a/stdlib/public/core/ThreadLocalStorage.swift
+++ b/stdlib/public/core/ThreadLocalStorage.swift
@@ -74,20 +74,25 @@
}
// Retrieve our thread's local uBreakIterator and set it up for the given
- // StringCore. Checks our TLS cache to avoid excess text resetting.
+ // StringCore.
static internal func getUBreakIterator(
for core: _StringCore
) -> OpaquePointer {
+ _sanityCheck(core._owner != nil || core._baseAddress != nil,
+ "invalid StringCore")
+ let corePtr: UnsafeMutablePointer<UTF16.CodeUnit> = core.startUTF16
+ return getUBreakIterator(
+ for: UnsafeBufferPointer(start: corePtr, count: core.count))
+ }
+ static internal func getUBreakIterator(
+ for bufPtr: UnsafeBufferPointer<UTF16.CodeUnit>
+ ) -> OpaquePointer {
let tlsPtr = getPointer()
let brkIter = tlsPtr[0].uBreakIterator
- _sanityCheck(core._owner != nil || core._baseAddress != nil,
- "invalid StringCore")
-
var err = __swift_stdlib_U_ZERO_ERROR
- let corePtr: UnsafeMutablePointer<UTF16.CodeUnit>
- corePtr = core.startUTF16
- __swift_stdlib_ubrk_setText(brkIter, corePtr, Int32(core.count), &err)
+ __swift_stdlib_ubrk_setText(
+ brkIter, bufPtr.baseAddress!, Int32(bufPtr.count), &err)
_precondition(err.isSuccess, "unexpected ubrk_setUText failure")
return brkIter
diff --git a/test/stdlib/Inputs/NSSlowString/NSSlowString.h b/test/stdlib/Inputs/NSSlowString/NSSlowString.h
new file mode 100644
index 0000000..548939c
--- /dev/null
+++ b/test/stdlib/Inputs/NSSlowString/NSSlowString.h
@@ -0,0 +1,10 @@
+#import <Foundation/NSString.h>
+
+// An NSString whose _fastCharacterContents always returns nil
+@interface NSSlowString : NSString
+// Additional object property; NSSlowString.m never reads or writes it.
+@property (nonatomic, strong) id myProperty;
+// Implemented in NSSlowString.m to return nil unconditionally.
+- (void *) _fastCharacterContents;
+
+@end
diff --git a/test/stdlib/Inputs/NSSlowString/NSSlowString.m b/test/stdlib/Inputs/NSSlowString/NSSlowString.m
new file mode 100644
index 0000000..5f38447
--- /dev/null
+++ b/test/stdlib/Inputs/NSSlowString/NSSlowString.m
@@ -0,0 +1,37 @@
+#import "NSSlowString.h"
+
+
+@interface NSSlowString ()
+// The wrapped string; assigned in initWithString: and read by the accessors.
+@property (nonatomic, strong) NSString *stringHolder;
+
+@end
+
+@implementation NSSlowString
+// Wraps `name`; length and characterAtIndex: forward to the wrapped string.
+- (instancetype)initWithString:(NSString *)name {
+ self = [super init];
+ if (self == nil) {
+ return nil;
+ }
+ self.stringHolder = name;
+ return self;
+}
+// Length of the wrapped string.
+- (NSUInteger)length {
+ return self.stringHolder.length;
+}
+// Returns the receiver itself rather than creating a new object.
+- (id)copy {
+ return self;
+}
+// Character of the wrapped string at `index`.
+- (unichar)characterAtIndex:(NSUInteger)index {
+ return [self.stringHolder characterAtIndex:index];
+}
+// Always returns nil.
+- (void *) _fastCharacterContents {
+ return nil;
+}
+
+@end
\ No newline at end of file
diff --git a/test/stdlib/Inputs/NSSlowString/module.map b/test/stdlib/Inputs/NSSlowString/module.map
new file mode 100644
index 0000000..60ce0d3
--- /dev/null
+++ b/test/stdlib/Inputs/NSSlowString/module.map
@@ -0,0 +1,3 @@
+module NSSlowString {
+ header "NSSlowString.h"
+}
diff --git a/test/stdlib/NSSlowString.swift b/test/stdlib/NSSlowString.swift
new file mode 100644
index 0000000..5c649bd
--- /dev/null
+++ b/test/stdlib/NSSlowString.swift
@@ -0,0 +1,79 @@
+// RUN: mkdir -p %t
+// RUN: %target-clang -fobjc-arc %S/Inputs/NSSlowString/NSSlowString.m -c -o %t/NSSlowString.o
+// RUN: %target-build-swift -I %S/Inputs/NSSlowString/ %t/NSSlowString.o %s -o %t/a.out
+// RUN: %target-run %t/a.out
+
+// REQUIRES: executable_test
+// REQUIRES: objc_interop
+
+import Foundation
+import NSSlowString
+import Swift
+
+import StdlibUnittest
+
+let tests = TestSuite("NonContiguousStrings")
+
+// Perform expected test checks
+func checkSingleForm<S: StringProtocol>(
+  _ s: S, expectedCount: Int, expectedCodeUnitCount: Int?
+) {
+  // Forward traversal: grapheme count, then UTF-16 length if one was given.
+  let characterCount = Int(s.count)
+  expectEqual(expectedCount, characterCount)
+  if let cuCount = expectedCodeUnitCount {
+    expectEqual(cuCount, Int(s.utf16.count))
+  }
+  // Reversed traversal must agree and round-trip to the original contents.
+  let backward = Array<Character>(s.reversed())
+  expectEqual(characterCount, backward.count)
+  expectEqualSequence(s.reversed(), backward)
+  expectEqual(String(s), String(backward.reversed()))
+}
+func check(
+  _ s: String, expectedCount count: Int, expectedCodeUnitCount cuCount: Int
+) {
+  // Whole string, then substring views (trimmed ones check characters only).
+  checkSingleForm(s, expectedCount: count, expectedCodeUnitCount: cuCount)
+  checkSingleForm(s[...], expectedCount: count, expectedCodeUnitCount: cuCount)
+  for (slice, n) in [(s.dropFirst(), count-1), (s.dropLast(), count-1),
+                     (s.dropLast().dropFirst(), count-2)] {
+    checkSingleForm(slice, expectedCount: n, expectedCodeUnitCount: nil)
+  }
+}
+
+tests.test("Unicode 9 grapheme breaking") {
+  // Lengths below, above, and exactly at the fixed size code unit buffer (16).
+  // The tail is a thumbs-up plus a ZWJ family: 2 + 11 = 13 UTF-16 code units.
+  let tail = "👍👩\u{200D}👩\u{200D}👧\u{200D}👦"
+  let strSmall = NSSlowString(string: "a" + tail)
+  let strBig = NSSlowString(string: "abcdefg" + tail)
+  let strJustRight = NSSlowString(string: "abc" + tail)
+  check(strSmall as String, expectedCount: 3, expectedCodeUnitCount: 14)
+  check(strBig as String, expectedCount: 9, expectedCodeUnitCount: 20)
+  check(strJustRight as String, expectedCount: 5, expectedCodeUnitCount: 16)
+}
+
+tests.test("Zalgo") {
+  // Check that we handle absurdly long graphemes: "a", a ZWJ family emoji,
+  // and "c" carrying every combining mark in U+0300...U+036F.
+  var zalgo = "a👩\u{200D}👩\u{200D}👧\u{200D}👦c"
+  for combo in 0x300...0x36f {
+    zalgo.append(String(UnicodeScalar(combo)!))
+  }
+  check(
+    NSSlowString(string: zalgo) as String,
+    expectedCount: 3,
+    expectedCodeUnitCount: 125
+  )
+  // Check for interspersed zalgo and emoji
+  let megaZalgo = zalgo + zalgo + zalgo + zalgo
+  check(
+    NSSlowString(string: megaZalgo) as String,
+    expectedCount: megaZalgo.count,
+    expectedCodeUnitCount: megaZalgo.utf16.count
+  )
+}
+
+runAllTests()
+
diff --git a/test/stdlib/NewString.swift b/test/stdlib/NewString.swift
index ec4c586..da620a5 100644
--- a/test/stdlib/NewString.swift
+++ b/test/stdlib/NewString.swift
@@ -129,6 +129,8 @@
var nsASCII = NSString(utf8String: "foobar")!
// CHECK-NEXT: has UTF-16: false
print("has UTF-16: \(CFStringGetCharactersPtr(unsafeBitCast(nsASCII, to: CFString.self)) != nil)")
+ print("has ASCII pointer: \(CFStringGetCStringPtr(unsafeBitCast(nsASCII, to: CFString.self), 0x0600) != nil)")
+ print("has ASCII pointer: \(CFStringGetCStringPtr(unsafeBitCast(nsASCII, to: CFString.self), 0x08000100) != nil)")
// CHECK: --- ASCII basic round-tripping ---
print("--- ASCII basic round-tripping ---")