[4.2] Some string performance changes (#16542)

* stdlib: remove some @inlinable attributes from String API functions.

Besides the general goal of removing inlinable functions, this reduces code size and also improves performance for several benchmarks.
The performance problem was that inlining top-level String API functions (like String.count) into client code meant that the client eventually ended up calling non-inlinable internal String functions.
This is much slower than making a single call into the library at the top-level API boundary. Inside the library, all the internal String functions can be specialized and inlined.

rdar://problem/39921548

* stdlib: fix performance regression for long string appends.

Re-wrote the inner memcpy loops so that they can be vectorized.
Also added a few @inline(__always) attributes.

Since we removed some @inlinable attributes, this string-append code is no longer code-generated in the client.
The code generation in the stdlib binary is different because not all of the precondition checks are folded away there.
Using explicit loop control statements instead of for-in-range removes the precondition overhead from those time-critical memcpy loops.

* stdlib: Speed up UTF8View -> Array conversion by using _copyContents

* [test] Update diagnostic test for SR-7599
diff --git a/stdlib/public/core/ContiguousArrayBuffer.swift b/stdlib/public/core/ContiguousArrayBuffer.swift
index 659a54e..eb9de9f 100644
--- a/stdlib/public/core/ContiguousArrayBuffer.swift
+++ b/stdlib/public/core/ContiguousArrayBuffer.swift
@@ -642,15 +642,17 @@
     _uninitializedCount: count,
     minimumCapacity: 0)
 
-  var p = result.firstElementAddress
-  var i = source.startIndex
-  for _ in 0..<count {
-    // FIXME(performance): use _copyContents(initializing:).
-    p.initialize(to: source[i])
-    source.formIndex(after: &i)
-    p += 1
-  }
-  _expectEnd(of: source, is: i)
+  var p = UnsafeMutableBufferPointer(start: result.firstElementAddress, count: count)
+  var (itr, end) = source._copyContents(initializing: p)
+
+  _debugPrecondition(itr.next() == nil,
+    "invalid Collection: more than 'count' elements in collection")
+  // We also have to check the evil shrink case in release builds, because
+  // it can result in uninitialized array elements and therefore undefined
+  // behavior.
+  _precondition(end == p.endIndex,
+    "invalid Collection: less than 'count' elements in collection")
+
   return ContiguousArray(_buffer: result)
 }
 
diff --git a/stdlib/public/core/StringGuts.swift b/stdlib/public/core/StringGuts.swift
index 95eb11b..eec0c85 100644
--- a/stdlib/public/core/StringGuts.swift
+++ b/stdlib/public/core/StringGuts.swift
@@ -1030,7 +1030,6 @@
     self.append(other._wholeString._guts, range: other._encodedOffsetRange)
   }
 
-  @inlinable
   public // TODO(StringGuts): for testing only
   mutating func append(_ other: _StringGuts) {
     // FIXME(TODO: JIRA): shouldn't _isEmptySingleton be sufficient?
diff --git a/stdlib/public/core/StringIndexConversions.swift b/stdlib/public/core/StringIndexConversions.swift
index 15b5a47..c97cd87 100644
--- a/stdlib/public/core/StringIndexConversions.swift
+++ b/stdlib/public/core/StringIndexConversions.swift
@@ -49,7 +49,6 @@
   ///     `sourcePosition` must be a valid index of at least one of the views
   ///     of `target`.
   ///   - target: The string referenced by the resulting index.
-  @inlinable // FIXME(sil-serialize-all)
   public init?(
     _ sourcePosition: String.Index,
     within target: String
diff --git a/stdlib/public/core/StringLegacy.swift b/stdlib/public/core/StringLegacy.swift
index 1254207..fc365f7 100644
--- a/stdlib/public/core/StringLegacy.swift
+++ b/stdlib/public/core/StringLegacy.swift
@@ -160,7 +160,6 @@
 }
 
 extension String {
-  @inlinable // FIXME(sil-serialize-all)
   public func hasPrefix(_ prefix: String) -> Bool {
     let prefixCount = prefix._guts.count
     if prefixCount == 0 { return true }
@@ -208,7 +207,6 @@
     return self.starts(with: prefix)
   }
 
-  @inlinable // FIXME(sil-serialize-all)
   public func hasSuffix(_ suffix: String) -> Bool {
     let suffixCount = suffix._guts.count
     if suffixCount == 0 { return true }
diff --git a/stdlib/public/core/StringRangeReplaceableCollection.swift b/stdlib/public/core/StringRangeReplaceableCollection.swift
index 80e4780..40f5efb 100644
--- a/stdlib/public/core/StringRangeReplaceableCollection.swift
+++ b/stdlib/public/core/StringRangeReplaceableCollection.swift
@@ -89,6 +89,11 @@
   @inlinable // FIXME(sil-serialize-all)
   public var endIndex: Index { return Index(encodedOffset: _guts.count) }
 
+  /// The number of characters in a string.
+  public var count: Int {
+    return distance(from: startIndex, to: endIndex)
+  }
+
   @inlinable
   @inline(__always)
   internal func _boundsCheck(_ index: Index) {
@@ -114,7 +119,6 @@
       "String index range is out of bounds")
   }
 
-  @inlinable // FIXME(sil-serialize-all)
   internal func _index(atEncodedOffset offset: Int) -> Index {
     return _visitGuts(_guts, args: offset,
       ascii: { ascii, offset in return ascii.characterIndex(atOffset: offset) },
@@ -128,7 +132,6 @@
   /// - Parameter i: A valid index of the collection. `i` must be less than
   ///   `endIndex`.
   /// - Returns: The index value immediately after `i`.
-  @inlinable // FIXME(sil-serialize-all)
   public func index(after i: Index) -> Index {
     return _visitGuts(_guts, args: i,
       ascii: { ascii, i in ascii.characterIndex(after: i) },
@@ -141,7 +144,6 @@
   /// - Parameter i: A valid index of the collection. `i` must be greater than
   ///   `startIndex`.
   /// - Returns: The index value immediately before `i`.
-  @inlinable // FIXME(sil-serialize-all)
   public func index(before i: Index) -> Index {
     return _visitGuts(_guts, args: i,
       ascii: { ascii, i in ascii.characterIndex(before: i) },
@@ -171,7 +173,6 @@
   ///   to `index(before:)`.
   ///
   /// - Complexity: O(*n*), where *n* is the absolute value of `n`.
-  @inlinable // FIXME(sil-serialize-all)
   public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
     return _visitGuts(_guts, args: (i, n),
       ascii: { ascii, args in let (i, n) = args
@@ -219,7 +220,6 @@
   ///   the method returns `nil`.
   ///
   /// - Complexity: O(*n*), where *n* is the absolute value of `n`.
-  @inlinable // FIXME(sil-serialize-all)
   public func index(
     _ i: Index, offsetBy n: IndexDistance, limitedBy limit: Index
   ) -> Index? {
@@ -241,7 +241,6 @@
   /// - Returns: The distance between `start` and `end`.
   ///
   /// - Complexity: O(*n*), where *n* is the resulting distance.
-  @inlinable // FIXME(sil-serialize-all)
   public func distance(from start: Index, to end: Index) -> IndexDistance {
     return _visitGuts(_guts, args: (start, end),
       ascii: { ascii, args in let (start, end) = args
@@ -267,7 +266,6 @@
   ///
   /// - Parameter i: A valid index of the string. `i` must be less than the
   ///   string's end index.
-  @inlinable // FIXME(sil-serialize-all)
   public subscript(i: Index) -> Character {
     return _visitGuts(_guts, args: i,
       ascii: { ascii, i in return ascii.character(at: i) },
diff --git a/stdlib/public/core/StringStorage.swift b/stdlib/public/core/StringStorage.swift
index 43fdbe7..3c19b0c 100644
--- a/stdlib/public/core/StringStorage.swift
+++ b/stdlib/public/core/StringStorage.swift
@@ -174,7 +174,10 @@
   @inlinable
   @nonobjc
   var unusedBuffer: UnsafeMutableBufferPointer<CodeUnit> {
-    return UnsafeMutableBufferPointer(start: end, count: capacity - count)
+    @inline(__always)
+    get {
+      return UnsafeMutableBufferPointer(start: end, count: capacity - count)
+    }
   }
 
   @inlinable
diff --git a/stdlib/public/core/UnmanagedString.swift b/stdlib/public/core/UnmanagedString.swift
index 583df04..d109c17 100644
--- a/stdlib/public/core/UnmanagedString.swift
+++ b/stdlib/public/core/UnmanagedString.swift
@@ -16,6 +16,7 @@
 internal typealias _UnmanagedUTF16String = _UnmanagedString<UTF16.CodeUnit>
 
 @inlinable
+@inline(__always)
 internal
 func memcpy_zext<
   Target: FixedWidthInteger & UnsignedInteger,
@@ -24,12 +25,18 @@
   dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
 ) {
   _sanityCheck(Source.bitWidth < Target.bitWidth)
-  for i in 0..<count {
+  _sanityCheck(count >= 0)
+  // Don't use the for-in-range syntax to avoid precondition checking in Range.
+  // This enables vectorization of the memcpy loop.
+  var i = 0
+  while i < count {
     dst[i] = Target(src[i])
+    i = i &+ 1
   }
 }
 
 @inlinable
+@inline(__always)
 internal
 func memcpy_trunc<
   Target: FixedWidthInteger & UnsignedInteger,
@@ -38,8 +45,13 @@
   dst: UnsafeMutablePointer<Target>, src: UnsafePointer<Source>, count: Int
 ) {
   _sanityCheck(Source.bitWidth > Target.bitWidth)
-  for i in 0..<count {
+  _sanityCheck(count >= 0)
+  // Don't use the for-in-range syntax to avoid precondition checking in Range.
+  // This enables vectorization of the memcpy loop.
+  var i = 0
+  while i < count {
     dst[i] = Target(truncatingIfNeeded: src[i])
+    i = i &+ 1
   }
 }
 
@@ -194,6 +206,7 @@
   }
 
   @inlinable // FIXME(sil-serialize-all)
+  @inline(__always)
   internal func _copy<TargetCodeUnit>(
     into target: UnsafeMutableBufferPointer<TargetCodeUnit>
   ) where TargetCodeUnit : FixedWidthInteger & UnsignedInteger {
diff --git a/test/Constraints/diagnostics.swift b/test/Constraints/diagnostics.swift
index 6701990..9f6c031 100644
--- a/test/Constraints/diagnostics.swift
+++ b/test/Constraints/diagnostics.swift
@@ -161,7 +161,8 @@
 // <rdar://problem/21080030> Bad diagnostic for invalid method call in boolean expression: (_, ExpressibleByIntegerLiteral)' is not convertible to 'ExpressibleByIntegerLiteral
 func rdar21080030() {
   var s = "Hello"
-  if s.count() == 0 {} // expected-error{{cannot call value of non-function type 'Int'}}{{13-15=}}
+  // SR-7599: This should be `cannot_call_non_function_value`
+  if s.count() == 0 {} // expected-error{{cannot invoke 'count' with no arguments}}
 }
 
 // <rdar://problem/21248136> QoI: problem with return type inference mis-diagnosed as invalid arguments
diff --git a/validation-test/stdlib/Arrays.swift.gyb b/validation-test/stdlib/Arrays.swift.gyb
index 2bd0b6b..c0795f2 100644
--- a/validation-test/stdlib/Arrays.swift.gyb
+++ b/validation-test/stdlib/Arrays.swift.gyb
@@ -68,7 +68,8 @@
     expectEqual(0, c.timesMakeIteratorCalled.value)
     expectEqual(0, c.timesStartIndexCalled.value)
     let copy = c._copyToContiguousArray()
-    expectEqual(0, c.timesMakeIteratorCalled.value)
+    // _copyToContiguousArray calls Sequence._copyContents, which makes an iterator.
+    expectEqual(1, c.timesMakeIteratorCalled.value)
     expectNotEqual(0, c.timesStartIndexCalled.value)
     expectEqualSequence(
       Array(10..<27),
@@ -82,7 +83,8 @@
     expectEqual(0, wrapped.timesMakeIteratorCalled.value)
     expectEqual(0, wrapped.timesStartIndexCalled.value)
     let copy = s._copyToContiguousArray()
-    expectEqual(0, wrapped.timesMakeIteratorCalled.value)
+    // _copyToContiguousArray calls Sequence._copyContents, which makes an iterator.
+    expectEqual(1, wrapped.timesMakeIteratorCalled.value)
     expectNotEqual(0, wrapped.timesStartIndexCalled.value)
 
     expectEqualSequence(
@@ -2418,10 +2420,9 @@
     ? evilBoundsError
     : "invalid Collection: count differed in successive traversals"
 
-  let constructionMessage =
-    /*_isStdlibInternalChecksEnabled() && !evilBoundsCheck && step <= 0
-      ? "_UnsafePartiallyInitializedContiguousArrayBuffer has no more capacity"
-      :*/ message
+  let constructionMessage = step < 0
+    ? "invalid Collection: less than 'count' elements in collection"
+    : "invalid Collection: more than 'count' elements in collection"
 
   // The invalid Collection error is a _debugPreconditon that will only fire
   // in a Debug assert configuration.
@@ -2472,7 +2473,7 @@
   .code {
     let evil = EvilCollection(step, boundsChecked: evilBoundsCheck)
 
-    if expectedToFail {
+    if step < 0 || _isDebugAssertConfiguration() {
       expectCrashLater()
     }