Merge pull request #22614 from linux-on-ibm-z/s390x-smallstring-5.0-backport

[5.0] [string] Fix string implementation for big endian platforms

commit: 817dff3ec0534c4ef62b6c6df2b919296b993732 [log] [tgz]
author: Ted Kremenek <kremenek@apple.com> Mon Feb 18 12:32:52 2019 -0800
committer: GitHub <noreply@github.com> Mon Feb 18 12:32:52 2019 -0800
tree: 8afe8ee6fa4fd4c84bda0ccee7ca060c015ce6de
parent: 15be364c627d5dd402e327704af7080d33102ea6 [diff]
parent: 4f7d007e2663bfa4558e37aea2e46a585e09e551 [diff]
diff --git a/stdlib/public/core/SmallString.swift b/stdlib/public/core/SmallString.swift
index af51dea..9d4570c 100644
--- a/stdlib/public/core/SmallString.swift
+++ b/stdlib/public/core/SmallString.swift

@@ -10,6 +10,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+// The code units in _SmallString are always stored in memory in the same order
+// that they would be stored in an array. This means that on big-endian
+// platforms the order of the bytes in storage is reversed compared to
+// _StringObject whereas on little-endian platforms the order is the same.
+//
+// Memory layout:
+//
+// |0 1 2 3 4 5 6 7 8 9 A B C D E F| ← hexadecimal offset in bytes
+// |  _storage.0   |  _storage.1   | ← raw bits
+// |          code units         | | ← encoded layout
+//  ↑                             ↑
+//  first (leftmost) code unit    discriminator (incl. count)
+//
 @_fixed_layout @usableFromInline
 internal struct _SmallString {
   @usableFromInline
@@ -50,16 +63,18 @@
   @inlinable @inline(__always)
   internal init(_ object: _StringObject) {
     _internalInvariant(object.isSmall)
-    self.init(raw: object.rawBits)
+    // On big-endian platforms the byte order is the reverse of _StringObject.
+    let leading = object.rawBits.0.littleEndian
+    let trailing = object.rawBits.1.littleEndian
+    self.init(raw: (leading, trailing))
   }
 
   @inlinable @inline(__always)
   internal init() {
-    self.init(raw: _StringObject(empty:()).rawBits)
+    self.init(_StringObject(empty:()))
   }
 }
 
-// TODO
 extension _SmallString {
   @inlinable
   internal static var capacity: Int {
@@ -72,9 +87,12 @@
     }
   }
 
+  // Get an integer equivalent to the _StringObject.discriminatedObjectRawBits
+  // computed property.
   @inlinable @inline(__always)
   internal var rawDiscriminatedObject: UInt64 {
-    return _storage.1
+    // Reverse the bytes on big-endian systems.
+    return _storage.1.littleEndian
   }
 
   @inlinable
@@ -107,7 +125,7 @@
   // usage: it always clears the discriminator and count (in case it's full)
   @inlinable @inline(__always)
   internal var zeroTerminatedRawCodeUnits: RawBitPattern {
-    let smallStringCodeUnitMask: UInt64 = 0x00FF_FFFF_FFFF_FFFF
+    let smallStringCodeUnitMask = ~UInt64(0xFF).bigEndian // zero last byte
     return (self._storage.0, self._storage.1 & smallStringCodeUnitMask)
   }
 
@@ -231,11 +249,12 @@
     _internalInvariant(count <= _SmallString.capacity)
 
     let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0
-    let countAndDiscriminator = UInt64(truncatingIfNeeded: count) &<< 56
-                              | _StringObject.Nibbles.small(isASCII: isASCII)
-    _internalInvariant(trailing & countAndDiscriminator == 0)
+    let discriminator = _StringObject.Nibbles
+      .small(withCount: count, isASCII: isASCII)
+      .littleEndian // reversed byte order on big-endian platforms
+    _internalInvariant(trailing & discriminator == 0)
 
-    self.init(raw: (leading, trailing | countAndDiscriminator))
+    self.init(raw: (leading, trailing | discriminator))
     _internalInvariant(self.count == count)
   }
 
@@ -300,23 +319,31 @@
 #endif
 
 extension UInt64 {
-  // Fetches the `i`th byte, from least-significant to most-significant
-  //
-  // TODO: endianess awareness day
+  // Fetches the `i`th byte in memory order. On little-endian systems the byte
+  // at i=0 is the least significant byte (LSB) while on big-endian systems the
+  // byte at i=7 is the LSB.
   @inlinable @inline(__always)
   internal func _uncheckedGetByte(at i: Int) -> UInt8 {
     _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
+#if _endian(big)
+    let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
+#else
     let shift = UInt64(truncatingIfNeeded: i) &* 8
+#endif
     return UInt8(truncatingIfNeeded: (self &>> shift))
   }
 
-  // Sets the `i`th byte, from least-significant to most-significant
-  //
-  // TODO: endianess awareness day
+  // Sets the `i`th byte in memory order. On little-endian systems the byte
+  // at i=0 is the least significant byte (LSB) while on big-endian systems the
+  // byte at i=7 is the LSB.
   @inlinable @inline(__always)
   internal mutating func _uncheckedSetByte(at i: Int, to value: UInt8) {
     _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
+#if _endian(big)
+    let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
+#else
     let shift = UInt64(truncatingIfNeeded: i) &* 8
+#endif
     let valueMask: UInt64 = 0xFF &<< shift
     self = (self & ~valueMask) | (UInt64(truncatingIfNeeded: value) &<< shift)
   }
@@ -336,5 +363,6 @@
     r = r | (UInt64(input[idx]) &<< shift)
     shift = shift &+ 8
   }
-  return r
+  // Convert from little-endian to host byte order.
+  return r.littleEndian
 }

diff --git a/stdlib/public/core/StringObject.swift b/stdlib/public/core/StringObject.swift
index 5bc877f..3c785f4 100644
--- a/stdlib/public/core/StringObject.swift
+++ b/stdlib/public/core/StringObject.swift

@@ -516,11 +516,15 @@
 
  */
 extension _StringObject {
-#if arch(i386) || arch(arm)
   @inlinable @inline(__always)
   internal init(_ small: _SmallString) {
+    // Small strings are encoded as _StringObjects in reverse byte order
+    // on big-endian platforms. This is to match the discriminator to the
+    // spare bits (the most significant nibble) in a pointer.
+    let word1 = small.rawBits.0.littleEndian
+    let word2 = small.rawBits.1.littleEndian
+#if arch(i386) || arch(arm)
     // On 32-bit, we need to unpack the small string.
-    let (word1, word2) = small.rawBits
     let smallStringDiscriminatorAndCount: UInt64 = 0xFF00_0000_0000_0000
 
     let leadingFour = Int(truncatingIfNeeded: word1)
@@ -532,15 +536,12 @@
       variant: .immortal(nextFour),
       discriminator: smallDiscriminatorAndCount,
       flags: trailingTwo)
-    _internalInvariant(isSmall)
-  }
 #else
-  @inlinable @inline(__always)
-  internal init(_ small: _SmallString) {
-    self.init(rawValue: small.rawBits)
+    // On 64-bit, we copy the raw bits (to host byte order).
+    self.init(rawValue: (word1, word2))
+#endif
     _internalInvariant(isSmall)
   }
-#endif
 
   @inlinable
   internal static func getSmallCount(fromRaw x: UInt64) -> Int {
commit	817dff3ec0534c4ef62b6c6df2b919296b993732	[log] [tgz]
author	Ted Kremenek <kremenek@apple.com>	Mon Feb 18 12:32:52 2019 -0800
committer	GitHub <noreply@github.com>	Mon Feb 18 12:32:52 2019 -0800
tree	8afe8ee6fa4fd4c84bda0ccee7ca060c015ce6de
parent	15be364c627d5dd402e327704af7080d33102ea6 [diff]
parent	4f7d007e2663bfa4558e37aea2e46a585e09e551 [diff]