stdlib/public/core/StringComparison.swift - third_party/swift - Git at Google

 //===----------------------------------------------------------------------===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//

 import SwiftShims

 //HACK: This gets rid of some retains/releases that was slowing down the
 //memcmp fast path for comparing ascii strings. rdar://problem/37473470
 @inline(never) // @outlined
 @effects(readonly)
 @_versioned // @opaque
 internal
 func _compareUnicode(
   _ lhs: _StringGuts._RawBitPattern, _ rhs: _StringGuts._RawBitPattern
 ) -> Int {
   let left = _StringGuts(rawBits: lhs)
   let right = _StringGuts(rawBits: rhs)

   if _slowPath(!left._isContiguous || !right._isContiguous) {
     if !left._isContiguous {
       return left._asOpaque()._compareOpaque(right).rawValue
     } else {
       return right._asOpaque()._compareOpaque(left).flipped.rawValue
     }
   }

   return left._compareContiguous(right)
 }

 @inline(never) // @outlined
 @effects(readonly)
 @_versioned // @opaque
 internal
 func _compareUnicode(
   _ lhs: _StringGuts._RawBitPattern, _ leftRange: Range<Int>,
   _ rhs: _StringGuts._RawBitPattern, _ rightRange: Range<Int>
 ) -> Int {
   let left = _StringGuts(rawBits: lhs)
   let right = _StringGuts(rawBits: rhs)

    if _slowPath(!left._isContiguous || !right._isContiguous) {
      if !left._isContiguous {
        return left._asOpaque()[leftRange]._compareOpaque(
          right, rightRange
        ).rawValue
      } else {
        return right._asOpaque()[rightRange]._compareOpaque(
          left, leftRange
        ).flipped.rawValue
      }
    }

   return left._compareContiguous(leftRange, right, rightRange)
 }

 //
 // Pointer casting helpers
 //
 @inline(__always)
 private func _unsafeMutableBufferPointerCast<T, U>(
   _ ptr: UnsafeMutablePointer<T>,
   _ count: Int,
   to: U.Type = U.self
 ) -> UnsafeMutableBufferPointer<U> {
   return UnsafeMutableBufferPointer(
     start: UnsafeMutableRawPointer(ptr).assumingMemoryBound(to: U.self),
     count: count
   )
 }
 @inline(__always)
 private func _unsafeBufferPointerCast<T, U>(
   _ ptr: UnsafePointer<T>,
   _ count: Int,
   to: U.Type = U.self
 ) -> UnsafeBufferPointer<U> {
   return UnsafeBufferPointer(
     start: UnsafeRawPointer(ptr).assumingMemoryBound(to: U.self),
     count: count
   )
 }

 internal let _leadingSurrogateBias: UInt16 = 0xd800
 internal let _trailingSurrogateBias: UInt16 = 0xdc00
 internal let _surrogateMask: UInt16 = 0xfc00

 @inline(__always)
 internal func _isSurrogate(_ cu: UInt16) -> Bool {
   return _isLeadingSurrogate(cu) || _isTrailingSurrogate(cu)
 }

 @inline(__always)
 internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool {
   // NOTE: Specifically match against the trailing surrogate mask, as it matches
   // more cases.
   return cu & _surrogateMask == _leadingSurrogateBias
 }

 @inline(__always)
 internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool {
   return cu & _surrogateMask == _trailingSurrogateBias
 }
 @inline(__always)
 internal func _decodeSurrogatePair(
   leading high: UInt16, trailing low: UInt16
 ) -> UInt32 {
   _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low))
   let hi10: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias)
   _sanityCheck(hi10 < 1<<10, "I said high 10. Not high, like, 20 or something")
   let lo10: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias)
   _sanityCheck(lo10 < 1<<10, "I said low 10. Not low, like, 20 or something")

   return ((hi10 &<< 10) | lo10) &+ 0x1_00_00
 }

 internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool {
   guard !_isSurrogate(cu) else { return false }
   return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore
 }

 //
 // Pointer casting helpers
 //
 internal func _castOutputBuffer(
   _ ptr: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
   endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
 ) -> UnsafeMutableBufferPointer<UInt16> {
   let bufPtr: UnsafeMutableBufferPointer<UInt16> =
     _unsafeMutableBufferPointerCast(
       ptr, _Normalization._SegmentOutputBuffer.capacity)
   return UnsafeMutableBufferPointer<UInt16>(rebasing: bufPtr[..<endIdx])
 }
 internal func _castOutputBuffer(
   _ ptr: UnsafePointer<_Normalization._SegmentOutputBuffer>,
   endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity
 ) -> UnsafeBufferPointer<UInt16> {
   let bufPtr: UnsafeBufferPointer<UInt16> =
     _unsafeBufferPointerCast(
       ptr, _Normalization._SegmentOutputBuffer.capacity)
   return UnsafeBufferPointer<UInt16>(rebasing: bufPtr[..<endIdx])
 }

 extension _FixedArray16 where T == UInt16 {
   mutating func fill(from other: _UnmanagedString<T>) {
     _sanityCheck(other.count < _FixedArray16<T>.capacity,
       "out of bounds fill")
     for i in 0..<other.count {
       self[i] = other[i]
     }
   }
 }

 @_versioned internal
 enum _Ordering: Int, Equatable {
   case less = -1
   case equal = 0
   case greater = 1

   @_versioned internal
   var flipped: _Ordering {
     switch self {
       case .less: return .greater
       case .equal: return .equal
       case .greater: return .less
     }
   }

   @inline(__always)
   @_versioned internal
   init(signedNotation int: Int) {
     self = int < 0 ? .less : int == 0 ? .equal : .greater
   }
 }

 extension _UnmanagedString where CodeUnit == UInt8 {
   // TODO: These should be SIMD-ized
   internal func _findDiffIdx(_ other: _UnmanagedString<UInt16>) -> Int {
     let count = Swift.min(self.count, other.count)
     for idx in 0..<count {
       guard UInt16(self[idx]) == other[idx] else {
         return idx
       }
     }
     return count
   }
 }

 internal func _findDiffIdx(
   _ left: UnsafeBufferPointer<UInt8>,
   _ right: UnsafeBufferPointer<UInt16>
 ) -> Int {
   let count = Swift.min(left.count, right.count)
   for idx in 0..<count {
     guard UInt16(left[idx]) == right[idx] else {
       return idx
     }
   }
   return count
 }

 internal func _findDiffIdx<CodeUnit>(
   _ left: UnsafeBufferPointer<CodeUnit>,
   _ right: UnsafeBufferPointer<CodeUnit>
 ) -> Int where CodeUnit : FixedWidthInteger & UnsignedInteger {
   let count = Swift.min(left.count, right.count)
   for idx in 0..<count {
     guard left[idx] == right[idx] else {
       return idx
     }
   }
   return count
 }

 extension _UnmanagedString where CodeUnit : FixedWidthInteger & UnsignedInteger {
   internal func _findDiffIdx<CodeUnit>(
     _ other: _UnmanagedString<CodeUnit>
   ) -> Int {
     let count = Swift.min(self.count, other.count)
     for idx in 0..<count {
       guard self[idx] == other[idx] else {
         return idx
       }
     }
     return count
   }
 }

 extension _UnmanagedOpaqueString {
   internal func _findDiffIdx(_ other: _StringGuts, _ otherRange: Range<Int>
   ) -> Int {
     let count = Swift.min(self.count, otherRange.count)
     for idx in 0..<count {
       guard self[idx] == other[idx + otherRange.lowerBound] else {
         return idx
       }
     }
     return count
   }
 }

 internal func _lexicographicalCompare(_ lhs: Int, _ rhs: Int) -> _Ordering {
   // TODO: inspect code quality
   return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal)
 }

 internal func _lexicographicalCompare(
   _ lhs: UInt16, _ rhs: UInt16
 ) -> _Ordering {
   return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal)
 }

 internal func _lexicographicalCompare(
   _ leftHS: UnsafeBufferPointer<UInt16>,
   _ rightHS: UnsafeBufferPointer<UInt16>
 ) -> _Ordering {
   let count = Swift.min(leftHS.count, rightHS.count)

   let idx = _findDiffIdx(leftHS, rightHS)
   guard idx < count else {
     return _lexicographicalCompare(leftHS.count, rightHS.count)
   }
   let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked
   let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked
   return _lexicographicalCompare(leftHSPtr[idx], rightHSPtr[idx])
 }

 internal func _lexicographicalCompare(
   _ leftHS: UnsafeBufferPointer<UInt8>,
   _ rightHS: UnsafeBufferPointer<UInt16>
 ) -> _Ordering {
   let count = Swift.min(leftHS.count, rightHS.count)

   let idx = _findDiffIdx(leftHS, rightHS)
   guard idx < count else {
     return _lexicographicalCompare(leftHS.count, rightHS.count)
   }
   let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked
   let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked
   return _lexicographicalCompare(UInt16(leftHSPtr[idx]), rightHSPtr[idx])
 }
 @inline(__always)
 internal func _lexicographicalCompare(
   _ leftHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
   leftCount: Int,
   _ rightHS: UnsafePointer<_Normalization._SegmentOutputBuffer>,
   rightCount: Int
 ) -> _Ordering {
   return _lexicographicalCompare(
     _castOutputBuffer(leftHS, endingAt: leftCount),
     _castOutputBuffer(rightHS, endingAt: rightCount))
 }
 @inline(__always)
 internal func _lexicographicalCompare(
   _ leftHS: Array<UInt16>,
   _ rightHS: Array<UInt16>
 ) -> _Ordering {
   return leftHS.withUnsafeBufferPointer { leftPtr in
     return rightHS.withUnsafeBufferPointer { rightPtr in
       return _lexicographicalCompare(leftPtr, rightPtr)
     }
   }
 }

 internal func _parseRawScalar(
   _ buf: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>,
   startingFrom idx: Int = 0
 ) -> (UnicodeScalar, scalarEndIndex: Int) {
   return Swift._parseRawScalar(buffer: _castOutputBuffer(buf), startingFrom: idx)
 }

 internal func _parseRawScalar(
   buffer buf: UnsafeBufferPointer<UInt16>,
   startingFrom idx: Int = 0
 ) -> (UnicodeScalar, scalarEndIndex: Int) {
   let ptr = buf.baseAddress._unsafelyUnwrappedUnchecked
   _sanityCheck(idx >= 0 && idx < buf.count, "out of bounds index")
   let cu: UInt16 = ptr[idx]
   if _slowPath(idx+1 == buf.count) {
     return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
   }
   guard _isLeadingSurrogate(cu) else {
     return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
   }
   let nextCu: UInt16 = ptr[idx+1]
   guard _isTrailingSurrogate(nextCu) else {
     // Invalid surrogate pair: just return the invalid value
     return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
   }

   // Decode
   let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
   _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
   return (UnicodeScalar(_unchecked: value), idx+2)
 }

 extension _UnmanagedOpaqueString {
   internal func _parseRawScalar(
     startingFrom idx: Int = 0
   ) -> (UnicodeScalar, scalarEndIndex: Int) {
     var buffer = _FixedArray2<UInt16>(allZeros:())
     if idx+1 < self.count {
       buffer[0] = self[idx]
       buffer[1] = self[idx+1]

       let bufferPointer = _unsafeBufferPointerCast(
         &buffer, 2, to: UInt16.self
       )
       return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0)
     } else {
       buffer[0] = self[idx]

       let bufferPointer = _unsafeBufferPointerCast(
         &buffer, 1, to: UInt16.self
       )
       return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0)
     }
   }
 }

 extension _UnmanagedString where CodeUnit == UInt16 {
   internal func _parseRawScalar(
     startingFrom idx: Int = 0
   ) -> (UnicodeScalar, scalarEndIndex: Int) {
     _sanityCheck(idx >= 0 && idx < self.count, "out of bounds index")
     let cu = self[idx]
     if _slowPath(idx+1 == self.count) {
       return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
     }
     guard _isLeadingSurrogate(cu) else {
       return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
     }
     let nextCu = self[idx+1]
     guard _isTrailingSurrogate(nextCu) else {
       // Invalid surrogate pair: just return the invalid value
       return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1)
     }

     // Decode
     let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu)
     _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
     return (UnicodeScalar(_unchecked: value), idx+2)
   }

   internal func _reverseParseRawScalar(
     endingAt idx: Int // one-past-the-end
   ) -> (UnicodeScalar, scalarStartIndex: Int) {
     _sanityCheck(idx > 0 && idx <= self.count, "out of bounds end index")

     // Corner case: leading un-paired surrogate
     if _slowPath(idx == 1) {
       return (UnicodeScalar(_unchecked: UInt32(self[0])), 0)
     }

     let cu = self[idx-1]
     guard _isTrailingSurrogate(cu) else {
       return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
     }
     let priorCU = self[idx-2]
     guard _isLeadingSurrogate(priorCU) else {
       return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1)
     }

     // Decode
     let value: UInt32 = _decodeSurrogatePair(leading: priorCU, trailing: cu)
     _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
     return (UnicodeScalar(_unchecked: value), idx-2)
   }

   internal func _tryNormalize(
     into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
   ) -> Int? {
     return self._tryNormalize(into: _castOutputBuffer(outputBuffer))
   }

   internal func _tryNormalize(
     into outputBuffer: UnsafeMutableBufferPointer<UInt16>
   ) -> Int? {
     var err = __swift_stdlib_U_ZERO_ERROR
     let count = __swift_stdlib_unorm2_normalize(
       _Normalization._nfcNormalizer,
       self.start,
       numericCast(self.count),
       outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
       numericCast(outputBuffer.count),
       &err
     )
     guard err.isSuccess else {
       // The output buffer needs to grow
       return nil
     }
     return numericCast(count)
   }

   internal func _slowNormalize() -> [UInt16] {
     _sanityCheck(self.count > 0, "called on empty string")

     let canary = self.count * _Normalization._maxNFCExpansionFactor
     var count = self.count
     while true {
       var result = Array<UInt16>(repeating: 0, count: count)
       if let length = result.withUnsafeMutableBufferPointer({ (bufPtr) -> Int? in
         return self._tryNormalize(into: bufPtr)
       }) {
         result.removeLast(count - length)
         return result
       }
       // Otherwise, we need to grow
       guard count <= canary else {
         fatalError("Invariant broken: Max decomposition factor insufficient")
       }
       count *= 2
     }
   }
 }

 internal func _tryNormalize(
   _ input: UnsafeBufferPointer<UInt16>,
   into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
 ) -> Int? {
   return _tryNormalize(input, into: _castOutputBuffer(outputBuffer))
 }
 internal func _tryNormalize(
   _ input: UnsafeBufferPointer<UInt16>,
   into outputBuffer: UnsafeMutableBufferPointer<UInt16>
 ) -> Int? {
   var err = __swift_stdlib_U_ZERO_ERROR
   let count = __swift_stdlib_unorm2_normalize(
     _Normalization._nfcNormalizer,
     input.baseAddress._unsafelyUnwrappedUnchecked,
     numericCast(input.count),
     outputBuffer.baseAddress._unsafelyUnwrappedUnchecked,
     numericCast(outputBuffer.count),
     &err
   )
   guard err.isSuccess else {
     // The output buffer needs to grow
     return nil
   }
   return numericCast(count)
 }

 extension _UnmanagedString where CodeUnit == UInt8 {
   @_inlineable // FIXME(sil-serialize-all)
   @_versioned
   internal func compareASCII(to other: _UnmanagedString<UInt8>) -> Int {
     // FIXME Results should be the same across all platforms.
     if self.start == other.start {
       return (self.count &- other.count).signum()
     }
     var cmp = Int(truncatingIfNeeded:
       _stdlib_memcmp(
         self.rawStart, other.rawStart,
         Swift.min(self.count, other.count)))
     if cmp == 0 {
       cmp = self.count &- other.count
     }
     return cmp.signum()
   }
 }

 public extension _StringGuts {
   @inline(__always)
   public
   func _compareContiguous(_ other: _StringGuts) -> Int {
     _sanityCheck(self._isContiguous && other._isContiguous)
     switch (self.isASCII, other.isASCII) {
     case (true, true):
       fatalError("Should have hit the ascii comp in StringComparable.compare()")
     case (true, false):
       return self._unmanagedASCIIView._compareStringsPreLoop(
         other: other._unmanagedUTF16View
       ).rawValue
     case (false, true):
       // Same compare, just invert result
       return other._unmanagedASCIIView._compareStringsPreLoop(
         other: self._unmanagedUTF16View
       ).flipped.rawValue
     case (false, false):
       return self._unmanagedUTF16View._compareStringsPreLoop(
         other: other._unmanagedUTF16View
       ).rawValue
     }
   }

   @inline(__always)
   public
   func _compareContiguous(
     _ selfRange: Range<Int>,
     _ other: _StringGuts,
     _ otherRange: Range<Int>
   ) -> Int {
     _sanityCheck(self._isContiguous && other._isContiguous)
     switch (self.isASCII, other.isASCII) {
     case (true, true):
       fatalError("Should have hit the ascii comp in StringComparable.compare()")
     case (true, false):
       return self._unmanagedASCIIView[selfRange]._compareStringsPreLoop(
         other: other._unmanagedUTF16View[otherRange]
       ).rawValue
     case (false, true):
       // Same compare, just invert result
       return other._unmanagedASCIIView[otherRange]._compareStringsPreLoop(
         other: self._unmanagedUTF16View[selfRange]
       ).flipped.rawValue
     case (false, false):
       return self._unmanagedUTF16View[selfRange]._compareStringsPreLoop(
         other: other._unmanagedUTF16View[otherRange]
       ).rawValue
     }
   }
 }

 extension _UnmanagedOpaqueString {
   @inline(never) // @outlined
   @_versioned
   internal
   func _compareOpaque(_ other: _StringGuts) -> _Ordering {
     return self._compareOpaque(other, 0..<other.count)
   }

   @inline(never) // @outlined
   @_versioned
   internal
   func _compareOpaque(
     _ other: _StringGuts, _ otherRange: Range<Int>
   ) -> _Ordering {
     //
     // Do a fast Latiny comparison loop; bail if that proves insufficient.
     //
     // The vast majority of the time, seemingly-non-contiguous Strings are
     // really ASCII strings that were bridged improperly. E.g., unknown nul-
     // termination of an all-ASCII file loaded by String.init(contentsOfFile:).
     //

     let selfCount = self.count
     let otherCount = otherRange.count
     let count = Swift.min(selfCount, otherCount)
     let idx = self._findDiffIdx(other, otherRange)
     if idx == count {
       return _lexicographicalCompare(selfCount, otherCount)
     }

     let selfCU = self[idx]
     let otherCU = other[idx + otherRange.lowerBound]

     //
     // Fast path: if one is ASCII, we can often compare the code units directly.
     //
     let selfIsASCII = selfCU <= 0x7F
     let otherIsASCII = otherCU <= 0x7F

     let selfIsSingleSegmentScalar =
         self.hasNormalizationBoundary(after: idx)
         && _hasNormalizationBoundary(before: selfCU)
     let otherIsSingleSegmentScalar =
         other.hasNormalizationBoundary(after: idx)
         && _hasNormalizationBoundary(before: otherCU)

     if _fastPath(selfIsASCII || otherIsASCII) {
       _sanityCheck(idx < selfCount && idx < otherCount,
         "Should be caught by check against min-count")
       // Check if next CU is <0x300, or if we're in a
       // "_isNormalizedSuperASCII" case. 99.9% of the time, we're here because
       // the non-contig string is ASCII. We never want to hit the pathological
       // path for those.

       if selfIsASCII && otherIsASCII {
         if selfIsSingleSegmentScalar && otherIsSingleSegmentScalar {
           return _lexicographicalCompare(selfCU, otherCU)
         }

         return self._compareOpaquePathological(
           other, otherRange, startingFrom: Swift.max(0, idx-1))
       }

       if selfIsASCII && selfIsSingleSegmentScalar
       && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
          return .less
       } else if otherIsASCII && otherIsSingleSegmentScalar
       && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII {
          return .greater
        }
     }

     return self._compareOpaquePathological(
       other, otherRange, startingFrom: Swift.max(0, idx-1)
     )
   }

   @inline(never)
   func _compareOpaquePathological(
     _ other: _StringGuts, _ otherRange: Range<Int>,
     startingFrom: Int
   ) -> _Ordering {
     // Compare by pulling in a segment at a time, normalizing then comparing
     // individual code units
     var selfIterator = _NormalizedCodeUnitIterator(self, startIndex: startingFrom)
     return selfIterator.compare(with:
       _NormalizedCodeUnitIterator(other, otherRange, startIndex: startingFrom)
     )
   }
 }

 extension UnicodeScalar {
   internal func _normalize(
     into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
   ) -> Int {
     // Implementation: Perform the normalization on an input buffer and output
     // buffer.
     func impl(
       _ input: UnsafeMutablePointer<_FixedArray2<UInt16>>,
       count: Int,
       into output: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
     ) -> Int {
       let inputBuffer = _unsafeBufferPointerCast(
         input, count, to: UInt16.self
       )
       let outputBuffer = _unsafeMutableBufferPointerCast(
         output, _FixedArray8<UInt16>.capacity, to: UInt16.self
       )
       return _tryNormalize(
         inputBuffer, into: outputBuffer
       )._unsafelyUnwrappedUnchecked
     }

     var inBuffer = _FixedArray2<UInt16>(allZeros:())
     var inLength = 0
     for cu in self.utf16 {
       inBuffer[inLength] = cu
       inLength += 1
     }

     return impl(&inBuffer, count: inLength, into: outputBuffer)
   }

   static internal let maxValue = 0x0010_FFFF
 }

 private struct _UnicodeScalarExceptions {
   fileprivate let _multiSegmentExpanders: Set<UInt32>
   fileprivate let _normalizedASCIIStarter: Array<UInt32>

   @inline(__always)
   init() {
     var msExpanders = Set<UInt32>()
     msExpanders.reserveCapacity(16)
     var normalizedASCIIStarter = Array<UInt32>()
     normalizedASCIIStarter.reserveCapacity(8)

     for rawValue in 0..<UnicodeScalar.maxValue {
       guard let scalar = UnicodeScalar(rawValue) else { continue }

       // Fast path: skip unassigned code points
       guard scalar._isDefined else { continue }

       // Fast path: skip unless QC_FCD=no
       if _fastPath(!scalar._hasFullCompExclusion) {
         continue
       }

       var outBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
       let length = scalar._normalize(into: &outBuffer)

       // See if this normalized to have an ASCII starter
       if _slowPath(outBuffer[0] <= 0x7F) {
         normalizedASCIIStarter.append(scalar.value)
       }

       // See if this normalizes to multiple segments
       var i = 0
       while i < length {
         let (innerScalar, nextI) = _parseRawScalar(&outBuffer, startingFrom: i)
         if _slowPath(i != 0 && innerScalar._hasNormalizationBoundaryBefore) {
           guard innerScalar._hasNormalizationBoundaryBefore else {
             fatalError(
               "Unicode invariant violated: non-starter multi-segment expander")
           }
           msExpanders.insert(scalar.value)
           break
         }
         i = nextI
       }
     }

     self._multiSegmentExpanders = msExpanders
     self._normalizedASCIIStarter = normalizedASCIIStarter
   }
 }
 private let _unicodeScalarExceptions: _UnicodeScalarExceptions = {
   return _UnicodeScalarExceptions()
 }()

 extension UnicodeScalar {
   // Multi-Segment Expanders - Unicode defines "expanding canonical
   // decompositions", where even in NFC a single scalar expands to multiple
   // scalars. A small subset (currently 12 scalars circa Unicode 10) of these
   // will expand into multiple normalization segments, breaking any kind of
   // segment-by- segment logic or processing even under NFC. These are a subset
   // of what is identified by the UCD as "composition exclusion" scalars. Since
   // we don't have access to a UCD (available only at runtime), we go through
   // ICU which lumps those and even more as "Full Composition Exclusions". Of
   // the many full composition exclusions, this set (created once at runtime as
   // this can change with Unicode version) tracks just those that can expand
   // into multiple normalization segments.
   internal var _isMultiSegmentExpander: Bool {
     return _unicodeScalarExceptions._multiSegmentExpanders.contains(self.value)
   }

   // Whether, post-normalization, this scalar definitely compares greater than
   // any ASCII scalar. This is true for all super-ASCII scalars that are not
   // ASCII Normalized Starters.
   //
   // ASCII Normalized Starters - A handful of scalars normalize to have ASCII
   // starters, e.g. Greek question mark ";". As of Unicode 10 there are 3 (all
   // from Unicode 1.1 originally) and more are unlikely. But, there could be
   // more in future versions, so determine at runtime.
   internal var _isNormalizedSuperASCII: Bool {
     if _slowPath(
       _unicodeScalarExceptions._normalizedASCIIStarter.contains(self.value)
     ) {
       return false
     }
     return self.value > 0x7F
   }
 }

 extension _UnmanagedString where CodeUnit == UInt8 {
   @_versioned
   internal func _compareStringsPreLoop(
     other: _UnmanagedString<UInt16>
   ) -> _Ordering {
     let count = Swift.min(self.count, other.count)

     //
     // Fast scan until we find a difference
     //
     let idx = self._findDiffIdx(other)
     guard idx < count else {
       return _lexicographicalCompare(self.count, other.count)
     }
     let otherCU = other[idx]

     //
     // Fast path: if other is super-ASCII post-normalization, we must be less. If
     // other is ASCII and a single-scalar segment, we have our answer.
     //
     if otherCU > 0x7F {
       if _fastPath(
         other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII
       ) {
        return .less
       }
     } else {
       let selfASCIIChar = UInt16(self[idx])
       _sanityCheck(selfASCIIChar != otherCU, "should be different")
       if idx+1 == other.count {
         return _lexicographicalCompare(selfASCIIChar, otherCU)
       }
       if _fastPath(other.hasNormalizationBoundary(after: idx)) {
         return _lexicographicalCompare(selfASCIIChar, otherCU)
       }
     }

     //
     // Otherwise, need to normalize the segment and then compare
     //
     let selfASCIIChar = UInt16(self[idx])
     return _compareStringsPostSuffix(
       selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...]
       )
   }
 }

 extension _StringGuts {
   func hasNormalizationBoundary(after index: Int) -> Bool {
     let nextIndex = index + 1
     if nextIndex >= self.count {
       return true
     }

     let nextCU = self[nextIndex]
     return _hasNormalizationBoundary(before: nextCU)
   }
 }

 extension _UnmanagedOpaqueString {
   func hasNormalizationBoundary(after index: Int) -> Bool {
     let nextIndex = index + 1
     if nextIndex >= self.count {
       return true
     }

     let nextCU = self[nextIndex]
     return _hasNormalizationBoundary(before: nextCU)
   }
 }

 extension _UnmanagedString where CodeUnit == UInt16 {
   func hasNormalizationBoundary(after index: Int) -> Bool {
     let nextIndex = index + 1
     if nextIndex >= self.count {
       return true
     }

     let nextCU = self[nextIndex]
     return _hasNormalizationBoundary(before: nextCU)
   }
 }

 private func _compareStringsPostSuffix(
   selfASCIIChar: UInt16,
   otherUTF16: _UnmanagedString<UInt16>
 ) -> _Ordering {
   let otherCU = otherUTF16[0]
   _sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call")

   let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0)
   let segment = otherUTF16[..<segmentEndIdx]

   // Fast path: If prenormal, we're done.
   if _Normalization._prenormalQuickCheckYes(segment) {
     return _lexicographicalCompare(selfASCIIChar, otherCU)
   }

   // Normalize segment, and then compare first code unit
   var outputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
   if _fastPath(
     segment._tryNormalize(into: &outputBuffer) != nil
   ) {
     return _lexicographicalCompare(selfASCIIChar, outputBuffer[0])
   }
   return _lexicographicalCompare(selfASCIIChar, segment._slowNormalize()[0])
 }

 extension _UnmanagedString where CodeUnit == UInt16 {
   //
   // Find the end of the normalization segment
   //
   internal func _findNormalizationSegmentEnd(startingFrom idx: Int) -> Int {
     let count = self.count
     _sanityCheck(idx < count, "out of bounds")

     // Normalization boundaries are best queried before known starters. Advance
     // past one scalar first.
     var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx)
     while segmentEndIdx < count {
       let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx)
       if scalar._hasNormalizationBoundaryBefore {
         break
       }
       segmentEndIdx = nextIdx
     }
     return segmentEndIdx
   }

   internal func _findNormalizationSegmentStart(
     endingAt idx: Int // one-past-the-end
   ) -> Int {
     var idx = idx
     let count = self.count
     _sanityCheck(idx > 0 && idx <= count, "out of bounds")

     while idx > 0 {
       let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx)
       idx = priorIdx
       if scalar._hasNormalizationBoundaryBefore {
         break
       }
     }
     return idx
   }

   internal func _findNormalizationSegment(spanning idx: Int) -> (Int, Int) {
     var idx = idx

     // Corner case: if we're sub-surrogate, back up
     if _slowPath(
       idx > 0
       && _isTrailingSurrogate(self[idx])
       && _isLeadingSurrogate(self[idx-1])
     ) {
       idx -= 1
     }
     let segmentEnd = self._findNormalizationSegmentEnd(startingFrom: idx)

     // Find the start
     if _slowPath(idx == 0) {
       return (0, segmentEnd)
     }

     // Check current scalar
     if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore {
       return (idx, segmentEnd)
     }

     // Reverse parse until we found the segment start
     let segmentStart = self._findNormalizationSegmentStart(endingAt: idx)

     return (segmentStart, segmentEnd)
   }

   // Wether the segment identified by `idx` is prenormal.
   //
   // Scalar values below 0x300 are special: normalization segments containing only
   // one such scalar are trivially prenormal under NFC. Most Latin-derived scripts
   // can be represented entirely by <0x300 scalar values, meaning that many user
   // strings satisfy this prenormal check. We call sub-0x300 scalars "Latiny" (not
   // official terminology).
   //
   // The check is effectively:
   //   1) Whether the current scalar <0x300, AND
   //   2) Whether the current scalar comprises the entire segment
   //
   internal func _isLatinyPrenormal(idx: Int
   ) -> Bool {
     _sanityCheck(idx < self.count, "out of bounds")

     let cu = self[idx]
     if _slowPath(cu >= 0x300) {
       return false
     }
     if _slowPath(idx+1 == self.count) {
       return true
     }

     let nextCU = self[idx+1]
     return nextCU < 0x300 || _hasNormalizationBoundary(before: nextCU)
   }

   @_versioned
   internal
   func _compareStringsPreLoop(
     other: _UnmanagedString<UInt16>
   ) -> _Ordering {
     let count = Swift.min(self.count, other.count)

     //
     // Fast scan until we find a diff
     //
     let idx = _findDiffIdx(other)
     guard idx < count else {
       return _lexicographicalCompare(self.count, other.count)
     }
     let selfCU = self[idx]
     let otherCU = other[idx]

     //
     // Fast path: sub-0x300 single-scalar segments can be compared directly
     //
     if _fastPath(
       _isLatinyPrenormal(idx: idx)
       && other._isLatinyPrenormal(idx: idx)
     ) {
       return _lexicographicalCompare(selfCU, otherCU)
     }

     return self._compareStringsSuffix(other: other, randomIndex: idx)
   }

   //Is the shorter of the two parameters a prefix of the other parameter?
   private func shorterPrefixesOther(
     _ other: _UnmanagedString<UInt16>
   ) -> Bool {
     if self.count == other.count {
       return false
     }

     let minimumLength = Swift.min(self.count, other.count)
     for i in 0..<minimumLength {
       if self[i] != other[i] {
         return false
       }
     }
     return true
   }

   private func _compareStringsSuffix(
     other: _UnmanagedString<UInt16>,
     randomIndex: Int
   ) -> _Ordering {
     let count = Swift.min(self.count, other.count)
     let selfCU = self[randomIndex]
     let otherCU = other[randomIndex]
     _sanityCheck(randomIndex >= 0 && randomIndex < count, "out of bounds")
     _sanityCheck(selfCU != otherCU, "should be called at a point of difference")

     //
     // Find the segment surrounding the random index passed in. This may involve
     // some back tracking to the nearest normalization boundary. Once we've
     // identified the segment, we can normalize and continue comparision.
     //
     // NOTE: We need to back-track for both self and other. Even though prefixes
     // are binary equal, the point of difference might be at the start of a new
     // segment for one and in the middle of the prior segment for the other. In
     // which case, we will want to effectively compare the two consecutive
     // segments together.
     //
     let (selfSegmentStartIdx, selfSegmentEndIdx) =
       self._findNormalizationSegment(spanning: randomIndex)
     let (otherSegmentStartIdx, otherSegmentEndIdx) =
       other._findNormalizationSegment(spanning: randomIndex)
     let comparisonStartIdx = Swift.min(selfSegmentStartIdx, otherSegmentStartIdx)


     //
     // Fast path: if both are prenormal, we have our answer
     //
     let selfSegment = self[comparisonStartIdx..<selfSegmentEndIdx]
     let otherSegment = other[comparisonStartIdx..<otherSegmentEndIdx]
     let selfSegmentPrenormal = _Normalization._prenormalQuickCheckYes(selfSegment)
     let otherSegmentPrenormal = _Normalization._prenormalQuickCheckYes(
       otherSegment)
     if selfSegmentPrenormal && otherSegmentPrenormal {
       return _lexicographicalCompare(selfCU, otherCU)
     }

     //
     // Pathological case: multi-segment expanders ruin segment-by-segment
     // processing.
     //
     // NOTE: Multi-segment expanders are (at least up til Unicode 10) always the
     // beginning of a normalization segment (i.e. they are starters). This is very
     // unlikely to change in the future, as new non-starter scalars that normalize
     // to pre-existing scalars would have to produce a starter. We validate this
     // fact on constructing our MultiSegmentExpander set, so we can rely on it
     // here.
     //
     if _slowPath(
        selfSegment._parseRawScalar().0._isMultiSegmentExpander
     || otherSegment._parseRawScalar().0._isMultiSegmentExpander
     ) {
       return self[comparisonStartIdx...]._compareStringsPathological(
         other: other[comparisonStartIdx...]
       )
     }

     //
     // Normalize segments and compare. If they still differ, we have our answer.
     //
     var selfOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
     var otherOutputBuffer = _Normalization._SegmentOutputBuffer(allZeros:())
     let selfSegmentLengthOpt: Int?
     let otherSegmentLengthOpt: Int?
     if selfSegmentPrenormal {
       selfOutputBuffer.fill(from: selfSegment)
       selfSegmentLengthOpt = selfSegment.count
     } else {
       selfSegmentLengthOpt = selfSegment._tryNormalize(into: &selfOutputBuffer)
     }
     if otherSegmentPrenormal {
       otherOutputBuffer.fill(from: otherSegment)
       otherSegmentLengthOpt = otherSegment.count
     } else {
       otherSegmentLengthOpt = otherSegment._tryNormalize(into: &otherOutputBuffer)
     }

     if _slowPath(selfSegmentLengthOpt == nil || otherSegmentLengthOpt == nil) {
       // If we couldn't normalize a segment into a generously large stack buffer,
       // we have a pathological String.
       return self[comparisonStartIdx...]._compareStringsPathological(
         other: other[comparisonStartIdx...]
       )
     }
     let selfLength = selfSegmentLengthOpt._unsafelyUnwrappedUnchecked
     let otherLength = otherSegmentLengthOpt._unsafelyUnwrappedUnchecked

     if Swift.shorterPrefixesOther(
       &selfOutputBuffer, selfLength,
       &otherOutputBuffer, otherLength)
     {
       let selfSlice = self[selfSegmentStartIdx...]
       let otherSlice = other[otherSegmentStartIdx...]
       return selfSlice._compareStringsPathological(other: otherSlice)
     }

     let comp = _lexicographicalCompare(
       &selfOutputBuffer, leftCount: selfLength,
       &otherOutputBuffer, rightCount: otherLength)
     if _fastPath(comp != .equal) {
       return comp
     }

     //
     // If they compare equal after normalization, we may have equal strings that
     // differ in form, e.g. NFC vs NFD strings. Or, we may have strings that
     // differ in form that also will differ later on. Either way, segment-by-
     // segment processing incurs significant overhead. We'd rather do larger
     // chunks of work at a time (e.g. ~1KB of text at a time). For now, we eagerly
     // process the entire strings, as chunking properly without guarantees of
     // normality is tricky (and expensive at times as well).
     //
     // NOTE: We could add a chunking path. It is hard to do correctly, because
     // Unicode. It's especially hard to test, because Unicode. It's hard to ensure
     // lasting correctness, because Unicode. (Also, sometimes it's impossible, but
     // that's what _compareStringsPathological is for.) However, it helps for very
     // long strings that differ in the middle. We might want this one day... but
     // not today.
     //
     // TODO: An additional (or even repeated) reapplying of the algorithm,
     // including the binary diff scan, could greatly benefit strings that only
     // sparsely differ in normality (penalizing strings that densely differ in
     // normality). This would add complexity, but with compelling data could be an
     // alternative to chunking.
     //
     return self[selfSegmentEndIdx...]._compareStringsPathological(
       other: other[otherSegmentEndIdx...]
     )
   }

   private func _compareStringsPathological(
     other: _UnmanagedString<UInt16>
   ) -> _Ordering {
     var selfIterator = _NormalizedCodeUnitIterator(self)
     return selfIterator.compare(with:
       _NormalizedCodeUnitIterator(other)
     )
   }
 }

 private func shorterPrefixesOther(
   _ selfBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
   _ selfLength: Int,
   _ otherBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>,
   _ otherLength: Int
 ) -> Bool {
   return shorterPrefixesOther(
     _castOutputBuffer(selfBuffer, endingAt: selfLength),
     _castOutputBuffer(otherBuffer, endingAt: otherLength)
   )
 }

 //Is the shorter of the two parameters a prefix of the other parameter?
 private func shorterPrefixesOther(
   _ selfBuffer: UnsafeBufferPointer<UInt16>,
   _ otherBuffer: UnsafeBufferPointer<UInt16>
 ) -> Bool {
   if selfBuffer.count == otherBuffer.count {
     return false
   }

   let minimumLength = Swift.min(selfBuffer.count, otherBuffer.count)
   for i in 0..<minimumLength {
     if selfBuffer[i] != otherBuffer[i] {
       return false
     }
   }
   return true
 }