// stdlib/public/core/Character.swift - third_party/swift - Git at Google

 //===----------------------------------------------------------------------===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See http://swift.org/LICENSE.txt for license information
 // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//

 /// A single extended grapheme cluster, which approximates a user-perceived
 /// character.
 ///
 /// The `Character` type represents a character made up of one or more Unicode
 /// scalar values, grouped by a Unicode boundary algorithm. Generally, a
 /// `Character` instance matches what the reader of a string will perceive as
 /// a single character. The number of visible characters is generally the most
 /// natural way to count the length of a string.
 ///
 ///     let greeting = "Hello! 🐥"
 ///     print("Character count: \(greeting.characters.count)")
 ///     // Prints "Character count: 8"
 ///
 /// Because each character in a string can be made up of one or more Unicode
 /// code points, the number of characters in a string may not match the length
 /// of the Unicode code point representation or the length of the string in a
 /// particular binary representation.
 ///
 ///     print("Unicode code point count: \(greeting.unicodeScalars.count)")
 ///     // Prints "Unicode code point count: 8"
 ///
 ///     print("UTF-8 representation count: \(greeting.utf8.count)")
 ///     // Prints "UTF-8 representation count: 11"
 ///
 /// Every `Character` instance is composed of one or more Unicode code points
 /// that are grouped together as an *extended grapheme cluster*. The way these
 /// code points are grouped is defined by a canonical, localized, or otherwise
 /// tailored Unicode segmentation algorithm.
 ///
 /// For example, a country's Unicode flag character is made up of two regional
 /// indicator code points that correspond to that country's ISO 3166-1 alpha-2
 /// code. The alpha-2 code for The United States is "US", so its flag
 /// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL
 /// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL
 /// LETTER S). When placed next to each other in a Swift string literal, these
 /// two code points are combined into a single grapheme cluster, represented
 /// by a `Character` instance in Swift.
 ///
 ///     let usFlag: Character = "\u{1F1FA}\u{1F1F8}"
 ///     print(usFlag)
 ///     // Prints "🇺🇸"
 ///
 /// For more information about the Unicode terms used in this discussion, see
 /// the [Unicode.org glossary][glossary]. In particular, this discussion
 /// mentions [extended grapheme clusters][clusters] and [Unicode scalar
 /// values][scalars].
 ///
 /// [glossary]: http://www.unicode.org/glossary/
 /// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
 /// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
 public struct Character :
   _ExpressibleByBuiltinExtendedGraphemeClusterLiteral,
   ExpressibleByExtendedGraphemeClusterLiteral, Hashable {

   // Fundamentally, it is just a String, but it is optimized for the
   // common case where the UTF-8 representation fits in 63 bits.  The
   // remaining bit is used to discriminate between small and large
   // representations.  In the small representation, the unused bytes
   // are filled with 0xFF.
   //
   // If the grapheme cluster can be represented as `.small`, it
   // should be represented as such.
   @_versioned
   internal enum Representation {
     // A _StringBuffer whose first grapheme cluster is self.
     // NOTE: may be more than 1 Character long.
     case large(_StringBuffer._Storage)
     // UTF-8 code units packed with the first code unit in the lowest byte.
     // The top bit of the 64-bit pattern is not stored; `_smallValue(_:)`
     // restores it as 1.
     case small(Builtin.Int63)
   }

   /// Creates a character containing the given Unicode scalar value.
   ///
   /// This initializer always produces the `.small` representation.
   ///
   /// - Parameter scalar: The Unicode scalar value to convert into a character.
   public init(_ scalar: UnicodeScalar) {
     // Accumulates the UTF-8 code units, first code unit in the lowest byte.
     var asInt: UInt64 = 0
     // Bit offset at which the next code unit is stored.
     var shift: UInt64 = 0

     let output: (UTF8.CodeUnit) -> Void = {
       asInt |= UInt64($0) << shift
       shift += 8
     }

     UTF8.encode(scalar, into: output)
     // Set every bit at or above `shift`, so all unused bytes read as 0xFF.
     asInt |= (~0) << shift
     // Only 63 bits are stored; `_smallValue(_:)` sets bit 63 again on read.
     _representation = .small(Builtin.trunc_Int64_Int63(asInt._value))
   }

   /// Creates a character from a built-in Unicode scalar literal value.
   ///
   /// Wraps `value` in a one-element UTF-32 sequence, builds a `String` from
   /// it, and forwards to `init(_: String)`.
   @effects(readonly)
   public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
     self = Character(
       String._fromWellFormedCodeUnitSequence(
         UTF32.self, input: CollectionOfOne(UInt32(value))))
   }

   /// Creates a character with the specified value.
   ///
   /// Don't call this initializer directly. It is used by the compiler when you
   /// use a string literal to initialize a `Character` instance. For example:
   ///
   ///     let snowflake: Character = "❄︎"
   ///     print(snowflake)
   ///     // Prints "❄︎"
   ///
   /// The assignment to the `snowflake` constant calls this initializer behind
   /// the scenes.
   public init(unicodeScalarLiteral value: Character) {
     self = value
   }

   /// Creates a character from the raw storage of an extended grapheme cluster
   /// literal.
   ///
   /// Builds a `String` from the same three arguments and forwards to
   /// `init(_: String)`.
   @effects(readonly)
   public init(
     _builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
     utf8CodeUnitCount: Builtin.Word,
     isASCII: Builtin.Int1
   ) {
     self = Character(
       String(
         _builtinExtendedGraphemeClusterLiteral: start,
         utf8CodeUnitCount: utf8CodeUnitCount,
         isASCII: isASCII))
   }

   /// Creates a character with the specified value.
   ///
   /// Don't call this initializer directly. It is used by the compiler when you
   /// use a string literal to initialize a `Character` instance. For example:
   ///
   ///     let oBreve: Character = "o\u{306}"
   ///     print(oBreve)
   ///     // Prints "ŏ"
   ///
   /// The assignment to the `oBreve` constant calls this initializer behind the
   /// scenes.
   public init(extendedGraphemeClusterLiteral value: Character) {
     self = value
   }

   /// Creates a character from a single-character string.
   ///
   /// The following example creates a new character from the uppercase version
   /// of a string that only holds one character.
   ///
   ///     let a = "a"
   ///     let capitalA = Character(a.uppercased())
   ///
   /// - Parameter s: The single-character string to convert to a `Character`
   ///   instance. `s` must contain exactly one extended grapheme cluster.
   public init(_ s: String) {
     // The small representation can accept up to 8 code units as long
     // as the last one is a continuation.  Since the high bit of the
     // last byte is used for the enum's discriminator, we have to
     // reconstruct it.  As a result, we can't store 0x7f in the final
     // byte, because we wouldn't be able to distinguish it from an
     // unused 0xFF byte.  Rather than trying to squeeze in other
     // one-byte code points there, we simplify decoding by banning
     // starting a code point in the last byte, and assuming that its
     // high bit is 1.
     _precondition(
       s._core.count != 0, "Can't form a Character from an empty String")
     _precondition(
       s.index(after: s.startIndex) == s.endIndex,
       "Can't form a Character from a String containing more than one extended grapheme cluster")

     // NOTE(review): `_encodeSomeUTF8(from:)` is defined elsewhere. From its use
     // below, `count` is presumably how many code units of `s._core` were
     // encoded and `initialUTF8` the packed UTF-8 bytes -- confirm.
     let (count, initialUTF8) = s._core._encodeSomeUTF8(from: 0)
     // Notice that the result of `MemoryLayout.size(ofValue:)` is a small
     // non-zero number and can't overflow when multiplied by 8.
     // `bits` is the index of the highest bit of `initialUTF8`.
     let bits = MemoryLayout.size(ofValue: initialUTF8) &* 8 &- 1
     // Small form requires that the whole string was encoded and that the top
     // bit is 1, because that bit is dropped on store and assumed on load.
     if _fastPath(
       count == s._core.count && (initialUTF8 & (1 << numericCast(bits))) != 0) {
       _representation = .small(Builtin.trunc_Int64_Int63(initialUTF8._value))
     }
     else {
       // Share the string's native buffer when the string begins at the start
       // of that buffer.
       if let native = s._core.nativeBuffer,
          native.start == s._core._baseAddress! {
         _representation = .large(native._storage)
         return
       }
       // Otherwise append into a fresh string and keep that string's native
       // buffer.
       var nativeString = ""
       nativeString.append(s)
       _representation = .large(nativeString._core.nativeBuffer!._storage)
     }
   }

   /// Returns the index of the lowest byte that is 0xFF, or 8 if
   /// there is none.
   ///
   /// `_SmallUTF8` uses this as the number of UTF-8 code units held in `value`.
   static func _smallSize(_ value: UInt64) -> Int {
     var mask: UInt64 = 0xFF
     for i in 0..<8 {
       if (value & mask) == mask {
         return i
       }
       mask <<= 8
     }
     return 8
   }

   /// Widens a stored 63-bit small representation to 64 bits and sets bit 63,
   /// which is not part of the stored value.
   static func _smallValue(_ value: Builtin.Int63) -> UInt64 {
     return UInt64(Builtin.zext_Int63_Int64(value)) | (1<<63)
   }

   /// A random-access view of the UTF-8 code units packed into a 64-bit small
   /// representation, with code unit `i` stored in byte `i` (lowest byte
   /// first).
   internal struct _SmallUTF8 : RandomAccessCollection {
     typealias Indices = CountableRange<Int>

     var indices: CountableRange<Int> {
       return startIndex..<endIndex
     }

     /// Creates a view over `u8`; the code unit count is the index of the
     /// lowest 0xFF byte, as computed by `Character._smallSize(_:)`.
     init(_ u8: UInt64) {
       let utf8Count = Character._smallSize(u8)
       _sanityCheck(utf8Count <= 8, "Character with more than 8 UTF-8 code units")
       self.count = UInt16(utf8Count)
       self.data = u8
     }

     /// The position of the first element in a non-empty collection.
     ///
     /// In an empty collection, `startIndex == endIndex`.
     var startIndex: Int {
       return 0
     }

     /// The collection's "past the end" position.
     ///
     /// `endIndex` is not a valid argument to `subscript`, and is always
     /// reachable from `startIndex` by zero or more applications of
     /// `index(after:)`.
     var endIndex: Int {
       return Int(count)
     }

     /// Access the code unit at `position`.
     ///
     /// - Precondition: `position` is a valid position in `self` and
     ///   `position != endIndex`.
     subscript(position: Int) -> UTF8.CodeUnit {
       _sanityCheck(position >= 0)
       _sanityCheck(position < Int(count))
       // Note: using unchecked arithmetic because overflow cannot happen if the
       // above sanity checks hold.
       return UTF8.CodeUnit(
         truncatingBitPattern: data >> (UInt64(position) &* 8))
     }

     /// Yields the packed code units from the lowest byte upward and stops at
     /// the first 0xFF byte.
     internal struct Iterator : IteratorProtocol {
       init(_ data: UInt64) {
         self._data = data
       }

       internal mutating func next() -> UInt8? {
         let result = UInt8(truncatingBitPattern: _data)
         if result == 0xFF {
           return nil
         }
         // Shift the next code unit into the low byte and feed 0xFF in at the
         // top, so the terminator is still found after 8 code units.
         _data = (_data >> 8) | 0xFF00_0000_0000_0000
         return result
       }

       internal var _data: UInt64
     }

     internal func makeIterator() -> Iterator {
       return Iterator(data)
     }

     // Number of code units in `data`.
     var count: UInt16
     // The packed 64-bit pattern, unused bytes set to 0xFF.
     var data: UInt64
   }

   /// A random-access view of the UTF-16 transcoding of a small
   /// representation.
   ///
   /// The code units are packed into `data` in the order they are produced,
   /// each new one pushing the earlier ones 16 bits higher, so the last code
   /// unit occupies the lowest 16 bits.
   struct _SmallUTF16 : RandomAccessCollection {
     typealias Indices = CountableRange<Int>

     /// Transcodes the UTF-8 code units packed in `u8` to UTF-16.
     init(_ u8: UInt64) {
       // NOTE(review): the force unwrap presumably relies on
       // `repairingIllFormedSequences: true` never yielding nil -- confirm.
       let count = UTF16.transcodedLength(
         of: _SmallUTF8(u8).makeIterator(),
         decodedAs: UTF8.self,
         repairingIllFormedSequences: true)!.0
       _sanityCheck(count <= 4, "Character with more than 4 UTF-16 code units")
       self.count = UInt16(count)
       var u16: UInt64 = 0
       let output: (UTF16.CodeUnit) -> Void = {
         u16 = u16 << 16
         u16 = u16 | UInt64($0)
       }
       _ = transcode(
         _SmallUTF8(u8).makeIterator(),
         from: UTF8.self, to: UTF16.self,
         stoppingOnError: false,
         into: output)
       self.data = u16
     }

     /// The position of the first element in a non-empty collection.
     ///
     /// In an empty collection, `startIndex == endIndex`.
     var startIndex: Int {
       return 0
     }

     /// The collection's "past the end" position.
     ///
     /// `endIndex` is not a valid argument to `subscript`, and is always
     /// reachable from `startIndex` by zero or more applications of
     /// `index(after:)`.
     var endIndex: Int {
       return Int(count)
     }

     /// Access the code unit at `position`.
     ///
     /// - Precondition: `position` is a valid position in `self` and
     ///   `position != endIndex`.
     subscript(position: Int) -> UTF16.CodeUnit {
       _sanityCheck(position >= 0)
       _sanityCheck(position < Int(count))
       // Note: using unchecked arithmetic because overflow cannot happen if the
       // above sanity checks hold.
       // Position 0 is the highest occupied 16-bit lane, hence the reversed
       // shift amount.
       return UTF16.CodeUnit(truncatingBitPattern:
         data >> ((UInt64(count) &- UInt64(position) &- 1) &* 16))
     }

     // Number of UTF-16 code units in `data`.
     var count: UInt16
     // The packed UTF-16 code units.
     var data: UInt64
   }

   /// The character's hash value.
   ///
   /// Hash values are not guaranteed to be equal across different executions of
   /// your program. Do not save hash values to use during a future execution.
   public var hashValue: Int {
     // FIXME(performance): constructing a temporary string is extremely
     // wasteful and inefficient.
     return String(self).hashValue
   }

   typealias UTF16View = String.UTF16View
   /// The UTF-16 view of a temporary string built from this character.
   var utf16: UTF16View {
     return String(self).utf16
   }

   // The character's storage: either a packed small value or a string buffer.
   @_versioned
   internal var _representation: Representation
 }

 extension Character : CustomStringConvertible {
   /// A textual representation of the character.
   ///
   /// Built with `String.init(_: Character)`, the same conversion used by
   /// `hashValue`, `utf16`, and `debugDescription`. Unlike
   /// `String(describing:)`, this does not depend on runtime-dispatched
   /// printing of `self`, so it cannot end up calling `description` again.
   public var description: String {
     return String(self)
   }
 }

 // The body is empty, so the protocol's requirements are met by members
 // declared elsewhere on `Character` (presumably `init(_: String)` and
 // `description` -- confirm). `init(_: String)` is non-failable: it traps on an
 // empty string or one with more than one grapheme cluster instead of
 // returning nil.
 extension Character : LosslessStringConvertible {}

 extension Character : CustomDebugStringConvertible {
   /// The debugging description of the one-character string made from this
   /// character.
   public var debugDescription: String {
     let asString = String(self)
     return asString.debugDescription
   }
 }

 extension String {
   /// Creates a string containing the given character.
   ///
   /// A small character is decoded from its packed UTF-8 code units. A large
   /// character contributes only the first grapheme cluster of its buffer.
   ///
   /// - Parameter c: The character to convert to a string.
   public init(_ c: Character) {
     switch c._representation {
     case let .small(packedBits):
       let codeUnits = Character._SmallUTF8(Character._smallValue(packedBits))
       self = String._fromWellFormedCodeUnitSequence(
         UTF8.self, input: codeUnits)
     case let .large(storage):
       // The buffer may hold more than one character; keep only the first.
       let whole = String(_StringCore(_StringBuffer(storage)))
       let first = whole.startIndex
       self = whole[first..<whole.index(after: first)]
     }
   }
 }

 /// The smallest `.small` bit pattern that represents a one-byte ASCII
 /// character.
 ///
 /// A `.small` character stores its UTF-8 code units in an Int63, padding the
 /// unused bytes with 0xFF. An ASCII character therefore has every byte set to
 /// 0xFF except the lowest, which holds the ASCII value. Patterns of
 /// 0x7FFFFFFFFFFFFF80 and above are not valid UTF-8, so a stored value is
 /// ASCII exactly when it is greater than or equal to 0x7FFFFFFFFFFFFF00.
 internal var _minASCIICharReprBuiltin: Builtin.Int63 {
   @inline(__always) get {
     let threshold: Int64 = 0x7FFFFFFFFFFFFF00
     return Builtin.truncOrBitCast_Int64_Int63(threshold._value)
   }
 }

 extension Character : Equatable {
   /// Returns `true` when the two characters are equal.
   ///
   /// When both sides are small one-byte ASCII representations, their bit
   /// patterns are compared directly. Every other pairing is compared as
   /// strings.
   public static func == (lhs: Character, rhs: Character) -> Bool {
     if case let (.small(left), .small(right)) =
          (lhs._representation, rhs._representation),
        Bool(Builtin.cmp_uge_Int63(left, _minASCIICharReprBuiltin)),
        Bool(Builtin.cmp_uge_Int63(right, _minASCIICharReprBuiltin)) {
       return Bool(Builtin.cmp_eq_Int63(left, right))
     }
     // FIXME(performance): constructing two temporary strings is extremely
     // wasteful and inefficient.
     return String(lhs) == String(rhs)
   }
 }

 extension Character : Comparable {
   /// Returns `true` when `lhs` orders before `rhs`.
   ///
   /// When both sides are small one-byte ASCII representations, their bit
   /// patterns are compared as unsigned integers. Every other pairing is
   /// compared as strings.
   public static func < (lhs: Character, rhs: Character) -> Bool {
     // Note: This is consistent with Foundation but unicode incorrect.
     // See String._compareASCII.
     if case let (.small(left), .small(right)) =
          (lhs._representation, rhs._representation),
        Bool(Builtin.cmp_uge_Int63(left, _minASCIICharReprBuiltin)),
        Bool(Builtin.cmp_uge_Int63(right, _minASCIICharReprBuiltin)) {
       return Bool(Builtin.cmp_ult_Int63(left, right))
     }
     // FIXME(performance): constructing two temporary strings is extremely
     // wasteful and inefficient.
     return String(lhs) < String(rhs)
   }
 }
	//===----------------------------------------------------------------------===//
	//
	// This source file is part of the Swift.org open source project
	//
	// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
	// Licensed under Apache License v2.0 with Runtime Library Exception
	//
	// See http://swift.org/LICENSE.txt for license information
	// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
	//
	//===----------------------------------------------------------------------===//

	/// A single extended grapheme cluster, which approximates a user-perceived
	/// character.
	///
	/// The `Character` type represents a character made up of one or more Unicode
	/// scalar values, grouped by a Unicode boundary algorithm. Generally, a
	/// `Character` instance matches what the reader of a string will perceive as
	/// a single character. The number of visible characters is generally the most
	/// natural way to count the length of a string.
	///
	///     let greeting = "Hello! 🐥"
	///     print("Character count: \(greeting.characters.count)")
	///     // Prints "Character count: 8"
	///
	/// Because each character in a string can be made up of one or more Unicode
	/// code points, the number of characters in a string may not match the length
	/// of the Unicode code point representation or the length of the string in a
	/// particular binary representation.
	///
	///     print("Unicode code point count: \(greeting.unicodeScalars.count)")
	///     // Prints "Unicode code point count: 8"
	///
	///     print("UTF-8 representation count: \(greeting.utf8.count)")
	///     // Prints "UTF-8 representation count: 11"
	///
	/// Every `Character` instance is composed of one or more Unicode code points
	/// that are grouped together as an extended grapheme cluster. The way these
	/// code points are grouped is defined by a canonical, localized, or otherwise
	/// tailored Unicode segmentation algorithm.
	///
	/// For example, a country's Unicode flag character is made up of two regional
	/// indicator code points that correspond to that country's ISO 3166-1 alpha-2
	/// code. The alpha-2 code for The United States is "US", so its flag
	/// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL
	/// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL
	/// LETTER S). When placed next to each other in a Swift string literal, these
	/// two code points are combined into a single grapheme cluster, represented
	/// by a `Character` instance in Swift.
	///
	///     let usFlag: Character = "\u{1F1FA}\u{1F1F8}"
	///     print(usFlag)
	///     // Prints "🇺🇸"
	///
	/// For more information about the Unicode terms used in this discussion, see
	/// the [Unicode.org glossary][glossary]. In particular, this discussion
	/// mentions [extended grapheme clusters][clusters] and [Unicode scalar
	/// values][scalars].
	///
	/// [glossary]: http://www.unicode.org/glossary/
	/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
	/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
	public struct Character :
	  _ExpressibleByBuiltinExtendedGraphemeClusterLiteral,
	  ExpressibleByExtendedGraphemeClusterLiteral, Hashable {

	  // Fundamentally, it is just a String, but it is optimized for the
	  // common case where the UTF-8 representation fits in 63 bits. The
	  // remaining bit is used to discriminate between small and large
	  // representations. In the small representation, the unused bytes
	  // are filled with 0xFF.
	  //
	  // If the grapheme cluster can be represented as `.small`, it
	  // should be represented as such.
	  @_versioned
	  internal enum Representation {
	    // A _StringBuffer whose first grapheme cluster is self.
	    // NOTE: may be more than 1 Character long.
	    case large(_StringBuffer._Storage)
	    // UTF-8 code units packed with the first code unit in the lowest byte.
	    // The top bit of the 64-bit pattern is not stored; `_smallValue(_:)`
	    // restores it as 1.
	    case small(Builtin.Int63)
	  }

	  /// Creates a character containing the given Unicode scalar value.
	  ///
	  /// This initializer always produces the `.small` representation.
	  ///
	  /// - Parameter scalar: The Unicode scalar value to convert into a character.
	  public init(_ scalar: UnicodeScalar) {
	    // Accumulates the UTF-8 code units, first code unit in the lowest byte.
	    var asInt: UInt64 = 0
	    // Bit offset at which the next code unit is stored.
	    var shift: UInt64 = 0

	    let output: (UTF8.CodeUnit) -> Void = {
	      asInt |= UInt64($0) << shift
	      shift += 8
	    }

	    UTF8.encode(scalar, into: output)
	    // Set every bit at or above `shift`, so all unused bytes read as 0xFF.
	    asInt |= (~0) << shift
	    // Only 63 bits are stored; `_smallValue(_:)` sets bit 63 again on read.
	    _representation = .small(Builtin.trunc_Int64_Int63(asInt._value))
	  }

	  /// Creates a character from a built-in Unicode scalar literal value.
	  ///
	  /// Wraps `value` in a one-element UTF-32 sequence, builds a `String` from
	  /// it, and forwards to `init(_: String)`.
	  @effects(readonly)
	  public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
	    self = Character(
	      String._fromWellFormedCodeUnitSequence(
	        UTF32.self, input: CollectionOfOne(UInt32(value))))
	  }

	  /// Creates a character with the specified value.
	  ///
	  /// Don't call this initializer directly. It is used by the compiler when you
	  /// use a string literal to initialize a `Character` instance. For example:
	  ///
	  ///     let snowflake: Character = "❄︎"
	  ///     print(snowflake)
	  ///     // Prints "❄︎"
	  ///
	  /// The assignment to the `snowflake` constant calls this initializer behind
	  /// the scenes.
	  public init(unicodeScalarLiteral value: Character) {
	    self = value
	  }

	  /// Creates a character from the raw storage of an extended grapheme cluster
	  /// literal.
	  ///
	  /// Builds a `String` from the same three arguments and forwards to
	  /// `init(_: String)`.
	  @effects(readonly)
	  public init(
	    _builtinExtendedGraphemeClusterLiteral start: Builtin.RawPointer,
	    utf8CodeUnitCount: Builtin.Word,
	    isASCII: Builtin.Int1
	  ) {
	    self = Character(
	      String(
	        _builtinExtendedGraphemeClusterLiteral: start,
	        utf8CodeUnitCount: utf8CodeUnitCount,
	        isASCII: isASCII))
	  }

	  /// Creates a character with the specified value.
	  ///
	  /// Don't call this initializer directly. It is used by the compiler when you
	  /// use a string literal to initialize a `Character` instance. For example:
	  ///
	  ///     let oBreve: Character = "o\u{306}"
	  ///     print(oBreve)
	  ///     // Prints "ŏ"
	  ///
	  /// The assignment to the `oBreve` constant calls this initializer behind the
	  /// scenes.
	  public init(extendedGraphemeClusterLiteral value: Character) {
	    self = value
	  }

	  /// Creates a character from a single-character string.
	  ///
	  /// The following example creates a new character from the uppercase version
	  /// of a string that only holds one character.
	  ///
	  ///     let a = "a"
	  ///     let capitalA = Character(a.uppercased())
	  ///
	  /// - Parameter s: The single-character string to convert to a `Character`
	  ///   instance. `s` must contain exactly one extended grapheme cluster.
	  public init(_ s: String) {
	    // The small representation can accept up to 8 code units as long
	    // as the last one is a continuation. Since the high bit of the
	    // last byte is used for the enum's discriminator, we have to
	    // reconstruct it. As a result, we can't store 0x7f in the final
	    // byte, because we wouldn't be able to distinguish it from an
	    // unused 0xFF byte. Rather than trying to squeeze in other
	    // one-byte code points there, we simplify decoding by banning
	    // starting a code point in the last byte, and assuming that its
	    // high bit is 1.
	    _precondition(
	      s._core.count != 0, "Can't form a Character from an empty String")
	    _precondition(
	      s.index(after: s.startIndex) == s.endIndex,
	      "Can't form a Character from a String containing more than one extended grapheme cluster")

	    // NOTE(review): `_encodeSomeUTF8(from:)` is defined elsewhere. From its use
	    // below, `count` is presumably how many code units of `s._core` were
	    // encoded and `initialUTF8` the packed UTF-8 bytes -- confirm.
	    let (count, initialUTF8) = s._core._encodeSomeUTF8(from: 0)
	    // Notice that the result of `MemoryLayout.size(ofValue:)` is a small
	    // non-zero number and can't overflow when multiplied by 8.
	    // `bits` is the index of the highest bit of `initialUTF8`.
	    let bits = MemoryLayout.size(ofValue: initialUTF8) &* 8 &- 1
	    // Small form requires that the whole string was encoded and that the top
	    // bit is 1, because that bit is dropped on store and assumed on load.
	    if _fastPath(
	      count == s._core.count && (initialUTF8 & (1 << numericCast(bits))) != 0) {
	      _representation = .small(Builtin.trunc_Int64_Int63(initialUTF8._value))
	    }
	    else {
	      // Share the string's native buffer when the string begins at the start
	      // of that buffer.
	      if let native = s._core.nativeBuffer,
	         native.start == s._core._baseAddress! {
	        _representation = .large(native._storage)
	        return
	      }
	      // Otherwise append into a fresh string and keep that string's native
	      // buffer.
	      var nativeString = ""
	      nativeString.append(s)
	      _representation = .large(nativeString._core.nativeBuffer!._storage)
	    }
	  }

	  /// Returns the index of the lowest byte that is 0xFF, or 8 if
	  /// there is none.
	  ///
	  /// `_SmallUTF8` uses this as the number of UTF-8 code units held in `value`.
	  static func _smallSize(_ value: UInt64) -> Int {
	    var mask: UInt64 = 0xFF
	    for i in 0..<8 {
	      if (value & mask) == mask {
	        return i
	      }
	      mask <<= 8
	    }
	    return 8
	  }

	  /// Widens a stored 63-bit small representation to 64 bits and sets bit 63,
	  /// which is not part of the stored value.
	  static func _smallValue(_ value: Builtin.Int63) -> UInt64 {
	    return UInt64(Builtin.zext_Int63_Int64(value)) | (1<<63)
	  }

	  /// A random-access view of the UTF-8 code units packed into a 64-bit small
	  /// representation, with code unit `i` stored in byte `i` (lowest byte
	  /// first).
	  internal struct _SmallUTF8 : RandomAccessCollection {
	    typealias Indices = CountableRange<Int>

	    var indices: CountableRange<Int> {
	      return startIndex..<endIndex
	    }

	    /// Creates a view over `u8`; the code unit count is the index of the
	    /// lowest 0xFF byte, as computed by `Character._smallSize(_:)`.
	    init(_ u8: UInt64) {
	      let utf8Count = Character._smallSize(u8)
	      _sanityCheck(utf8Count <= 8, "Character with more than 8 UTF-8 code units")
	      self.count = UInt16(utf8Count)
	      self.data = u8
	    }

	    /// The position of the first element in a non-empty collection.
	    ///
	    /// In an empty collection, `startIndex == endIndex`.
	    var startIndex: Int {
	      return 0
	    }

	    /// The collection's "past the end" position.
	    ///
	    /// `endIndex` is not a valid argument to `subscript`, and is always
	    /// reachable from `startIndex` by zero or more applications of
	    /// `index(after:)`.
	    var endIndex: Int {
	      return Int(count)
	    }

	    /// Access the code unit at `position`.
	    ///
	    /// - Precondition: `position` is a valid position in `self` and
	    ///   `position != endIndex`.
	    subscript(position: Int) -> UTF8.CodeUnit {
	      _sanityCheck(position >= 0)
	      _sanityCheck(position < Int(count))
	      // Note: using unchecked arithmetic because overflow cannot happen if the
	      // above sanity checks hold.
	      return UTF8.CodeUnit(
	        truncatingBitPattern: data >> (UInt64(position) &* 8))
	    }

	    /// Yields the packed code units from the lowest byte upward and stops at
	    /// the first 0xFF byte.
	    internal struct Iterator : IteratorProtocol {
	      init(_ data: UInt64) {
	        self._data = data
	      }

	      internal mutating func next() -> UInt8? {
	        let result = UInt8(truncatingBitPattern: _data)
	        if result == 0xFF {
	          return nil
	        }
	        // Shift the next code unit into the low byte and feed 0xFF in at the
	        // top, so the terminator is still found after 8 code units.
	        _data = (_data >> 8) | 0xFF00_0000_0000_0000
	        return result
	      }

	      internal var _data: UInt64
	    }

	    internal func makeIterator() -> Iterator {
	      return Iterator(data)
	    }

	    // Number of code units in `data`.
	    var count: UInt16
	    // The packed 64-bit pattern, unused bytes set to 0xFF.
	    var data: UInt64
	  }

	  /// A random-access view of the UTF-16 transcoding of a small
	  /// representation.
	  ///
	  /// The code units are packed into `data` in the order they are produced,
	  /// each new one pushing the earlier ones 16 bits higher, so the last code
	  /// unit occupies the lowest 16 bits.
	  struct _SmallUTF16 : RandomAccessCollection {
	    typealias Indices = CountableRange<Int>

	    /// Transcodes the UTF-8 code units packed in `u8` to UTF-16.
	    init(_ u8: UInt64) {
	      // NOTE(review): the force unwrap presumably relies on
	      // `repairingIllFormedSequences: true` never yielding nil -- confirm.
	      let count = UTF16.transcodedLength(
	        of: _SmallUTF8(u8).makeIterator(),
	        decodedAs: UTF8.self,
	        repairingIllFormedSequences: true)!.0
	      _sanityCheck(count <= 4, "Character with more than 4 UTF-16 code units")
	      self.count = UInt16(count)
	      var u16: UInt64 = 0
	      let output: (UTF16.CodeUnit) -> Void = {
	        u16 = u16 << 16
	        u16 = u16 | UInt64($0)
	      }
	      _ = transcode(
	        _SmallUTF8(u8).makeIterator(),
	        from: UTF8.self, to: UTF16.self,
	        stoppingOnError: false,
	        into: output)
	      self.data = u16
	    }

	    /// The position of the first element in a non-empty collection.
	    ///
	    /// In an empty collection, `startIndex == endIndex`.
	    var startIndex: Int {
	      return 0
	    }

	    /// The collection's "past the end" position.
	    ///
	    /// `endIndex` is not a valid argument to `subscript`, and is always
	    /// reachable from `startIndex` by zero or more applications of
	    /// `index(after:)`.
	    var endIndex: Int {
	      return Int(count)
	    }

	    /// Access the code unit at `position`.
	    ///
	    /// - Precondition: `position` is a valid position in `self` and
	    ///   `position != endIndex`.
	    subscript(position: Int) -> UTF16.CodeUnit {
	      _sanityCheck(position >= 0)
	      _sanityCheck(position < Int(count))
	      // Note: using unchecked arithmetic because overflow cannot happen if the
	      // above sanity checks hold.
	      // Position 0 is the highest occupied 16-bit lane, hence the reversed
	      // shift amount.
	      return UTF16.CodeUnit(truncatingBitPattern:
	        data >> ((UInt64(count) &- UInt64(position) &- 1) &* 16))
	    }

	    // Number of UTF-16 code units in `data`.
	    var count: UInt16
	    // The packed UTF-16 code units.
	    var data: UInt64
	  }

	  /// The character's hash value.
	  ///
	  /// Hash values are not guaranteed to be equal across different executions of
	  /// your program. Do not save hash values to use during a future execution.
	  public var hashValue: Int {
	    // FIXME(performance): constructing a temporary string is extremely
	    // wasteful and inefficient.
	    return String(self).hashValue
	  }

	  typealias UTF16View = String.UTF16View
	  /// The UTF-16 view of a temporary string built from this character.
	  var utf16: UTF16View {
	    return String(self).utf16
	  }

	  // The character's storage: either a packed small value or a string buffer.
	  @_versioned
	  internal var _representation: Representation
	}

	extension Character : CustomStringConvertible {
	  /// A textual representation of the character.
	  ///
	  /// Built with `String.init(_: Character)`, the same conversion used by
	  /// `hashValue`, `utf16`, and `debugDescription`. Unlike
	  /// `String(describing:)`, this does not depend on runtime-dispatched
	  /// printing of `self`, so it cannot end up calling `description` again.
	  public var description: String {
	    return String(self)
	  }
	}

	// The body is empty, so the protocol's requirements are met by members
	// declared elsewhere on `Character` (presumably `init(_: String)` and
	// `description` -- confirm). `init(_: String)` is non-failable: it traps on an
	// empty string or one with more than one grapheme cluster instead of
	// returning nil.
	extension Character : LosslessStringConvertible {}

	extension Character : CustomDebugStringConvertible {
	  /// The debugging description of the one-character string made from this
	  /// character.
	  public var debugDescription: String {
	    let text = String(self)
	    return text.debugDescription
	  }
	}

	extension String {
	  /// Creates a string containing the given character.
	  ///
	  /// A small character is decoded from its packed UTF-8 code units. A large
	  /// character contributes only the first grapheme cluster of its buffer.
	  ///
	  /// - Parameter c: The character to convert to a string.
	  public init(_ c: Character) {
	    switch c._representation {
	    case let .large(storage):
	      // The buffer may hold more than one character; keep only the first.
	      let backing = String(_StringCore(_StringBuffer(storage)))
	      let head = backing.startIndex
	      let afterHead = backing.index(after: backing.startIndex)
	      self = backing[head..<afterHead]
	    case let .small(stored):
	      let widened = Character._smallValue(stored)
	      self = String._fromWellFormedCodeUnitSequence(
	        UTF8.self, input: Character._SmallUTF8(widened))
	    }
	  }
	}

	/// The smallest `.small` bit pattern that represents a one-byte ASCII
	/// character.
	///
	/// A `.small` character stores its UTF-8 code units in an Int63, padding the
	/// unused bytes with 0xFF. An ASCII character therefore has every byte set to
	/// 0xFF except the lowest, which holds the ASCII value. Patterns of
	/// 0x7FFFFFFFFFFFFF80 and above are not valid UTF-8, so a stored value is
	/// ASCII exactly when it is greater than or equal to 0x7FFFFFFFFFFFFF00.
	internal var _minASCIICharReprBuiltin: Builtin.Int63 {
	  @inline(__always) get {
	    let lowestASCIIPattern: Int64 = 0x7FFFFFFFFFFFFF00
	    return Builtin.truncOrBitCast_Int64_Int63(lowestASCIIPattern._value)
	  }
	}

	extension Character : Equatable {
	  /// Returns `true` when the two characters are equal.
	  ///
	  /// When both sides are small one-byte ASCII representations, their bit
	  /// patterns are compared directly. Every other pairing is compared as
	  /// strings.
	  public static func == (lhs: Character, rhs: Character) -> Bool {
	    guard case let (.small(left), .small(right)) =
	            (lhs._representation, rhs._representation),
	          Bool(Builtin.cmp_uge_Int63(left, _minASCIICharReprBuiltin)),
	          Bool(Builtin.cmp_uge_Int63(right, _minASCIICharReprBuiltin))
	    else {
	      // FIXME(performance): constructing two temporary strings is extremely
	      // wasteful and inefficient.
	      return String(lhs) == String(rhs)
	    }
	    return Bool(Builtin.cmp_eq_Int63(left, right))
	  }
	}

	extension Character : Comparable {
	  /// Returns `true` when `lhs` orders before `rhs`.
	  ///
	  /// When both sides are small one-byte ASCII representations, their bit
	  /// patterns are compared as unsigned integers. Every other pairing is
	  /// compared as strings.
	  public static func < (lhs: Character, rhs: Character) -> Bool {
	    // Note: This is consistent with Foundation but unicode incorrect.
	    // See String._compareASCII.
	    guard case let (.small(left), .small(right)) =
	            (lhs._representation, rhs._representation),
	          Bool(Builtin.cmp_uge_Int63(left, _minASCIICharReprBuiltin)),
	          Bool(Builtin.cmp_uge_Int63(right, _minASCIICharReprBuiltin))
	    else {
	      // FIXME(performance): constructing two temporary strings is extremely
	      // wasteful and inefficient.
	      return String(lhs) < String(rhs)
	    }
	    return Bool(Builtin.cmp_ult_Int63(left, right))
	  }
	}