// This source file is part of the Swift.org open source project
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
// Unicode.Scalar Type
extension Unicode {
  /// A single Unicode scalar value.
  ///
  /// `Unicode.Scalar` is the element type of a string's `unicodeScalars`
  /// collection.
  ///
  /// A string literal that contains a single character representing exactly
  /// one Unicode scalar value can be used to create an instance:
  ///
  ///     let letterK: Unicode.Scalar = "K"
  ///     let kim: Unicode.Scalar = "김"
  ///     print(letterK, kim)
  ///     // Prints "K 김"
  ///
  /// An instance can also be created from a numeric code point. The failable
  /// integer initializers return an optional, which must be unwrapped before
  /// use:
  ///
  ///     let airplane = Unicode.Scalar(9992)!
  ///     print(airplane)
  ///     // Prints "✈︎"
  public struct Scalar {
    /// The numeric code point stored by this scalar.
    @usableFromInline // FIXME(sil-serialize-all)
    internal var _value: UInt32

    /// Stores `_value` as-is; no validation is performed here.
    @inlinable // FIXME(sil-serialize-all)
    internal init(_value: UInt32) {
      self._value = _value
    }
  }
}
extension Unicode.Scalar :
    ExpressibleByUnicodeScalarLiteral {
  /// A numeric representation of the Unicode scalar.
  @inlinable // FIXME(sil-serialize-all)
  public var value: UInt32 { return _value }

  /// Creates a Unicode scalar from a builtin unicode scalar literal value.
  ///
  /// The builtin value is converted to `UInt32` and stored without any
  /// further validation.
  @inlinable // FIXME(sil-serialize-all)
  public init(_builtinUnicodeScalarLiteral value: Builtin.Int32) {
    self._value = UInt32(value)
  }

  /// Creates a Unicode scalar with the specified value.
  ///
  /// Do not call this initializer directly. It may be used by the compiler
  /// when you use a string literal to initialize a `Unicode.Scalar` instance.
  ///
  ///     let letterK: Unicode.Scalar = "K"
  ///     print(letterK)
  ///     // Prints "K"
  ///
  /// In this example, the assignment to the `letterK` constant is handled by
  /// this initializer behind the scenes.
  @inlinable // FIXME(sil-serialize-all)
  public init(unicodeScalarLiteral value: Unicode.Scalar) {
    self = value
  }

  /// Creates a Unicode scalar with the specified numeric value.
  ///
  /// For example, the following code sample creates a `Unicode.Scalar`
  /// instance with a value of an emoji character:
  ///
  ///     let codepoint: UInt32 = 127881
  ///     let emoji = Unicode.Scalar(codepoint)
  ///     print(emoji!)
  ///     // Prints "🎉"
  ///
  /// In case of an invalid input value, nil is returned.
  ///
  ///     let codepoint: UInt32 = extValue   // This might be an invalid value
  ///     if let emoji = Unicode.Scalar(codepoint) {
  ///       print(emoji)
  ///     } else {
  ///       // Do something else
  ///     }
  ///
  /// - Parameter v: The Unicode code point to use for the scalar. The
  ///   initializer succeeds if `v` is a valid Unicode scalar value---that is,
  ///   if `v` is in the range `0...0xD7FF` or `0xE000...0x10FFFF`. If `v` is
  ///   an invalid Unicode scalar value, the result is `nil`.
  @inlinable // FIXME(sil-serialize-all)
  public init?(_ v: UInt32) {
    // Unicode 6.3.0:
    //
    //     D9.  Unicode codespace: A range of integers from 0 to 10FFFF.
    //
    //     D76. Unicode scalar value: Any Unicode code point except
    //     high-surrogate and low-surrogate code points.
    //
    //     * As a result of this definition, the set of Unicode scalar values
    //     consists of the ranges 0 to D7FF and E000 to 10FFFF, inclusive.
    guard (v < 0xD800 || v > 0xDFFF) && v <= 0x10FFFF else {
      // Return nil in case of an invalid unicode scalar value.
      return nil
    }
    self._value = v
  }

  /// Creates a Unicode scalar with the specified numeric value.
  ///
  /// For example, the following code sample creates a `Unicode.Scalar`
  /// instance with a value of `"밥"`, the Korean word for rice:
  ///
  ///     let codepoint: UInt16 = 48165
  ///     let bap = Unicode.Scalar(codepoint)
  ///     print(bap!)
  ///     // Prints "밥"
  ///
  /// In case of an invalid input value, the result is `nil`.
  ///
  ///     let codepoint: UInt16 = extValue   // This might be an invalid value
  ///     if let bap = Unicode.Scalar(codepoint) {
  ///       print(bap)
  ///     } else {
  ///       // Do something else
  ///     }
  ///
  /// - Parameter v: The Unicode code point to use for the scalar. The
  ///   initializer succeeds if `v` is a valid Unicode scalar value, in the
  ///   range `0...0xD7FF` or `0xE000...0x10FFFF`. If `v` is an invalid
  ///   unicode scalar value, the result is `nil`.
  @inlinable // FIXME(sil-serialize-all)
  public init?(_ v: UInt16) {
    // Widen and reuse the `UInt32` initializer so that the surrogate check
    // lives in exactly one place.
    self.init(UInt32(v))
  }

  /// Creates a Unicode scalar with the specified numeric value.
  ///
  /// For example, the following code sample creates a `Unicode.Scalar`
  /// instance with a value of `"7"`:
  ///
  ///     let codepoint: UInt8 = 55
  ///     let seven = Unicode.Scalar(codepoint)
  ///     print(seven)
  ///     // Prints "7"
  ///
  /// - Parameter v: The code point to use for the scalar.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ v: UInt8) {
    // Every `UInt8` value lies below the surrogate range, so no validation
    // is required.
    self._value = UInt32(v)
  }

  /// Creates a duplicate of the given Unicode scalar.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ v: Unicode.Scalar) {
    // This constructor allows one to provide necessary type context to
    // disambiguate between function overloads on 'String' and 'Unicode.Scalar'.
    self = v
  }

  /// Returns a string representation of the Unicode scalar.
  ///
  /// Scalar values representing characters that are normally unprintable or
  /// that otherwise require escaping are escaped with a backslash.
  ///
  ///     let tab = Unicode.Scalar(9)
  ///     print(tab)
  ///     // Prints " "
  ///     print(tab.escaped(asASCII: false))
  ///     // Prints "\t"
  ///
  /// When the `forceASCII` parameter is `true`, a `Unicode.Scalar` instance
  /// with a value greater than 127 is represented using an escaped numeric
  /// value; otherwise, non-ASCII characters are represented using their
  /// typical string value.
  ///
  ///     let bap = Unicode.Scalar(48165)
  ///     print(bap.escaped(asASCII: false))
  ///     // Prints "밥"
  ///     print(bap.escaped(asASCII: true))
  ///     // Prints "\u{BC25}"
  ///
  /// - Parameter forceASCII: Pass `true` if you need the result to use only
  ///   ASCII characters; otherwise, pass `false`.
  /// - Returns: A string representation of the scalar.
  @inlinable // FIXME(sil-serialize-all)
  public func escaped(asASCII forceASCII: Bool) -> String {
    // Renders the low four bits of `v` as one uppercase hexadecimal digit.
    func lowNibbleAsHex(_ v: UInt32) -> String {
      let nibble = v & 15
      // 48 = '0'; 55 = 'A' - 10. The masked nibble keeps the result inside
      // ASCII, so the unchecked initializer is safe here.
      let offset: UInt32 = nibble < 10 ? 48 : 55
      return String(Unicode.Scalar(_value: nibble + offset))
    }

    // Builds `\u{...}` from the lowest `digitCount` nibbles of `value`,
    // most significant nibble first.
    func hexEscape(digitCount: Int) -> String {
      var result = "\\u{"
      for shift in stride(from: 4 * (digitCount - 1), through: 0, by: -4) {
        result += lowNibbleAsHex(value >> UInt32(shift))
      }
      result += "}"
      return result
    }

    // The order of the cases matters: backslash and the quote characters are
    // printable ASCII, so they must be matched before the printable check.
    switch self {
    case "\\":
      return "\\\\"
    case "\'":
      return "\\\'"
    case "\"":
      return "\\\""
    case _ where _isPrintableASCII:
      return String(self)
    case "\0":
      return "\\0"
    case "\n":
      return "\\n"
    case "\r":
      return "\\r"
    case "\t":
      return "\\t"
    case _ where value < 128:
      // Remaining ASCII control characters: two hex digits.
      return hexEscape(digitCount: 2)
    case _ where !forceASCII:
      return String(self)
    case _ where value <= 0xFFFF:
      return hexEscape(digitCount: 4)
    default:
      return hexEscape(digitCount: 8)
    }
  }

  /// A Boolean value indicating whether the Unicode scalar is an ASCII
  /// character.
  ///
  /// ASCII characters have a scalar value between 0 and 127, inclusive. For
  /// example:
  ///
  ///     let canyon = "Cañón"
  ///     for scalar in canyon.unicodeScalars {
  ///       print(scalar, scalar.isASCII, scalar.value)
  ///     }
  ///     // Prints "C true 67"
  ///     // Prints "a true 97"
  ///     // Prints "ñ false 241"
  ///     // Prints "ó false 243"
  ///     // Prints "n true 110"
  @inlinable // FIXME(sil-serialize-all)
  public var isASCII: Bool {
    return value <= 127
  }

  /// A Boolean value indicating whether the scalar is one of `"0"` through
  /// `"9"`.
  // FIXME: Is there a similar term of art in Unicode?
  @inlinable // FIXME(sil-serialize-all)
  public var _isASCIIDigit: Bool {
    return self >= "0" && self <= "9"
  }

  /// A Boolean value indicating whether the scalar lies between octal 040
  /// (space) and octal 176 (`~`), inclusive.
  // FIXME: Unicode makes this interesting.
  @inlinable // FIXME(sil-serialize-all)
  internal var _isPrintableASCII: Bool {
    return (self >= Unicode.Scalar(0o040) && self <= Unicode.Scalar(0o176))
  }
}
extension Unicode.Scalar : CustomStringConvertible, CustomDebugStringConvertible {
  /// The scalar rendered as a string containing only this scalar.
  @inlinable // FIXME(sil-serialize-all)
  public var description: String {
    return String(self)
  }

  /// The ASCII-only escaped form of the scalar wrapped in double quotes,
  /// suitable for debugging.
  public var debugDescription: String {
    let escapedForm = escaped(asASCII: true)
    return "\"" + escapedForm + "\""
  }
}
extension Unicode.Scalar : LosslessStringConvertible {
  /// Creates a Unicode scalar from a string that consists of exactly one
  /// Unicode scalar value.
  ///
  /// - Parameter description: The string to convert. If its `unicodeScalars`
  ///   view is empty or holds more than one scalar, the result is `nil`.
  @inlinable // FIXME(sil-serialize-all)
  public init?(_ description: String) {
    let scalars = description.unicodeScalars
    // `dropFirst().isEmpty` confirms there is no second scalar without
    // walking the whole view the way `count` would.
    guard let v = scalars.first, scalars.dropFirst().isEmpty else {
      return nil
    }
    self = v
  }
}
extension Unicode.Scalar : Hashable {
  /// Hashes the essential components of this value by feeding them into the
  /// given hasher.
  ///
  /// The only component fed to the hasher is the scalar's numeric `value`,
  /// which is the same component `==` compares.
  ///
  /// - Parameter hasher: The hasher to use when combining the components
  ///   of this instance.
  public func hash(into hasher: inout Hasher) {
    hasher.combine(value)
  }
}
extension Unicode.Scalar {
  /// Creates a Unicode scalar with the specified numeric value.
  ///
  /// For example, the following code sample creates a `Unicode.Scalar`
  /// instance with a value of an emoji character:
  ///
  ///     let codepoint = 127881
  ///     let emoji = Unicode.Scalar(codepoint)
  ///     print(emoji!)
  ///     // Prints "🎉"
  ///
  /// In case of an invalid input value, nil is returned.
  ///
  ///     let codepoint: Int = extValue   // This might be an invalid value.
  ///     if let emoji = Unicode.Scalar(codepoint) {
  ///       print(emoji)
  ///     } else {
  ///       // Do something else
  ///     }
  ///
  /// - Parameter v: The Unicode code point to use for the scalar. `v` must be
  ///   a valid Unicode scalar value, in the ranges `0...0xD7FF` or
  ///   `0xE000...0x10FFFF`. In case of an invalid unicode scalar value,
  ///   including a negative value or one too large for `UInt32`, nil is
  ///   returned.
  @inlinable // FIXME(sil-serialize-all)
  public init?(_ v: Int) {
    // `UInt32(exactly:)` rejects negative and oversized values with `nil`
    // instead of trapping the way the plain `UInt32(v)` conversion does.
    guard let codePoint = UInt32(exactly: v),
          let scalar = Unicode.Scalar(codePoint) else {
      return nil
    }
    self = scalar
  }
}
extension UInt8 {
  /// Creates a byte holding the numeric value of the given ASCII scalar.
  ///
  /// - Parameter v: The scalar whose `value` becomes the new byte.
  /// - Precondition: `v.value` can be represented as ASCII (0..<128).
  @inlinable // FIXME(sil-serialize-all)
  public init(ascii v: Unicode.Scalar) {
    let codePoint = v.value
    _precondition(codePoint < 128,
      "Code point value does not fit into ASCII")
    self.init(codePoint)
  }
}
extension UInt32 {
  /// Creates an integer equal to the numeric value of the given scalar.
  ///
  /// - Parameter v: The Unicode scalar whose `value` is copied.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ v: Unicode.Scalar) {
    let codePoint: UInt32 = v.value
    self = codePoint
  }
}
extension UInt64 {
  /// Creates an integer equal to the numeric value of the given scalar,
  /// widened to 64 bits.
  ///
  /// - Parameter v: The Unicode scalar whose `value` is converted.
  @inlinable // FIXME(sil-serialize-all)
  public init(_ v: Unicode.Scalar) {
    self.init(v.value)
  }
}
extension Unicode.Scalar : Equatable {
  /// Returns `true` when both scalars have the same numeric value.
  @inlinable // FIXME(sil-serialize-all)
  public static func == (lhs: Unicode.Scalar, rhs: Unicode.Scalar) -> Bool {
    let left = lhs.value
    let right = rhs.value
    return left == right
  }
}
extension Unicode.Scalar : Comparable {
  /// Returns `true` when the numeric value of `lhs` is smaller than the
  /// numeric value of `rhs`.
  @inlinable // FIXME(sil-serialize-all)
  public static func < (lhs: Unicode.Scalar, rhs: Unicode.Scalar) -> Bool {
    return rhs.value > lhs.value
  }
}
extension Unicode.Scalar {
  /// A view of a Unicode scalar that wraps the scalar it was created from.
  @_fixed_layout // FIXME(sil-serialize-all)
  public struct UTF16View {
    /// The scalar this view was created from.
    @usableFromInline // FIXME(sil-serialize-all)
    internal var value: Unicode.Scalar

    /// Creates a view that wraps `value`.
    @inlinable // FIXME(sil-serialize-all)
    internal init(value: Unicode.Scalar) {
      self.value = value
    }
  }

  /// A `UTF16View` wrapping this scalar.
  @inlinable // FIXME(sil-serialize-all)
  public var utf16: UTF16View {
    let view = UTF16View(value: self)
    return view
  }
}
extension Unicode.Scalar.UTF16View : RandomAccessCollection {
  public typealias Indices = Range<Int>

  /// The position of the first code unit, which is always zero.
  @inlinable // FIXME(sil-serialize-all)
  public var startIndex: Int {
    return 0
  }

  /// The "past the end" position---that is, the position one
  /// greater than the last valid subscript argument.
  ///
  /// Computed as `UTF16.width(value)` for the wrapped scalar.
  @inlinable // FIXME(sil-serialize-all)
  public var endIndex: Int {
    let width = UTF16.width(value)
    return 0 + width
  }

  /// Accesses the code unit at the specified position.
  ///
  /// - Parameter position: The position of the element to access. `position`
  ///   must be a valid index of the collection that is not equal to the
  ///   `endIndex` property.
  @inlinable // FIXME(sil-serialize-all)
  public subscript(position: Int) -> UTF16.CodeUnit {
    // Any position other than the first yields the trail surrogate.
    guard position == 0 else {
      return UTF16.trailSurrogate(value)
    }
    // A view of width one holds the scalar value itself as its only unit.
    if endIndex == 1 {
      return UTF16.CodeUnit(value.value)
    }
    return UTF16.leadSurrogate(value)
  }
}
/// Returns `c` as a `UTF16.CodeUnit`. Meant to be used as `_ascii16("x")`.
///
/// - Parameter c: The scalar to convert. It is expected to be ASCII
///   (`c.value <= 0x7F`); this is checked with `_sanityCheck`.
/// - Returns: The numeric value of `c` as a UTF-16 code unit.
@inlinable // FIXME(sil-serialize-all)
public // SPI(SwiftExperimental)
func _ascii16(_ c: Unicode.Scalar) -> UTF16.CodeUnit {
  // `value` is an unsigned `UInt32`, so only the upper bound needs checking.
  _sanityCheck(c.value <= 0x7F, "not ASCII")
  return UTF16.CodeUnit(c.value)
}
extension Unicode.Scalar {
  /// The scalar built directly from `UTF32._replacementCodeUnit`, using the
  /// unchecked `init(_value:)` initializer.
  @inlinable // FIXME(sil-serialize-all)
  internal static var _replacementCharacter: Unicode.Scalar {
    let codeUnit = UTF32._replacementCodeUnit
    return Unicode.Scalar(_value: codeUnit)
  }
}
extension Unicode.Scalar {
  /// Creates an instance of the NUL scalar value.
  ///
  /// This initializer is marked unavailable; the availability message
  /// directs callers to `Unicode.Scalar(0)` instead.
  @available(*, unavailable, message: "use 'Unicode.Scalar(0)'")
  public init() {
    // The declaration is unavailable, so no caller can reach this body. It
    // never returns, which keeps the initializer well-formed without
    // assigning `self`.
    fatalError("unavailable function can't be called")
  }
}
// `UnicodeScalar` is an alternative spelling of `Unicode.Scalar`.
// NOTE(review): the availability attribute below is commented out, so the
// alias currently carries no availability restriction. Presumably it was
// meant to obsolete this name in favor of `Unicode.Scalar`; confirm before
// re-enabling.
// @available(swift, obsoleted: 4.0, renamed: "Unicode.Scalar")
public typealias UnicodeScalar = Unicode.Scalar