| //! A character type. |
| //! |
| //! The `char` type represents a single character. More specifically, since |
| //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode |
| //! scalar value]', which is similar to, but not the same as, a '[Unicode code |
| //! point]'. |
| //! |
| //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value |
| //! [Unicode code point]: http://www.unicode.org/glossary/#code_point |
| //! |
| //! This module exists for technical reasons; the primary documentation for |
| //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) |
| //! itself. |
| //! |
| //! This module is the home of the iterator implementations for the iterators |
| //! implemented on `char`, as well as some useful constants and conversion |
| //! functions that convert various types to `char`. |
| |
| #![allow(non_snake_case)] |
| #![stable(feature = "core_char", since = "1.2.0")] |
| |
| mod convert; |
| mod decode; |
| mod methods; |
| |
| // stable re-exports |
| #[stable(feature = "char_from_unchecked", since = "1.5.0")] |
| pub use self::convert::from_u32_unchecked; |
| #[stable(feature = "try_from", since = "1.34.0")] |
| pub use self::convert::CharTryFromError; |
| #[stable(feature = "char_from_str", since = "1.20.0")] |
| pub use self::convert::ParseCharError; |
| #[stable(feature = "rust1", since = "1.0.0")] |
| pub use self::convert::{from_digit, from_u32}; |
| #[stable(feature = "decode_utf16", since = "1.9.0")] |
| pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; |
| |
| // unstable re-exports |
| #[unstable(feature = "unicode_version", issue = "49726")] |
| pub use crate::unicode::tables::UNICODE_VERSION; |
| #[unstable(feature = "unicode_version", issue = "49726")] |
| pub use crate::unicode::version::UnicodeVersion; |
| |
| use crate::fmt::{self, Write}; |
| use crate::iter::FusedIterator; |
| |
// Tag bits and range limits used when encoding characters as UTF-8.
//
// In UTF-8 a continuation byte has the form `10xxxxxx`, and the lead byte of a
// two-, three- or four-byte sequence has the form `110xxxxx`, `1110xxxx` or
// `11110xxx` respectively; the `TAG_*` constants are those fixed high bits.
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
// One, two and three UTF-8 bytes carry 7, 11 and 16 payload bits, so each
// limit below is the first value that no longer fits in that many bytes.
const MAX_ONE_B: u32 = 1 << 7;
const MAX_TWO_B: u32 = 1 << 11;
const MAX_THREE_B: u32 = 1 << 16;
| |
| /* |
| Lu Uppercase_Letter an uppercase letter |
| Ll Lowercase_Letter a lowercase letter |
| Lt Titlecase_Letter a digraphic character, with first part uppercase |
| Lm Modifier_Letter a modifier letter |
| Lo Other_Letter other letters, including syllables and ideographs |
| Mn Nonspacing_Mark a nonspacing combining mark (zero advance width) |
| Mc Spacing_Mark a spacing combining mark (positive advance width) |
| Me Enclosing_Mark an enclosing combining mark |
| Nd Decimal_Number a decimal digit |
| Nl Letter_Number a letterlike numeric character |
| No Other_Number a numeric character of other type |
| Pc Connector_Punctuation a connecting punctuation mark, like a tie |
| Pd Dash_Punctuation a dash or hyphen punctuation mark |
| Ps Open_Punctuation an opening punctuation mark (of a pair) |
| Pe Close_Punctuation a closing punctuation mark (of a pair) |
| Pi Initial_Punctuation an initial quotation mark |
| Pf Final_Punctuation a final quotation mark |
| Po Other_Punctuation a punctuation mark of other type |
| Sm Math_Symbol a symbol of primarily mathematical use |
| Sc Currency_Symbol a currency sign |
| Sk Modifier_Symbol a non-letterlike modifier symbol |
| So Other_Symbol a symbol of other type |
| Zs Space_Separator a space character (of various non-zero widths) |
| Zl Line_Separator U+2028 LINE SEPARATOR only |
| Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only |
| Cc Control a C0 or C1 control code |
| Cf Format a format control character |
| Cs Surrogate a surrogate code point |
| Co Private_Use a private-use character |
| Cn Unassigned a reserved unassigned code point or a noncharacter |
| */ |
| |
| /// The highest valid code point a `char` can have. |
| /// |
| /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code |
| /// Point], but only ones within a certain range. `MAX` is the highest valid |
| /// code point that's a valid [Unicode Scalar Value]. |
| /// |
| /// [`char`]: ../../std/primitive.char.html |
| /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value |
| /// [Code Point]: http://www.unicode.org/glossary/#code_point |
| #[stable(feature = "rust1", since = "1.0.0")] |
| pub const MAX: char = '\u{10ffff}'; |
| |
| /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a |
| /// decoding error. |
| /// |
| /// It can occur, for example, when giving ill-formed UTF-8 bytes to |
| /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy). |
| #[stable(feature = "decode_utf16", since = "1.9.0")] |
| pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; |
| |
| /// Returns an iterator that yields the hexadecimal Unicode escape of a |
| /// character, as `char`s. |
| /// |
| /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See |
| /// its documentation for more. |
| /// |
| /// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode |
| /// [`char`]: ../../std/primitive.char.html |
| #[derive(Clone, Debug)] |
| #[stable(feature = "rust1", since = "1.0.0")] |
| pub struct EscapeUnicode { |
| c: char, |
| state: EscapeUnicodeState, |
| |
| // The index of the next hex digit to be printed (0 if none), |
| // i.e., the number of remaining hex digits to be printed; |
| // increasing from the least significant digit: 0x543210 |
| hex_digit_idx: usize, |
| } |
| |
// The variants are declared so that each discriminant equals the number of
// `char`s left to emit from that state, not counting the extra hexadecimal
// digits tracked separately by `hex_digit_idx`. This likely makes `len()` a
// single load from memory and therefore worth inlining.
#[derive(Debug, Clone)]
enum EscapeUnicodeState {
    Done = 0,
    RightBrace = 1,
    Value = 2,
    LeftBrace = 3,
    Type = 4,
    Backslash = 5,
}
| |
| #[stable(feature = "rust1", since = "1.0.0")] |
| impl Iterator for EscapeUnicode { |
| type Item = char; |
| |
| fn next(&mut self) -> Option<char> { |
| match self.state { |
| EscapeUnicodeState::Backslash => { |
| self.state = EscapeUnicodeState::Type; |
| Some('\\') |
| } |
| EscapeUnicodeState::Type => { |
| self.state = EscapeUnicodeState::LeftBrace; |
| Some('u') |
| } |
| EscapeUnicodeState::LeftBrace => { |
| self.state = EscapeUnicodeState::Value; |
| Some('{') |
| } |
| EscapeUnicodeState::Value => { |
| let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf; |
| let c = from_digit(hex_digit, 16).unwrap(); |
| if self.hex_digit_idx == 0 { |
| self.state = EscapeUnicodeState::RightBrace; |
| } else { |
| self.hex_digit_idx -= 1; |
| } |
| Some(c) |
| } |
| EscapeUnicodeState::RightBrace => { |
| self.state = EscapeUnicodeState::Done; |
| Some('}') |
| } |
| EscapeUnicodeState::Done => None, |
| } |
| } |
| |
| #[inline] |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| let n = self.len(); |
| (n, Some(n)) |
| } |
| |
| #[inline] |
| fn count(self) -> usize { |
| self.len() |
| } |
| |
| fn last(self) -> Option<char> { |
| match self.state { |
| EscapeUnicodeState::Done => None, |
| |
| EscapeUnicodeState::RightBrace |
| | EscapeUnicodeState::Value |
| | EscapeUnicodeState::LeftBrace |
| | EscapeUnicodeState::Type |
| | EscapeUnicodeState::Backslash => Some('}'), |
| } |
| } |
| } |
| |
| #[stable(feature = "exact_size_escape", since = "1.11.0")] |
| impl ExactSizeIterator for EscapeUnicode { |
| #[inline] |
| fn len(&self) -> usize { |
| // The match is a single memory access with no branching |
| self.hex_digit_idx |
| + match self.state { |
| EscapeUnicodeState::Done => 0, |
| EscapeUnicodeState::RightBrace => 1, |
| EscapeUnicodeState::Value => 2, |
| EscapeUnicodeState::LeftBrace => 3, |
| EscapeUnicodeState::Type => 4, |
| EscapeUnicodeState::Backslash => 5, |
| } |
| } |
| } |
| |
| #[stable(feature = "fused", since = "1.26.0")] |
| impl FusedIterator for EscapeUnicode {} |
| |
| #[stable(feature = "char_struct_display", since = "1.16.0")] |
| impl fmt::Display for EscapeUnicode { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| for c in self.clone() { |
| f.write_char(c)?; |
| } |
| Ok(()) |
| } |
| } |
| |
| /// An iterator that yields the literal escape code of a `char`. |
| /// |
| /// This `struct` is created by the [`escape_default`] method on [`char`]. See |
| /// its documentation for more. |
| /// |
| /// [`escape_default`]: ../../std/primitive.char.html#method.escape_default |
| /// [`char`]: ../../std/primitive.char.html |
| #[derive(Clone, Debug)] |
| #[stable(feature = "rust1", since = "1.0.0")] |
| pub struct EscapeDefault { |
| state: EscapeDefaultState, |
| } |
| |
| #[derive(Clone, Debug)] |
| enum EscapeDefaultState { |
| Done, |
| Char(char), |
| Backslash(char), |
| Unicode(EscapeUnicode), |
| } |
| |
| #[stable(feature = "rust1", since = "1.0.0")] |
| impl Iterator for EscapeDefault { |
| type Item = char; |
| |
| fn next(&mut self) -> Option<char> { |
| match self.state { |
| EscapeDefaultState::Backslash(c) => { |
| self.state = EscapeDefaultState::Char(c); |
| Some('\\') |
| } |
| EscapeDefaultState::Char(c) => { |
| self.state = EscapeDefaultState::Done; |
| Some(c) |
| } |
| EscapeDefaultState::Done => None, |
| EscapeDefaultState::Unicode(ref mut iter) => iter.next(), |
| } |
| } |
| |
| #[inline] |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| let n = self.len(); |
| (n, Some(n)) |
| } |
| |
| #[inline] |
| fn count(self) -> usize { |
| self.len() |
| } |
| |
| fn nth(&mut self, n: usize) -> Option<char> { |
| match self.state { |
| EscapeDefaultState::Backslash(c) if n == 0 => { |
| self.state = EscapeDefaultState::Char(c); |
| Some('\\') |
| } |
| EscapeDefaultState::Backslash(c) if n == 1 => { |
| self.state = EscapeDefaultState::Done; |
| Some(c) |
| } |
| EscapeDefaultState::Backslash(_) => { |
| self.state = EscapeDefaultState::Done; |
| None |
| } |
| EscapeDefaultState::Char(c) => { |
| self.state = EscapeDefaultState::Done; |
| |
| if n == 0 { Some(c) } else { None } |
| } |
| EscapeDefaultState::Done => None, |
| EscapeDefaultState::Unicode(ref mut i) => i.nth(n), |
| } |
| } |
| |
| fn last(self) -> Option<char> { |
| match self.state { |
| EscapeDefaultState::Unicode(iter) => iter.last(), |
| EscapeDefaultState::Done => None, |
| EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c), |
| } |
| } |
| } |
| |
| #[stable(feature = "exact_size_escape", since = "1.11.0")] |
| impl ExactSizeIterator for EscapeDefault { |
| fn len(&self) -> usize { |
| match self.state { |
| EscapeDefaultState::Done => 0, |
| EscapeDefaultState::Char(_) => 1, |
| EscapeDefaultState::Backslash(_) => 2, |
| EscapeDefaultState::Unicode(ref iter) => iter.len(), |
| } |
| } |
| } |
| |
| #[stable(feature = "fused", since = "1.26.0")] |
| impl FusedIterator for EscapeDefault {} |
| |
| #[stable(feature = "char_struct_display", since = "1.16.0")] |
| impl fmt::Display for EscapeDefault { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| for c in self.clone() { |
| f.write_char(c)?; |
| } |
| Ok(()) |
| } |
| } |
| |
| /// An iterator that yields the literal escape code of a `char`. |
| /// |
| /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its |
| /// documentation for more. |
| /// |
| /// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug |
| /// [`char`]: ../../std/primitive.char.html |
| #[stable(feature = "char_escape_debug", since = "1.20.0")] |
| #[derive(Clone, Debug)] |
| pub struct EscapeDebug(EscapeDefault); |
| |
| #[stable(feature = "char_escape_debug", since = "1.20.0")] |
| impl Iterator for EscapeDebug { |
| type Item = char; |
| fn next(&mut self) -> Option<char> { |
| self.0.next() |
| } |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.0.size_hint() |
| } |
| } |
| |
| #[stable(feature = "char_escape_debug", since = "1.20.0")] |
| impl ExactSizeIterator for EscapeDebug {} |
| |
| #[stable(feature = "fused", since = "1.26.0")] |
| impl FusedIterator for EscapeDebug {} |
| |
| #[stable(feature = "char_escape_debug", since = "1.20.0")] |
| impl fmt::Display for EscapeDebug { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| fmt::Display::fmt(&self.0, f) |
| } |
| } |
| |
| /// Returns an iterator that yields the lowercase equivalent of a `char`. |
| /// |
| /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See |
| /// its documentation for more. |
| /// |
| /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase |
| /// [`char`]: ../../std/primitive.char.html |
| #[stable(feature = "rust1", since = "1.0.0")] |
| #[derive(Debug, Clone)] |
| pub struct ToLowercase(CaseMappingIter); |
| |
| #[stable(feature = "rust1", since = "1.0.0")] |
| impl Iterator for ToLowercase { |
| type Item = char; |
| fn next(&mut self) -> Option<char> { |
| self.0.next() |
| } |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.0.size_hint() |
| } |
| } |
| |
| #[stable(feature = "fused", since = "1.26.0")] |
| impl FusedIterator for ToLowercase {} |
| |
| #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] |
| impl ExactSizeIterator for ToLowercase {} |
| |
| /// Returns an iterator that yields the uppercase equivalent of a `char`. |
| /// |
| /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See |
| /// its documentation for more. |
| /// |
| /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase |
| /// [`char`]: ../../std/primitive.char.html |
| #[stable(feature = "rust1", since = "1.0.0")] |
| #[derive(Debug, Clone)] |
| pub struct ToUppercase(CaseMappingIter); |
| |
| #[stable(feature = "rust1", since = "1.0.0")] |
| impl Iterator for ToUppercase { |
| type Item = char; |
| fn next(&mut self) -> Option<char> { |
| self.0.next() |
| } |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.0.size_hint() |
| } |
| } |
| |
| #[stable(feature = "fused", since = "1.26.0")] |
| impl FusedIterator for ToUppercase {} |
| |
| #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] |
| impl ExactSizeIterator for ToUppercase {} |
| |
// Iterator over the one to three `char`s of a case mapping. Each variant
// holds exactly the `char`s that have not been yielded yet.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    // Builds the iterator from a fixed-size array in which trailing `'\0'`
    // entries mean "no further character".
    fn new(chars: [char; 3]) -> CaseMappingIter {
        match chars {
            // Also covers `chars[0] == '\0'`: the first entry is always kept.
            [first, '\0', '\0'] => CaseMappingIter::One(first),
            [first, second, '\0'] => CaseMappingIter::Two(first, second),
            [first, second, third] => CaseMappingIter::Three(first, second, third),
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Peel off the leading `char` and shrink to the next smaller variant.
        let (head, rest) = match *self {
            CaseMappingIter::Zero => return None,
            CaseMappingIter::One(a) => (a, CaseMappingIter::Zero),
            CaseMappingIter::Two(a, b) => (a, CaseMappingIter::One(b)),
            CaseMappingIter::Three(a, b, c) => (a, CaseMappingIter::Two(b, c)),
        };
        *self = rest;
        Some(head)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // The variant itself says how many `char`s are left.
        let remaining = match *self {
            CaseMappingIter::Zero => 0,
            CaseMappingIter::One(..) => 1,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::Three(..) => 3,
        };
        (remaining, Some(remaining))
    }
}

impl fmt::Display for CaseMappingIter {
    // Writes the remaining `char`s in order, stopping at the first write
    // error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Drive a clone so that formatting leaves `self` untouched.
        self.clone().try_for_each(|ch| f.write_char(ch))
    }
}
| |
| #[stable(feature = "char_struct_display", since = "1.16.0")] |
| impl fmt::Display for ToLowercase { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| fmt::Display::fmt(&self.0, f) |
| } |
| } |
| |
| #[stable(feature = "char_struct_display", since = "1.16.0")] |
| impl fmt::Display for ToUppercase { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| fmt::Display::fmt(&self.0, f) |
| } |
| } |