Rollup merge of #107058 - clubby789:eqeq-homoglyph, r=wesleywiser
Recognise double-equals homoglyph
Recognise `⩵` as a homoglyph for `==`.
The first commit switches `char` to `&str`, as all previous homoglyphs corresponded to a single ASCII character, while the second implements the fix.
`@rustbot` label +A-diagnostics +A-parser
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 65479b3..34d003c 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -7,329 +7,331 @@
use rustc_span::{symbol::kw, BytePos, Pos, Span};
#[rustfmt::skip] // for line breaks
-pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[
- ('
', "Line Separator", ' '),
- ('
', "Paragraph Separator", ' '),
- (' ', "Ogham Space mark", ' '),
- (' ', "En Quad", ' '),
- (' ', "Em Quad", ' '),
- (' ', "En Space", ' '),
- (' ', "Em Space", ' '),
- (' ', "Three-Per-Em Space", ' '),
- (' ', "Four-Per-Em Space", ' '),
- (' ', "Six-Per-Em Space", ' '),
- (' ', "Punctuation Space", ' '),
- (' ', "Thin Space", ' '),
- (' ', "Hair Space", ' '),
- (' ', "Medium Mathematical Space", ' '),
- (' ', "No-Break Space", ' '),
- (' ', "Figure Space", ' '),
- (' ', "Narrow No-Break Space", ' '),
- (' ', "Ideographic Space", ' '),
+pub(crate) const UNICODE_ARRAY: &[(char, &str, &str)] = &[
+ ('
', "Line Separator", " "),
+ ('
', "Paragraph Separator", " "),
+ (' ', "Ogham Space mark", " "),
+ (' ', "En Quad", " "),
+ (' ', "Em Quad", " "),
+ (' ', "En Space", " "),
+ (' ', "Em Space", " "),
+ (' ', "Three-Per-Em Space", " "),
+ (' ', "Four-Per-Em Space", " "),
+ (' ', "Six-Per-Em Space", " "),
+ (' ', "Punctuation Space", " "),
+ (' ', "Thin Space", " "),
+ (' ', "Hair Space", " "),
+ (' ', "Medium Mathematical Space", " "),
+ (' ', "No-Break Space", " "),
+ (' ', "Figure Space", " "),
+ (' ', "Narrow No-Break Space", " "),
+ (' ', "Ideographic Space", " "),
- ('ߺ', "Nko Lajanyalan", '_'),
- ('﹍', "Dashed Low Line", '_'),
- ('﹎', "Centreline Low Line", '_'),
- ('﹏', "Wavy Low Line", '_'),
- ('_', "Fullwidth Low Line", '_'),
+ ('ߺ', "Nko Lajanyalan", "_"),
+ ('﹍', "Dashed Low Line", "_"),
+ ('﹎', "Centreline Low Line", "_"),
+ ('﹏', "Wavy Low Line", "_"),
+ ('_', "Fullwidth Low Line", "_"),
- ('‐', "Hyphen", '-'),
- ('‑', "Non-Breaking Hyphen", '-'),
- ('‒', "Figure Dash", '-'),
- ('–', "En Dash", '-'),
- ('—', "Em Dash", '-'),
- ('﹘', "Small Em Dash", '-'),
- ('۔', "Arabic Full Stop", '-'),
- ('⁃', "Hyphen Bullet", '-'),
- ('˗', "Modifier Letter Minus Sign", '-'),
- ('−', "Minus Sign", '-'),
- ('➖', "Heavy Minus Sign", '-'),
- ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
- ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
- ('-', "Fullwidth Hyphen-Minus", '-'),
- ('―', "Horizontal Bar", '-'),
- ('─', "Box Drawings Light Horizontal", '-'),
- ('━', "Box Drawings Heavy Horizontal", '-'),
- ('㇐', "CJK Stroke H", '-'),
- ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
- ('ᅳ', "Hangul Jungseong Eu", '-'),
- ('ㅡ', "Hangul Letter Eu", '-'),
- ('一', "CJK Unified Ideograph-4E00", '-'),
- ('⼀', "Kangxi Radical One", '-'),
+ ('‐', "Hyphen", "-"),
+ ('‑', "Non-Breaking Hyphen", "-"),
+ ('‒', "Figure Dash", "-"),
+ ('–', "En Dash", "-"),
+ ('—', "Em Dash", "-"),
+ ('﹘', "Small Em Dash", "-"),
+ ('۔', "Arabic Full Stop", "-"),
+ ('⁃', "Hyphen Bullet", "-"),
+ ('˗', "Modifier Letter Minus Sign", "-"),
+ ('−', "Minus Sign", "-"),
+ ('➖', "Heavy Minus Sign", "-"),
+ ('Ⲻ', "Coptic Letter Dialect-P Ni", "-"),
+ ('ー', "Katakana-Hiragana Prolonged Sound Mark", "-"),
+ ('-', "Fullwidth Hyphen-Minus", "-"),
+ ('―', "Horizontal Bar", "-"),
+ ('─', "Box Drawings Light Horizontal", "-"),
+ ('━', "Box Drawings Heavy Horizontal", "-"),
+ ('㇐', "CJK Stroke H", "-"),
+ ('ꟷ', "Latin Epigraphic Letter Sideways I", "-"),
+ ('ᅳ', "Hangul Jungseong Eu", "-"),
+ ('ㅡ', "Hangul Letter Eu", "-"),
+ ('一', "CJK Unified Ideograph-4E00", "-"),
+ ('⼀', "Kangxi Radical One", "-"),
- ('؍', "Arabic Date Separator", ','),
- ('٫', "Arabic Decimal Separator", ','),
- ('‚', "Single Low-9 Quotation Mark", ','),
- ('¸', "Cedilla", ','),
- ('ꓹ', "Lisu Letter Tone Na Po", ','),
- (',', "Fullwidth Comma", ','),
+ ('؍', "Arabic Date Separator", ","),
+ ('٫', "Arabic Decimal Separator", ","),
+ ('‚', "Single Low-9 Quotation Mark", ","),
+ ('¸', "Cedilla", ","),
+ ('ꓹ', "Lisu Letter Tone Na Po", ","),
+ (',', "Fullwidth Comma", ","),
- (';', "Greek Question Mark", ';'),
- (';', "Fullwidth Semicolon", ';'),
- ('︔', "Presentation Form For Vertical Semicolon", ';'),
+ (';', "Greek Question Mark", ";"),
+ (';', "Fullwidth Semicolon", ";"),
+ ('︔', "Presentation Form For Vertical Semicolon", ";"),
- ('ः', "Devanagari Sign Visarga", ':'),
- ('ઃ', "Gujarati Sign Visarga", ':'),
- (':', "Fullwidth Colon", ':'),
- ('։', "Armenian Full Stop", ':'),
- ('܃', "Syriac Supralinear Colon", ':'),
- ('܄', "Syriac Sublinear Colon", ':'),
- ('᛬', "Runic Multiple Punctuation", ':'),
- ('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
- ('᠃', "Mongolian Full Stop", ':'),
- ('᠉', "Mongolian Manchu Full Stop", ':'),
- ('⁚', "Two Dot Punctuation", ':'),
- ('׃', "Hebrew Punctuation Sof Pasuq", ':'),
- ('˸', "Modifier Letter Raised Colon", ':'),
- ('꞉', "Modifier Letter Colon", ':'),
- ('∶', "Ratio", ':'),
- ('ː', "Modifier Letter Triangular Colon", ':'),
- ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
- ('︓', "Presentation Form For Vertical Colon", ':'),
+ ('ः', "Devanagari Sign Visarga", ":"),
+ ('ઃ', "Gujarati Sign Visarga", ":"),
+ (':', "Fullwidth Colon", ":"),
+ ('։', "Armenian Full Stop", ":"),
+ ('܃', "Syriac Supralinear Colon", ":"),
+ ('܄', "Syriac Sublinear Colon", ":"),
+ ('᛬', "Runic Multiple Punctuation", ":"),
+ ('︰', "Presentation Form For Vertical Two Dot Leader", ":"),
+ ('᠃', "Mongolian Full Stop", ":"),
+ ('᠉', "Mongolian Manchu Full Stop", ":"),
+ ('⁚', "Two Dot Punctuation", ":"),
+ ('׃', "Hebrew Punctuation Sof Pasuq", ":"),
+ ('˸', "Modifier Letter Raised Colon", ":"),
+ ('꞉', "Modifier Letter Colon", ":"),
+ ('∶', "Ratio", ":"),
+ ('ː', "Modifier Letter Triangular Colon", ":"),
+ ('ꓽ', "Lisu Letter Tone Mya Jeu", ":"),
+ ('︓', "Presentation Form For Vertical Colon", ":"),
- ('!', "Fullwidth Exclamation Mark", '!'),
- ('ǃ', "Latin Letter Retroflex Click", '!'),
- ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
- ('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
+ ('!', "Fullwidth Exclamation Mark", "!"),
+ ('ǃ', "Latin Letter Retroflex Click", "!"),
+ ('ⵑ', "Tifinagh Letter Tuareg Yang", "!"),
+ ('︕', "Presentation Form For Vertical Exclamation Mark", "!"),
- ('ʔ', "Latin Letter Glottal Stop", '?'),
- ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
- ('ॽ', "Devanagari Letter Glottal Stop", '?'),
- ('Ꭾ', "Cherokee Letter He", '?'),
- ('ꛫ', "Bamum Letter Ntuu", '?'),
- ('?', "Fullwidth Question Mark", '?'),
- ('︖', "Presentation Form For Vertical Question Mark", '?'),
+ ('ʔ', "Latin Letter Glottal Stop", "?"),
+ ('Ɂ', "Latin Capital Letter Glottal Stop", "?"),
+ ('ॽ', "Devanagari Letter Glottal Stop", "?"),
+ ('Ꭾ', "Cherokee Letter He", "?"),
+ ('ꛫ', "Bamum Letter Ntuu", "?"),
+ ('?', "Fullwidth Question Mark", "?"),
+ ('︖', "Presentation Form For Vertical Question Mark", "?"),
- ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
- ('․', "One Dot Leader", '.'),
- ('܁', "Syriac Supralinear Full Stop", '.'),
- ('܂', "Syriac Sublinear Full Stop", '.'),
- ('꘎', "Vai Full Stop", '.'),
- ('𐩐', "Kharoshthi Punctuation Dot", '.'),
- ('٠', "Arabic-Indic Digit Zero", '.'),
- ('۰', "Extended Arabic-Indic Digit Zero", '.'),
- ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
- ('·', "Middle Dot", '.'),
- ('・', "Katakana Middle Dot", '.'),
- ('・', "Halfwidth Katakana Middle Dot", '.'),
- ('᛫', "Runic Single Punctuation", '.'),
- ('·', "Greek Ano Teleia", '.'),
- ('⸱', "Word Separator Middle Dot", '.'),
- ('𐄁', "Aegean Word Separator Dot", '.'),
- ('•', "Bullet", '.'),
- ('‧', "Hyphenation Point", '.'),
- ('∙', "Bullet Operator", '.'),
- ('⋅', "Dot Operator", '.'),
- ('ꞏ', "Latin Letter Sinological Dot", '.'),
- ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
- ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
- ('.', "Fullwidth Full Stop", '.'),
- ('。', "Ideographic Full Stop", '.'),
- ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
+ ('𝅭', "Musical Symbol Combining Augmentation Dot", "."),
+ ('․', "One Dot Leader", "."),
+ ('܁', "Syriac Supralinear Full Stop", "."),
+ ('܂', "Syriac Sublinear Full Stop", "."),
+ ('꘎', "Vai Full Stop", "."),
+ ('𐩐', "Kharoshthi Punctuation Dot", "."),
+ ('٠', "Arabic-Indic Digit Zero", "."),
+ ('۰', "Extended Arabic-Indic Digit Zero", "."),
+ ('ꓸ', "Lisu Letter Tone Mya Ti", "."),
+ ('·', "Middle Dot", "."),
+ ('・', "Katakana Middle Dot", "."),
+ ('・', "Halfwidth Katakana Middle Dot", "."),
+ ('᛫', "Runic Single Punctuation", "."),
+ ('·', "Greek Ano Teleia", "."),
+ ('⸱', "Word Separator Middle Dot", "."),
+ ('𐄁', "Aegean Word Separator Dot", "."),
+ ('•', "Bullet", "."),
+ ('‧', "Hyphenation Point", "."),
+ ('∙', "Bullet Operator", "."),
+ ('⋅', "Dot Operator", "."),
+ ('ꞏ', "Latin Letter Sinological Dot", "."),
+ ('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
+ ('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
+ ('.', "Fullwidth Full Stop", "."),
+ ('。', "Ideographic Full Stop", "."),
+ ('︒', "Presentation Form For Vertical Ideographic Full Stop", "."),
- ('՝', "Armenian Comma", '\''),
- (''', "Fullwidth Apostrophe", '\''),
- ('‘', "Left Single Quotation Mark", '\''),
- ('’', "Right Single Quotation Mark", '\''),
- ('‛', "Single High-Reversed-9 Quotation Mark", '\''),
- ('′', "Prime", '\''),
- ('‵', "Reversed Prime", '\''),
- ('՚', "Armenian Apostrophe", '\''),
- ('׳', "Hebrew Punctuation Geresh", '\''),
- ('`', "Grave Accent", '\''),
- ('`', "Greek Varia", '\''),
- ('`', "Fullwidth Grave Accent", '\''),
- ('´', "Acute Accent", '\''),
- ('΄', "Greek Tonos", '\''),
- ('´', "Greek Oxia", '\''),
- ('᾽', "Greek Koronis", '\''),
- ('᾿', "Greek Psili", '\''),
- ('῾', "Greek Dasia", '\''),
- ('ʹ', "Modifier Letter Prime", '\''),
- ('ʹ', "Greek Numeral Sign", '\''),
- ('ˈ', "Modifier Letter Vertical Line", '\''),
- ('ˊ', "Modifier Letter Acute Accent", '\''),
- ('ˋ', "Modifier Letter Grave Accent", '\''),
- ('˴', "Modifier Letter Middle Grave Accent", '\''),
- ('ʻ', "Modifier Letter Turned Comma", '\''),
- ('ʽ', "Modifier Letter Reversed Comma", '\''),
- ('ʼ', "Modifier Letter Apostrophe", '\''),
- ('ʾ', "Modifier Letter Right Half Ring", '\''),
- ('ꞌ', "Latin Small Letter Saltillo", '\''),
- ('י', "Hebrew Letter Yod", '\''),
- ('ߴ', "Nko High Tone Apostrophe", '\''),
- ('ߵ', "Nko Low Tone Apostrophe", '\''),
- ('ᑊ', "Canadian Syllabics West-Cree P", '\''),
- ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
- ('𖽑', "Miao Sign Aspiration", '\''),
- ('𖽒', "Miao Sign Reformed Voicing", '\''),
+ ('՝', "Armenian Comma", "\'"),
+ (''', "Fullwidth Apostrophe", "\'"),
+ ('‘', "Left Single Quotation Mark", "\'"),
+ ('’', "Right Single Quotation Mark", "\'"),
+ ('‛', "Single High-Reversed-9 Quotation Mark", "\'"),
+ ('′', "Prime", "\'"),
+ ('‵', "Reversed Prime", "\'"),
+ ('՚', "Armenian Apostrophe", "\'"),
+ ('׳', "Hebrew Punctuation Geresh", "\'"),
+ ('`', "Grave Accent", "\'"),
+ ('`', "Greek Varia", "\'"),
+ ('`', "Fullwidth Grave Accent", "\'"),
+ ('´', "Acute Accent", "\'"),
+ ('΄', "Greek Tonos", "\'"),
+ ('´', "Greek Oxia", "\'"),
+ ('᾽', "Greek Koronis", "\'"),
+ ('᾿', "Greek Psili", "\'"),
+ ('῾', "Greek Dasia", "\'"),
+ ('ʹ', "Modifier Letter Prime", "\'"),
+ ('ʹ', "Greek Numeral Sign", "\'"),
+ ('ˈ', "Modifier Letter Vertical Line", "\'"),
+ ('ˊ', "Modifier Letter Acute Accent", "\'"),
+ ('ˋ', "Modifier Letter Grave Accent", "\'"),
+ ('˴', "Modifier Letter Middle Grave Accent", "\'"),
+ ('ʻ', "Modifier Letter Turned Comma", "\'"),
+ ('ʽ', "Modifier Letter Reversed Comma", "\'"),
+ ('ʼ', "Modifier Letter Apostrophe", "\'"),
+ ('ʾ', "Modifier Letter Right Half Ring", "\'"),
+ ('ꞌ', "Latin Small Letter Saltillo", "\'"),
+ ('י', "Hebrew Letter Yod", "\'"),
+ ('ߴ', "Nko High Tone Apostrophe", "\'"),
+ ('ߵ', "Nko Low Tone Apostrophe", "\'"),
+ ('ᑊ', "Canadian Syllabics West-Cree P", "\'"),
+ ('ᛌ', "Runic Letter Short-Twig-Sol S", "\'"),
+ ('𖽑', "Miao Sign Aspiration", "\'"),
+ ('𖽒', "Miao Sign Reformed Voicing", "\'"),
- ('᳓', "Vedic Sign Nihshvasa", '"'),
- ('"', "Fullwidth Quotation Mark", '"'),
- ('“', "Left Double Quotation Mark", '"'),
- ('”', "Right Double Quotation Mark", '"'),
- ('‟', "Double High-Reversed-9 Quotation Mark", '"'),
- ('″', "Double Prime", '"'),
- ('‶', "Reversed Double Prime", '"'),
- ('〃', "Ditto Mark", '"'),
- ('״', "Hebrew Punctuation Gershayim", '"'),
- ('˝', "Double Acute Accent", '"'),
- ('ʺ', "Modifier Letter Double Prime", '"'),
- ('˶', "Modifier Letter Middle Double Acute Accent", '"'),
- ('˵', "Modifier Letter Middle Double Grave Accent", '"'),
- ('ˮ', "Modifier Letter Double Apostrophe", '"'),
- ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
- ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
- ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
+ ('᳓', "Vedic Sign Nihshvasa", "\""),
+ ('"', "Fullwidth Quotation Mark", "\""),
+ ('“', "Left Double Quotation Mark", "\""),
+ ('”', "Right Double Quotation Mark", "\""),
+ ('‟', "Double High-Reversed-9 Quotation Mark", "\""),
+ ('″', "Double Prime", "\""),
+ ('‶', "Reversed Double Prime", "\""),
+ ('〃', "Ditto Mark", "\""),
+ ('״', "Hebrew Punctuation Gershayim", "\""),
+ ('˝', "Double Acute Accent", "\""),
+ ('ʺ', "Modifier Letter Double Prime", "\""),
+ ('˶', "Modifier Letter Middle Double Acute Accent", "\""),
+ ('˵', "Modifier Letter Middle Double Grave Accent", "\""),
+ ('ˮ', "Modifier Letter Double Apostrophe", "\""),
+ ('ײ', "Hebrew Ligature Yiddish Double Yod", "\""),
+ ('❞', "Heavy Double Comma Quotation Mark Ornament", "\""),
+ ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", "\""),
- ('(', "Fullwidth Left Parenthesis", '('),
- ('❨', "Medium Left Parenthesis Ornament", '('),
- ('﴾', "Ornate Left Parenthesis", '('),
+ ('(', "Fullwidth Left Parenthesis", "("),
+ ('❨', "Medium Left Parenthesis Ornament", "("),
+ ('﴾', "Ornate Left Parenthesis", "("),
- (')', "Fullwidth Right Parenthesis", ')'),
- ('❩', "Medium Right Parenthesis Ornament", ')'),
- ('﴿', "Ornate Right Parenthesis", ')'),
+ (')', "Fullwidth Right Parenthesis", ")"),
+ ('❩', "Medium Right Parenthesis Ornament", ")"),
+ ('﴿', "Ornate Right Parenthesis", ")"),
- ('[', "Fullwidth Left Square Bracket", '['),
- ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
- ('「', "Left Corner Bracket", '['),
- ('『', "Left White Corner Bracket", '['),
- ('【', "Left Black Lenticular Bracket", '['),
- ('〔', "Left Tortoise Shell Bracket", '['),
- ('〖', "Left White Lenticular Bracket", '['),
- ('〘', "Left White Tortoise Shell Bracket", '['),
- ('〚', "Left White Square Bracket", '['),
+ ('[', "Fullwidth Left Square Bracket", "["),
+ ('❲', "Light Left Tortoise Shell Bracket Ornament", "["),
+ ('「', "Left Corner Bracket", "["),
+ ('『', "Left White Corner Bracket", "["),
+ ('【', "Left Black Lenticular Bracket", "["),
+ ('〔', "Left Tortoise Shell Bracket", "["),
+ ('〖', "Left White Lenticular Bracket", "["),
+ ('〘', "Left White Tortoise Shell Bracket", "["),
+ ('〚', "Left White Square Bracket", "["),
- (']', "Fullwidth Right Square Bracket", ']'),
- ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
- ('」', "Right Corner Bracket", ']'),
- ('』', "Right White Corner Bracket", ']'),
- ('】', "Right Black Lenticular Bracket", ']'),
- ('〕', "Right Tortoise Shell Bracket", ']'),
- ('〗', "Right White Lenticular Bracket", ']'),
- ('〙', "Right White Tortoise Shell Bracket", ']'),
- ('〛', "Right White Square Bracket", ']'),
+ (']', "Fullwidth Right Square Bracket", "]"),
+ ('❳', "Light Right Tortoise Shell Bracket Ornament", "]"),
+ ('」', "Right Corner Bracket", "]"),
+ ('』', "Right White Corner Bracket", "]"),
+ ('】', "Right Black Lenticular Bracket", "]"),
+ ('〕', "Right Tortoise Shell Bracket", "]"),
+ ('〗', "Right White Lenticular Bracket", "]"),
+ ('〙', "Right White Tortoise Shell Bracket", "]"),
+ ('〛', "Right White Square Bracket", "]"),
- ('❴', "Medium Left Curly Bracket Ornament", '{'),
- ('𝄔', "Musical Symbol Brace", '{'),
- ('{', "Fullwidth Left Curly Bracket", '{'),
+ ('❴', "Medium Left Curly Bracket Ornament", "{"),
+ ('𝄔', "Musical Symbol Brace", "{"),
+ ('{', "Fullwidth Left Curly Bracket", "{"),
- ('❵', "Medium Right Curly Bracket Ornament", '}'),
- ('}', "Fullwidth Right Curly Bracket", '}'),
+ ('❵', "Medium Right Curly Bracket Ornament", "}"),
+ ('}', "Fullwidth Right Curly Bracket", "}"),
- ('⁎', "Low Asterisk", '*'),
- ('٭', "Arabic Five Pointed Star", '*'),
- ('∗', "Asterisk Operator", '*'),
- ('𐌟', "Old Italic Letter Ess", '*'),
- ('*', "Fullwidth Asterisk", '*'),
+ ('⁎', "Low Asterisk", "*"),
+ ('٭', "Arabic Five Pointed Star", "*"),
+ ('∗', "Asterisk Operator", "*"),
+ ('𐌟', "Old Italic Letter Ess", "*"),
+ ('*', "Fullwidth Asterisk", "*"),
- ('᜵', "Philippine Single Punctuation", '/'),
- ('⁁', "Caret Insertion Point", '/'),
- ('∕', "Division Slash", '/'),
- ('⁄', "Fraction Slash", '/'),
- ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
- ('⟋', "Mathematical Rising Diagonal", '/'),
- ('⧸', "Big Solidus", '/'),
- ('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
- ('㇓', "CJK Stroke Sp", '/'),
- ('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
- ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
- ('ノ', "Katakana Letter No", '/'),
- ('丿', "CJK Unified Ideograph-4E3F", '/'),
- ('⼃', "Kangxi Radical Slash", '/'),
- ('/', "Fullwidth Solidus", '/'),
+ ('᜵', "Philippine Single Punctuation", "/"),
+ ('⁁', "Caret Insertion Point", "/"),
+ ('∕', "Division Slash", "/"),
+ ('⁄', "Fraction Slash", "/"),
+ ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", "/"),
+ ('⟋', "Mathematical Rising Diagonal", "/"),
+ ('⧸', "Big Solidus", "/"),
+ ('𝈺', "Greek Instrumental Notation Symbol-47", "/"),
+ ('㇓', "CJK Stroke Sp", "/"),
+ ('〳', "Vertical Kana Repeat Mark Upper Half", "/"),
+ ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", "/"),
+ ('ノ', "Katakana Letter No", "/"),
+ ('丿', "CJK Unified Ideograph-4E3F", "/"),
+ ('⼃', "Kangxi Radical Slash", "/"),
+ ('/', "Fullwidth Solidus", "/"),
- ('\', "Fullwidth Reverse Solidus", '\\'),
- ('﹨', "Small Reverse Solidus", '\\'),
- ('∖', "Set Minus", '\\'),
- ('⟍', "Mathematical Falling Diagonal", '\\'),
- ('⧵', "Reverse Solidus Operator", '\\'),
- ('⧹', "Big Reverse Solidus", '\\'),
- ('⧹', "Greek Vocal Notation Symbol-16", '\\'),
- ('⧹', "Greek Instrumental Symbol-48", '\\'),
- ('㇔', "CJK Stroke D", '\\'),
- ('丶', "CJK Unified Ideograph-4E36", '\\'),
- ('⼂', "Kangxi Radical Dot", '\\'),
- ('、', "Ideographic Comma", '\\'),
- ('ヽ', "Katakana Iteration Mark", '\\'),
+ ('\', "Fullwidth Reverse Solidus", "\\"),
+ ('﹨', "Small Reverse Solidus", "\\"),
+ ('∖', "Set Minus", "\\"),
+ ('⟍', "Mathematical Falling Diagonal", "\\"),
+ ('⧵', "Reverse Solidus Operator", "\\"),
+ ('⧹', "Big Reverse Solidus", "\\"),
+ ('⧹', "Greek Vocal Notation Symbol-16", "\\"),
+ ('⧹', "Greek Instrumental Symbol-48", "\\"),
+ ('㇔', "CJK Stroke D", "\\"),
+ ('丶', "CJK Unified Ideograph-4E36", "\\"),
+ ('⼂', "Kangxi Radical Dot", "\\"),
+ ('、', "Ideographic Comma", "\\"),
+ ('ヽ', "Katakana Iteration Mark", "\\"),
- ('ꝸ', "Latin Small Letter Um", '&'),
- ('&', "Fullwidth Ampersand", '&'),
+ ('ꝸ', "Latin Small Letter Um", "&"),
+ ('&', "Fullwidth Ampersand", "&"),
- ('᛭', "Runic Cross Punctuation", '+'),
- ('➕', "Heavy Plus Sign", '+'),
- ('𐊛', "Lycian Letter H", '+'),
- ('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
- ('+', "Fullwidth Plus Sign", '+'),
+ ('᛭', "Runic Cross Punctuation", "+"),
+ ('➕', "Heavy Plus Sign", "+"),
+ ('𐊛', "Lycian Letter H", "+"),
+ ('﬩', "Hebrew Letter Alternative Plus Sign", "+"),
+ ('+', "Fullwidth Plus Sign", "+"),
- ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
- ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
- ('˂', "Modifier Letter Left Arrowhead", '<'),
- ('𝈶', "Greek Instrumental Symbol-40", '<'),
- ('ᐸ', "Canadian Syllabics Pa", '<'),
- ('ᚲ', "Runic Letter Kauna", '<'),
- ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
- ('⟨', "Mathematical Left Angle Bracket", '<'),
- ('〈', "Left-Pointing Angle Bracket", '<'),
- ('〈', "Left Angle Bracket", '<'),
- ('㇛', "CJK Stroke Pd", '<'),
- ('く', "Hiragana Letter Ku", '<'),
- ('𡿨', "CJK Unified Ideograph-21FE8", '<'),
- ('《', "Left Double Angle Bracket", '<'),
- ('<', "Fullwidth Less-Than Sign", '<'),
+ ('‹', "Single Left-Pointing Angle Quotation Mark", "<"),
+ ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", "<"),
+ ('˂', "Modifier Letter Left Arrowhead", "<"),
+ ('𝈶', "Greek Instrumental Symbol-40", "<"),
+ ('ᐸ', "Canadian Syllabics Pa", "<"),
+ ('ᚲ', "Runic Letter Kauna", "<"),
+ ('❬', "Medium Left-Pointing Angle Bracket Ornament", "<"),
+ ('⟨', "Mathematical Left Angle Bracket", "<"),
+ ('〈', "Left-Pointing Angle Bracket", "<"),
+ ('〈', "Left Angle Bracket", "<"),
+ ('㇛', "CJK Stroke Pd", "<"),
+ ('く', "Hiragana Letter Ku", "<"),
+ ('𡿨', "CJK Unified Ideograph-21FE8", "<"),
+ ('《', "Left Double Angle Bracket", "<"),
+ ('<', "Fullwidth Less-Than Sign", "<"),
- ('᐀', "Canadian Syllabics Hyphen", '='),
- ('⹀', "Double Hyphen", '='),
- ('゠', "Katakana-Hiragana Double Hyphen", '='),
- ('꓿', "Lisu Punctuation Full Stop", '='),
- ('=', "Fullwidth Equals Sign", '='),
+ ('᐀', "Canadian Syllabics Hyphen", "="),
+ ('⹀', "Double Hyphen", "="),
+ ('゠', "Katakana-Hiragana Double Hyphen", "="),
+ ('꓿', "Lisu Punctuation Full Stop", "="),
+ ('=', "Fullwidth Equals Sign", "="),
- ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
- ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
- ('˃', "Modifier Letter Right Arrowhead", '>'),
- ('𝈷', "Greek Instrumental Symbol-42", '>'),
- ('ᐳ', "Canadian Syllabics Po", '>'),
- ('𖼿', "Miao Letter Archaic Zza", '>'),
- ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
- ('⟩', "Mathematical Right Angle Bracket", '>'),
- ('〉', "Right-Pointing Angle Bracket", '>'),
- ('〉', "Right Angle Bracket", '>'),
- ('》', "Right Double Angle Bracket", '>'),
- ('>', "Fullwidth Greater-Than Sign", '>'),
+ ('›', "Single Right-Pointing Angle Quotation Mark", ">"),
+ ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", ">"),
+ ('˃', "Modifier Letter Right Arrowhead", ">"),
+ ('𝈷', "Greek Instrumental Symbol-42", ">"),
+ ('ᐳ', "Canadian Syllabics Po", ">"),
+ ('𖼿', "Miao Letter Archaic Zza", ">"),
+ ('❭', "Medium Right-Pointing Angle Bracket Ornament", ">"),
+ ('⟩', "Mathematical Right Angle Bracket", ">"),
+ ('〉', "Right-Pointing Angle Bracket", ">"),
+ ('〉', "Right Angle Bracket", ">"),
+ ('》', "Right Double Angle Bracket", ">"),
+ ('>', "Fullwidth Greater-Than Sign", ">"),
+ ('⩵', "Two Consecutive Equals Signs", "==")
];
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
// fancier error recovery to it, as there will be less overall work to do this way.
-const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
- (' ', "Space", None),
- ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
- ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
- (',', "Comma", Some(token::Comma)),
- (';', "Semicolon", Some(token::Semi)),
- (':', "Colon", Some(token::Colon)),
- ('!', "Exclamation Mark", Some(token::Not)),
- ('?', "Question Mark", Some(token::Question)),
- ('.', "Period", Some(token::Dot)),
- ('(', "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
- (')', "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
- ('[', "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
- (']', "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
- ('{', "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
- ('}', "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
- ('*', "Asterisk", Some(token::BinOp(token::Star))),
- ('/', "Slash", Some(token::BinOp(token::Slash))),
- ('\\', "Backslash", None),
- ('&', "Ampersand", Some(token::BinOp(token::And))),
- ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
- ('<', "Less-Than Sign", Some(token::Lt)),
- ('=', "Equals Sign", Some(token::Eq)),
- ('>', "Greater-Than Sign", Some(token::Gt)),
+const ASCII_ARRAY: &[(&str, &str, Option<token::TokenKind>)] = &[
+ (" ", "Space", None),
+ ("_", "Underscore", Some(token::Ident(kw::Underscore, false))),
+ ("-", "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+ (",", "Comma", Some(token::Comma)),
+ (";", "Semicolon", Some(token::Semi)),
+ (":", "Colon", Some(token::Colon)),
+ ("!", "Exclamation Mark", Some(token::Not)),
+ ("?", "Question Mark", Some(token::Question)),
+ (".", "Period", Some(token::Dot)),
+ ("(", "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
+ (")", "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
+ ("[", "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
+ ("]", "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
+ ("{", "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
+ ("}", "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
+ ("*", "Asterisk", Some(token::BinOp(token::Star))),
+ ("/", "Slash", Some(token::BinOp(token::Slash))),
+ ("\\", "Backslash", None),
+ ("&", "Ampersand", Some(token::BinOp(token::And))),
+ ("+", "Plus Sign", Some(token::BinOp(token::Plus))),
+ ("<", "Less-Than Sign", Some(token::Lt)),
+ ("=", "Equals Sign", Some(token::Eq)),
+ ("==", "Double Equals Sign", Some(token::EqEq)),
+ (">", "Greater-Than Sign", Some(token::Gt)),
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
// spitting the correct token out.
- ('\'', "Single Quote", None),
- ('"', "Quotation Mark", None),
+ ("\'", "Single Quote", None),
+ ("\"", "Quotation Mark", None),
];
pub(super) fn check_for_substitution<'a>(
@@ -339,11 +341,11 @@
err: &mut Diagnostic,
count: usize,
) -> Option<token::TokenKind> {
- let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
+ let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
- let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else {
+ let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
let msg = format!("substitution character not found for '{}'", ch);
reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
return None;
@@ -354,7 +356,7 @@
let msg = format!(
"Unicode characters '“' (Left Double Quotation Mark) and \
'”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
- ascii_char, ascii_name
+ ascii_str, ascii_name
);
err.span_suggestion(
Span::with_root_ctxt(
@@ -368,12 +370,12 @@
} else {
let msg = format!(
"Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
- ch, u_name, ascii_char, ascii_name
+ ch, u_name, ascii_str, ascii_name
);
err.span_suggestion(
span,
&msg,
- ascii_char.to_string().repeat(count),
+ ascii_str.to_string().repeat(count),
Applicability::MaybeIncorrect,
);
}
diff --git a/tests/ui/parser/unicode-chars.rs b/tests/ui/parser/unicode-chars.rs
index f012256..cd25c75 100644
--- a/tests/ui/parser/unicode-chars.rs
+++ b/tests/ui/parser/unicode-chars.rs
@@ -6,4 +6,7 @@
//~^ ERROR unknown start of token: \u{a0}
//~^^ NOTE character appears 3 more times
//~^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
+ let _ = 1 ⩵ 2;
+ //~^ ERROR unknown start of token
+ //~^^ HELP Unicode character '⩵' (Two Consecutive Equals Signs) looks like '==' (Double Equals Sign), but it is not
}
diff --git a/tests/ui/parser/unicode-chars.stderr b/tests/ui/parser/unicode-chars.stderr
index b1d4a0a..086de5e 100644
--- a/tests/ui/parser/unicode-chars.stderr
+++ b/tests/ui/parser/unicode-chars.stderr
@@ -21,5 +21,16 @@
LL | let x = 0;
| ++++
-error: aborting due to 2 previous errors
+error: unknown start of token: \u{2a75}
+ --> $DIR/unicode-chars.rs:9:15
+ |
+LL | let _ = 1 ⩵ 2;
+ | ^
+ |
+help: Unicode character '⩵' (Two Consecutive Equals Signs) looks like '==' (Double Equals Sign), but it is not
+ |
+LL | let _ = 1 == 2;
+ | ~~
+
+error: aborting due to 3 previous errors