// blob: 5dafbfe0ac98a392be21967a63546eb91ea223a8 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "lexer.h"
#include <ctype.h>
namespace fidl {
namespace {
// Returns true if |c| may appear inside an identifier: an alphanumeric
// character (as classified by isalnum()) or an underscore.
bool IsIdentifierBody(char c) {
    // The <ctype.h> classifiers require an argument representable as
    // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
    // must be converted first to avoid undefined behavior.
    return isalnum(static_cast<unsigned char>(c)) || c == '_';
}
// Returns true if |c| may appear inside a numeric literal token: a
// decimal or hexadecimal digit, the 'x'/'X' radix marker, or one of
// '-', '_' and '.'.
bool IsNumericLiteralBody(char c) {
    const bool decimal_digit = c >= '0' && c <= '9';
    if (decimal_digit)
        return true;

    const bool hex_letter = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    if (hex_letter)
        return true;

    return c == 'x' || c == 'X' || c == '-' || c == '_' || c == '.';
}
} // namespace
// Returns the character at the current read position without advancing.
constexpr char Lexer::Peek() const {
    return current_[0];
}
// Steps over the current character without adding it to the pending
// token: the token start moves forward together with the read position.
void Lexer::Skip() {
    ++token_start_;
    ++current_;
    ++offset_;
}
char Lexer::Consume() {
auto current = *current_;
++offset_;
++current_;
++token_size_;
return current;
}
// Returns a view of the characters accumulated for the pending token and
// begins a new, empty token at the current read position.
StringView Lexer::Reset() {
    StringView pending(token_start_, token_size_);
    token_size_ = 0u;
    token_start_ = current_;
    return pending;
}
// Builds a token of the given |kind| from the pending characters and the
// current offset, then starts a fresh pending token.
Token Lexer::Finish(Token::Kind kind) {
    StringView text = Reset();
    return Token(text, offset_, kind);
}
// Produces the end-of-file token from whatever has been consumed so far.
Token Lexer::LexEndOfStream() {
    const Token::Kind kind = Token::Kind::EndOfFile;
    return Finish(kind);
}
// Consumes every following character accepted by IsNumericLiteralBody()
// and returns the accumulated text as a NumericLiteral token.
Token Lexer::LexNumericLiteral() {
    for (;;) {
        if (!IsNumericLiteralBody(Peek()))
            break;
        Consume();
    }
    return Finish(Token::Kind::NumericLiteral);
}
// Consumes the remaining identifier characters and asks the identifier
// table to build the (non-escaped) identifier token.
Token Lexer::LexIdentifier() {
    for (char c = Peek(); IsIdentifierBody(c); c = Peek())
        Consume();

    StringView name = Reset();
    return identifier_table_->MakeIdentifier(name, offset_, /* escaped */ false);
}
// Lexes an identifier introduced by '@' and asks the identifier table to
// build it as an escaped identifier.
Token Lexer::LexEscapedIdentifier() {
    // Discard the pending text (the leading '@') so that it does not
    // become part of the identifier.
    Reset();

    for (char c = Peek(); IsIdentifierBody(c); c = Peek())
        Consume();

    StringView name = Reset();
    return identifier_table_->MakeIdentifier(name, offset_, /* escaped */ true);
}
// Lexes a "string literal" up to its closing delimiter. The opening '"'
// has already been consumed by Lex().
//
// Returns a StringLiteral token once an unescaped '"' is consumed, or a
// NotAToken token if the terminating NUL is reached first.
Token Lexer::LexStringLiteral() {
    // True when the previously consumed character was a backslash that
    // itself was not escaped. Tracking this explicitly (instead of just
    // comparing against the previous character) makes "\\" terminate
    // correctly: the second backslash is escaped by the first, so the
    // following '"' really is the closing delimiter.
    bool escaped = false;
    for (;;) {
        const char next = Consume();
        if (next == 0)
            return Finish(Token::Kind::NotAToken);
        if (escaped) {
            // Whatever follows an escaping backslash is taken literally.
            escaped = false;
            continue;
        }
        if (next == '\\') {
            escaped = true;
        } else if (next == '"') {
            return Finish(Token::Kind::StringLiteral);
        }
    }
}
// Lexes a C++-style // comment. The first '/' has already been consumed
// by Lex(); the comment extends up to, but does not include, the next
// newline or the terminating NUL.
Token Lexer::LexCXXComment() {
    // Take the second '/'.
    assert(Peek() == '/');
    Consume();

    while (Peek() != 0 && Peek() != '\n')
        Consume();

    return Finish(Token::Kind::Comment);
}
// Lexes a C-style /* comment */. The leading '/' has already been
// consumed by Lex().
//
// Returns a Comment token once the closing "*/" is consumed, or a
// NotAToken token if the terminating NUL is reached first.
Token Lexer::LexCComment() {
    // Consume the *.
    assert(Peek() == '*');
    Consume();
    // TODO(kulakowski) Commit to either true C-style comments, or
    // support nesting of /* */.
    // Start from a neutral previous character rather than the opening
    // '*': otherwise the '*' of "/*" could pair with an immediately
    // following '/', and "/*/" would be accepted as a complete comment.
    char last = 0;
    // Go to the next matching delimiter.
    for (;;) {
        const char next = Consume();
        if (next == 0)
            return Finish(Token::Kind::NotAToken);
        if (next == '/' && last == '*')
            return Finish(Token::Kind::Comment);
        last = next;
    }
}
// Skips over any run of spaces, newlines, carriage returns and tabs at
// the current read position.
void Lexer::SkipWhitespace() {
    for (;;) {
        const char c = Peek();
        const bool is_whitespace = c == ' ' || c == '\n' || c == '\r' || c == '\t';
        if (!is_whitespace)
            return;
        Skip();
    }
}
// Returns the next token that is not a comment, discarding any Comment
// tokens produced by Lex() along the way.
Token Lexer::LexNoComments() {
    for (;;) {
        auto candidate = Lex();
        if (candidate.kind() != Token::Kind::Comment)
            return candidate;
    }
}
// Lexes and returns the next token. Leading whitespace is skipped, the
// first character is consumed, and the token kind is chosen from it.
// Characters that do not start any known token yield NotAToken.
Token Lexer::Lex() {
    SkipWhitespace();

    const char first = Consume();

    // Identifiers start with a letter or an underscore. The range
    // comparisons assume an ASCII-compatible character set.
    const bool lower_letter = first >= 'a' && first <= 'z';
    const bool upper_letter = first >= 'A' && first <= 'Z';
    if (lower_letter || upper_letter || first == '_')
        return LexIdentifier();

    // Numeric literals start with a decimal digit (or '-', see below).
    if (first >= '0' && first <= '9')
        return LexNumericLiteral();

    switch (first) {
    case 0:
        return LexEndOfStream();

    case ' ':
    case '\n':
    case '\r':
    case '\t':
        assert(false && "Should have been handled by SkipWhitespace!");
        // Falls through into the '-' case when asserts are compiled out.
    case '-':
        // "->" is an arrow; any other '-' begins a numeric literal.
        if (Peek() == '>') {
            Consume();
            return Finish(Token::Kind::Arrow);
        }
        return LexNumericLiteral();

    case '@':
        return LexEscapedIdentifier();

    case '"':
        return LexStringLiteral();

    case '/':
        // Only "//" and "/*" are valid; a lone '/' is not a token.
        if (Peek() == '/')
            return LexCXXComment();
        if (Peek() == '*')
            return LexCComment();
        return Finish(Token::Kind::NotAToken);

    // Single-character punctuation.
    case '(':
        return Finish(Token::Kind::LeftParen);
    case ')':
        return Finish(Token::Kind::RightParen);
    case '[':
        return Finish(Token::Kind::LeftSquare);
    case ']':
        return Finish(Token::Kind::RightSquare);
    case '{':
        return Finish(Token::Kind::LeftCurly);
    case '}':
        return Finish(Token::Kind::RightCurly);
    case '<':
        return Finish(Token::Kind::LeftAngle);
    case '>':
        return Finish(Token::Kind::RightAngle);
    case '.':
        return Finish(Token::Kind::Dot);
    case ',':
        return Finish(Token::Kind::Comma);
    case ';':
        return Finish(Token::Kind::Semicolon);
    case ':':
        return Finish(Token::Kind::Colon);
    case '?':
        return Finish(Token::Kind::Question);
    case '=':
        return Finish(Token::Kind::Equal);
    case '&':
        return Finish(Token::Kind::Ampersand);

    default:
        return Finish(Token::Kind::NotAToken);
    }
}
} // namespace fidl