// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "fidl/lexer.h"

#include <assert.h>
#include <ctype.h>

namespace fidl {

namespace {

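// Identifier bodies are alphanumeric characters and underscores.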
bool IsIdentifierBody(char c) {
    return isalnum(static_cast<unsigned char>(c)) || c == '_';
}

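// Matches any character that may appear in a numeric literal: decimal and
// hex digits, the 'x'/'X' radix marker, '-', '_', and '.'.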
bool IsNumericLiteralBody(char c) {
    switch (c) {
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case 'a':
    case 'A':
    case 'b':
    case 'B':
    case 'c':
    case 'C':
    case 'd':
    case 'D':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'x':
    case 'X':
    case '-':
    case '_':
    case '.':
        return true;
    default:
        return false;
    }
}

} // namespace

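// Returns the current character without advancing past it.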
constexpr char Lexer::Peek() const {
    return *current_;
}

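// Advances past the current character without adding it to the token being
// built.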
void Lexer::Skip() {
    ++current_;
    ++token_start_;
}

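// Advances past the current character, adding it to the token being built,
// and returns it.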
char Lexer::Consume() {
    auto current = *current_;
    ++current_;
    ++token_size_;
    return current;
}

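// Returns the text accumulated for the current token and starts a fresh,
// empty token at the current position.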
StringView Lexer::Reset() {
    auto data = StringView(token_start_, token_size_);
    token_start_ = current_;
    token_size_ = 0u;
    return data;
}

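// Wraps up the accumulated text as a token of the given kind.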
Token Lexer::Finish(Token::Kind kind) {
    return Token(SourceLocation(Reset(), source_file_), kind);
}

Token Lexer::LexEndOfStream() {
    return Finish(Token::Kind::EndOfFile);
}

Token Lexer::LexNumericLiteral() {
    while (IsNumericLiteralBody(Peek()))
        Consume();
    return Finish(Token::Kind::NumericLiteral);
}

Token Lexer::LexIdentifier() {
    while (IsIdentifierBody(Peek()))
        Consume();
    return identifier_table_->MakeIdentifier(Reset(), source_file_, /* escaped */ false);
}

Token Lexer::LexEscapedIdentifier() {
    // Reset() to drop the initial @ from the identifier.
    Reset();

    while (IsIdentifierBody(Peek()))
        Consume();
    return identifier_table_->MakeIdentifier(Reset(), source_file_, /* escaped */ true);
}

Token Lexer::LexStringLiteral() {
    // Lexing a "string literal" to the next unescaped double quote. A
    // backslash escapes the character that follows it, so inputs such as
    // "\\" and "\"" do not terminate the literal early.
    for (;;) {
        switch (Consume()) {
        case 0:
            return Finish(Token::Kind::NotAToken);
        case '\\':
            // Consume the escaped character; if the stream ends here, the
            // literal is unterminated.
            if (Consume() == 0)
                return Finish(Token::Kind::NotAToken);
            break;
        case '"':
            return Finish(Token::Kind::StringLiteral);
        default:
            break;
        }
    }
}

Token Lexer::LexComment() {
    // Consume the second /.
    assert(Peek() == '/');
    Consume();

    // Lexing a C++-style // comment. Go to the end of the line or
    // file.
    for (;;) {
        switch (Peek()) {
        case 0:
        case '\n':
            return Finish(Token::Kind::Comment);
        default:
            Consume();
            continue;
        }
    }
}

void Lexer::SkipWhitespace() {
    for (;;) {
        switch (Peek()) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            Skip();
            continue;
        default:
            return;
        }
    }
}

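// Lexes the next token, silently discarding any comments.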
Token Lexer::LexNoComments() {
    for (;;) {
        auto token = Lex();
        if (token.kind() == Token::Kind::Comment)
            continue;
        return token;
    }
}

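// Lexes the next token, dispatching on its first character.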
Token Lexer::Lex() {
    SkipWhitespace();

    switch (Consume()) {
    case 0:
        return LexEndOfStream();

    case ' ':
    case '\n':
    case '\r':
    case '\t':
        assert(false && "Should have been handled by SkipWhitespace!");
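        // Unreachable: SkipWhitespace() has already consumed these characters.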

    case '-':
        // Maybe the start of an arrow.
        if (Peek() == '>') {
            Consume();
            return Finish(Token::Kind::Arrow);
        }
    // Fallthrough
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return LexNumericLiteral();

    case 'a':
    case 'A':
    case 'b':
    case 'B':
    case 'c':
    case 'C':
    case 'd':
    case 'D':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'h':
    case 'H':
    case 'i':
    case 'I':
    case 'j':
    case 'J':
    case 'k':
    case 'K':
    case 'l':
    case 'L':
    case 'm':
    case 'M':
    case 'n':
    case 'N':
    case 'o':
    case 'O':
    case 'p':
    case 'P':
    case 'q':
    case 'Q':
    case 'r':
    case 'R':
    case 's':
    case 'S':
    case 't':
    case 'T':
    case 'u':
    case 'U':
    case 'v':
    case 'V':
    case 'w':
    case 'W':
    case 'x':
    case 'X':
    case 'y':
    case 'Y':
    case 'z':
    case 'Z':
    case '_':
        return LexIdentifier();

    case '@':
        return LexEscapedIdentifier();

    case '"':
        return LexStringLiteral();

    case '/':
        // Maybe the start of a comment.
        switch (Peek()) {
        case '/':
            return LexComment();
        default:
            return Finish(Token::Kind::NotAToken);
        }

    case '(':
        return Finish(Token::Kind::LeftParen);
    case ')':
        return Finish(Token::Kind::RightParen);
    case '[':
        return Finish(Token::Kind::LeftSquare);
    case ']':
        return Finish(Token::Kind::RightSquare);
    case '{':
        return Finish(Token::Kind::LeftCurly);
    case '}':
        return Finish(Token::Kind::RightCurly);
    case '<':
        return Finish(Token::Kind::LeftAngle);
    case '>':
        return Finish(Token::Kind::RightAngle);

    case '.':
        return Finish(Token::Kind::Dot);
    case ',':
        return Finish(Token::Kind::Comma);
    case ';':
        return Finish(Token::Kind::Semicolon);
    case ':':
        return Finish(Token::Kind::Colon);
    case '?':
        return Finish(Token::Kind::Question);
    case '=':
        return Finish(Token::Kind::Equal);
    case '&':
        return Finish(Token::Kind::Ampersand);

    default:
        return Finish(Token::Kind::NotAToken);
    }
}

} // namespace fidl