include/llbuild/Ninja/Lexer.h - third_party/swift-llbuild - Git at Google

 //===- Lexer.h --------------------------------------------------*- C++ -*-===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See http://swift.org/LICENSE.txt for license information
 // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLBUILD_NINJA_LEXER_H
 #define LLBUILD_NINJA_LEXER_H

 #include <cstdint>

 #include "llvm/ADT/StringRef.h"

 #include "llbuild/Basic/LLVM.h"

 namespace llbuild {
 namespace ninja {

 /// A single token produced while lexing a Ninja build manifest.
 struct Token {
   /// The different kinds of token.
   enum class Kind {
     Colon,                    ///< A ':' character.
     Comment,                  ///< A comment: '# ...'.
     EndOfFile,                ///< The end of the file.
     Equals,                   ///< An '=' character.
     Indentation,              ///< Leading whitespace.
     Identifier,               ///< An "identifier"; really, everything else.
     KWBuild,                  ///< The 'build' keyword.
     KWDefault,                ///< The 'default' keyword.
     KWInclude,                ///< The 'include' keyword.
     KWPool,                   ///< The 'pool' keyword.
     KWRule,                   ///< The 'rule' keyword.
     KWSubninja,               ///< The 'subninja' keyword.
     Newline,                  ///< The end of a line.
     Pipe,                     ///< A '|' token.
     PipePipe,                 ///< A '||' token.
     String,                   ///< A string; only lexed in string mode.
     Unknown,                  ///< Any other input.

     // Aliases delimiting the contiguous run of keyword kinds; isKeyword()
     // relies on every keyword enumerator lying between these two.
     KWKindFirst = KWBuild,
     KWKindLast = KWSubninja
   };

   const char* start;          ///< The beginning of the token string.
   Kind        tokenKind;      ///< The token kind.
   unsigned    length;         ///< The length of the token.
   unsigned    line;           ///< The line number of the start of this token.
   unsigned    column;         ///< The column number at the start of this token.

   /// Get the name of this token's kind.
   const char *getKindName() const;

   /// Check whether this token's kind is one of the keyword kinds, i.e.
   /// whether it falls in the range [KWKindFirst, KWKindLast].
   bool isKeyword() const {
     if (tokenKind < Kind::KWKindFirst)
       return false;
     if (tokenKind > Kind::KWKindLast)
       return false;
     return true;
   }

   /// Dump the token to stderr.
   void dump();
 };

 /// Interface for lexing tokens from a Ninja build manifest.
 ///
 /// The Ninja manifest language unfortunately has no real string token; rather,
 /// the lexing is done in a context-sensitive fashion and string tokens are only
 /// recognized when the lexer is in a specific mode. Identifier tokens also
 /// behave slightly differently when lexed in a context where only an
 /// identifier is expected. \see Lexer::LexingMode and \see
 /// Lexer::setMode().
 class Lexer {
 public:
   /// The context-dependent modes that control how strings and identifiers
   /// are lexed.
   enum class LexingMode {
     /// String tokens are not recognized, and identifier tokens follow the
     /// usual rules.
     None,

     /// String tokens are not recognized, and identifier tokens are lexed
     /// following identifier-specific rules.
     IdentifierSpecific,

     /// Strings are lexed as expected for path references.
     PathString,

     /// Strings are lexed as expected for variable assignments.
     VariableString,
   };

 private:
   /// The contents of the buffer being lexed.
   StringRef buffer;

   const char* bufferPos;      ///< The lexer's current position.
   unsigned    lineNumber;     ///< The line the lexer is currently on.
   unsigned    columnNumber;   ///< The column the lexer is currently on.
   LexingMode  mode;           ///< The lexing mode currently in effect.

   /// Consume the next character from the stream and return it, or return -1.
   int getNextChar();

   /// Look at the next character in the stream without consuming it. No
   /// newline canonicalization is performed.
   int peekNextChar();

   /// Advance the lexer to the end of the current line.
   void skipToEndOfLine();

   /// Fill in the kind and length of \p result from the current lexer
   /// position, then return \p result.
   Token& setTokenKind(Token& result, Token::Kind kind) const;

   /// Fill in the kind of \p result, treating it as either an identifier or a
   /// keyword, then return \p result.
   Token& setIdentifierTokenKind(Token& result) const;

   /// Lex a token into \p result; the current position is assumed to be the
   /// start of an identifier.
   Token& lexIdentifier(Token& result);

   /// Lex a token into \p result; the current position is assumed to be the
   /// start of a string and the lexer is assumed to be in the "path" string
   /// mode.
   Token& lexPathString(Token& result);

   /// Lex a token into \p result; the current position is assumed to be the
   /// start of a string and the lexer is assumed to be in the "variable"
   /// string mode.
   Token& lexVariableString(Token& result);

 public:
   explicit Lexer(StringRef contents);
   ~Lexer();

   /// Lex the next token from the file into \p result and return it. Once the
   /// end of the file has been reached, EOF is returned on every call. The
   /// argument doubles as the return value for convenience.
   Token& lex(Token& result);

   /// Access the contents of the buffer being lexed.
   StringRef getBuffer() const { return buffer; }

   /// Query the lexing mode currently in effect.
   LexingMode getMode() const { return mode; }

   /// Change the lexing mode to \p value.
   void setMode(LexingMode value) { mode = value; }

   /// @name Utility Methods
   /// @{

   /// Check whether the character \p c is valid in an identifier: an ASCII
   /// letter or digit, '_', '-', or '.'.
   static bool isIdentifierChar(char c) {
     // An identifier character is any simple identifier character, plus '.'.
     return isSimpleIdentifierChar(c) || c == '.';
   }

   /// Check whether the character \p c is valid in a simple identifier (one
   /// which can appear in the middle of an expression string outside braces):
   /// an ASCII letter or digit, '_', or '-'.
   static bool isSimpleIdentifierChar(char c) {
     const bool isLower = c >= 'a' && c <= 'z';
     const bool isUpper = c >= 'A' && c <= 'Z';
     const bool isDigit = c >= '0' && c <= '9';
     return isLower || isUpper || isDigit || c == '_' || c == '-';
   }

   /// @}
 };

 }
 }

 #endif
	//===- Lexer.h --------------------------------------------------*- C++ -*-===//
	//
	// This source file is part of the Swift.org open source project
	//
	// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
	// Licensed under Apache License v2.0 with Runtime Library Exception
	//
	// See http://swift.org/LICENSE.txt for license information
	// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLBUILD_NINJA_LEXER_H
	#define LLBUILD_NINJA_LEXER_H

	#include <cstdint>

	#include "llvm/ADT/StringRef.h"

	#include "llbuild/Basic/LLVM.h"

	namespace llbuild {
	namespace ninja {

	/// One token lexed from a Ninja build manifest.
	struct Token {
	  /// The kinds of token that can be produced.
	  enum class Kind {
	    Colon,        ///< ':'
	    Comment,      ///< '# ...'
	    EndOfFile,    ///< <end of file>
	    Equals,       ///< '='
	    Indentation,  ///< Leading whitespace
	    Identifier,   ///< "Identifiers", really everything else
	    KWBuild,      ///< 'build' keyword
	    KWDefault,    ///< 'default' keyword
	    KWInclude,    ///< 'include' keyword
	    KWPool,       ///< 'pool' keyword
	    KWRule,       ///< 'rule' keyword
	    KWSubninja,   ///< 'subninja' keyword
	    Newline,      ///< The end of a line.
	    Pipe,         ///< '|'
	    PipePipe,     ///< '||'
	    String,       ///< Strings, only lexed in string mode.
	    Unknown,      ///< <other>

	    // First and last keyword kinds; isKeyword() tests membership in this
	    // inclusive range.
	    KWKindFirst = KWBuild,
	    KWKindLast = KWSubninja
	  };

	  const char* start;      ///< The beginning of the token string.
	  Kind        tokenKind;  ///< The token kind.
	  unsigned    length;     ///< The length of the token.
	  unsigned    line;       ///< The line number of the start of this token.
	  unsigned    column;     ///< The column number at the start of this token.

	  /// Return the name of this token's kind.
	  const char *getKindName() const;

	  /// Return true if this token's kind lies within the keyword range
	  /// [KWKindFirst, KWKindLast].
	  bool isKeyword() const {
	    const bool outsideRange =
	        tokenKind < Kind::KWKindFirst || tokenKind > Kind::KWKindLast;
	    return !outsideRange;
	  }

	  /// Dump the token to stderr.
	  void dump();
	};

	/// Interface for lexing tokens from a Ninja build manifest.
	///
	/// The Ninja manifest language unfortunately has no real string token; rather,
	/// the lexing is done in a context-sensitive fashion and string tokens are only
	/// recognized when the lexer is in a specific mode. Identifier tokens also
	/// behave slightly differently when lexed in a context where only an
	/// identifier is expected. \see Lexer::LexingMode and \see
	/// Lexer::setMode().
	class Lexer {
	public:
	  /// The modes that control how strings and identifiers are lexed.
	  enum class LexingMode {
	    /// No string tokens will be recognized, identifier tokens will follow
	    /// the usual rules.
	    None,

	    /// No string tokens will be recognized, identifier tokens will be lexed
	    /// following specific rules.
	    IdentifierSpecific,

	    /// Strings will be lexed as expected for path references.
	    PathString,

	    /// Strings will be lexed as expected for variable assignments.
	    VariableString,
	  };

	private:
	  /// The buffer contents.
	  StringRef buffer;

	  const char* bufferPos;      ///< The current lexer position.
	  unsigned    lineNumber;     ///< The current line.
	  unsigned    columnNumber;   ///< The current column.
	  LexingMode  mode;           ///< The current lexing mode.

	  /// Eat a character from the stream and return it, or return -1.
	  int getNextChar();

	  /// Return the next character without consuming it from the stream. This
	  /// does not perform newline canonicalization.
	  int peekNextChar();

	  /// Skip forward until the end of the line.
	  void skipToEndOfLine();

	  /// Set the kind and length of \p result based on the current lexer
	  /// position, and return \p result.
	  Token& setTokenKind(Token& result, Token::Kind kind) const;

	  /// Set the kind of \p result assuming the token is an identifier or
	  /// keyword, and return \p result.
	  Token& setIdentifierTokenKind(Token& result) const;

	  /// Lex a token, assuming the current position is the start of an
	  /// identifier.
	  Token& lexIdentifier(Token& result);

	  /// Lex a token, assuming the current position is the start of a string
	  /// and the lexer is in the "path" string mode.
	  Token& lexPathString(Token& result);

	  /// Lex a token, assuming the current position is the start of a string
	  /// and the lexer is in the "variable" string mode.
	  Token& lexVariableString(Token& result);

	public:
	  explicit Lexer(StringRef contents);
	  ~Lexer();

	  /// Return the next token from the file, or EOF continually once the end
	  /// of the file is reached. The input argument is used as the result, for
	  /// convenience.
	  Token& lex(Token& result);

	  /// Get the buffer contents.
	  StringRef getBuffer() const { return buffer; }

	  /// Get the current lexing mode.
	  LexingMode getMode() const { return mode; }

	  /// Set the current lexing mode.
	  void setMode(LexingMode value) { mode = value; }

	  /// @name Utility Methods
	  /// @{

	  /// Check whether the given character \p c is valid in an identifier.
	  static bool isIdentifierChar(char c) {
	    return (c >= 'a' && c <= 'z') ||
	      (c >= 'A' && c <= 'Z') ||
	      (c >= '0' && c <= '9') ||
	      c == '_' || c == '.' || c == '-';
	  }

	  /// Check whether the given character \p c is valid in a simple identifier
	  /// (one which can appear in the middle of an expression string outside
	  /// braces).
	  static bool isSimpleIdentifierChar(char c) {
	    return (c >= 'a' && c <= 'z') ||
	      (c >= 'A' && c <= 'Z') ||
	      (c >= '0' && c <= '9') ||
	      c == '_' || c == '-';
	  }

	  /// @}
	};

	}
	}

	#endif