// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
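
// The /*!re2c ... */ blocks below are rules for the re2c scanner
// generator; this appears to be the re2c input file that is compiled
// into the actual C++ lexer rather than the generated output.
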
#include "lexer.h"
#include <stdio.h>
#include "eval_env.h"
#include "util.h"
bool Lexer::Error(const string& message, string* err) {
// Compute line/column.
int line = 1;
const char* context = input_.str_;
for (const char* p = input_.str_; p < last_token_; ++p) {
if (*p == '\n') {
++line;
context = p + 1;
}
}
int col = last_token_ ? (int)(last_token_ - context) : 0;
char buf[1024];
snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
*err = buf;
*err += message + "\n";
// Add some context to the message.
const int kTruncateColumn = 72;
if (col > 0 && col < kTruncateColumn) {
int len;
bool truncated = true;
for (len = 0; len < kTruncateColumn; ++len) {
if (context[len] == 0 || context[len] == '\n') {
truncated = false;
break;
}
}
*err += string(context, len);
if (truncated)
*err += "...";
*err += "\n";
*err += string(col, ' ');
*err += "^ near here";
}
return false;
}
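
// Convenience constructor that lexes an in-memory string under the
// placeholder filename "input" (handy for tests and ad-hoc callers).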
Lexer::Lexer(const char* input) {
Start("input", input);
}
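
// Begin lexing |input|, using |filename| only for error messages.  The
// input buffer must remain valid while lexing and, because the rules
// below treat '\0' as end-of-file, must be NUL-terminated.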
void Lexer::Start(StringPiece filename, StringPiece input) {
filename_ = filename;
input_ = input;
ofs_ = input_.str_;
last_token_ = NULL;
}
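
// Return a human-readable name for |t|, suitable for error messages.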
const char* Lexer::TokenName(Token t) {
switch (t) {
case ERROR: return "lexing error";
case BUILD: return "'build'";
case COLON: return "':'";
case DEFAULT: return "'default'";
case EQUALS: return "'='";
case IDENT: return "identifier";
case INCLUDE: return "'include'";
case INDENT: return "indent";
case NEWLINE: return "newline";
case PIPE2: return "'||'";
case PIPE: return "'|'";
case POOL: return "'pool'";
case RULE: return "'rule'";
case SUBNINJA: return "'subninja'";
case TEOF: return "eof";
}
return NULL; // not reached
}
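
// Return an extra hint appended to "expected X, got Y" style errors;
// currently only COLON gets one, reminding the user that ':' can be
// $-escaped.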
const char* Lexer::TokenErrorHint(Token expected) {
switch (expected) {
case COLON:
return " ($ also escapes ':')";
default:
return "";
}
}
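
// Describe the most recent ERROR token in more detail.  The common case
// is a tab where an indent was expected.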
string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
}
return "lexing error";
}
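
// Rewind so the last token read will be returned again by ReadToken().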
void Lexer::UnreadToken() {
ofs_ = last_token_;
}
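
// Scan and return the next token.  Comment lines and the spaces before
// them are skipped entirely; leading spaces on other lines produce
// INDENT; '\0' is reported as TEOF.  On return last_token_ points at the
// start of the token, and any trailing horizontal whitespace has been
// consumed (except after NEWLINE and TEOF, so the indentation of the
// next line is still visible).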
Lexer::Token Lexer::ReadToken() {
const char* p = ofs_;
const char* q;
const char* start;
Lexer::Token token;
for (;;) {
start = p;
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:define:YYMARKER = q;
re2c:yyfill:enable = 0;
nul = "\000";
simple_varname = [a-zA-Z0-9_-]+;
varname = [a-zA-Z0-9_.-]+;
[ ]*"#"[^\000\n]*"\n" { continue; }
[ ]*"\r\n" { token = NEWLINE; break; }
[ ]*"\n" { token = NEWLINE; break; }
[ ]+ { token = INDENT; break; }
"build" { token = BUILD; break; }
"pool" { token = POOL; break; }
"rule" { token = RULE; break; }
"default" { token = DEFAULT; break; }
"=" { token = EQUALS; break; }
":" { token = COLON; break; }
"||" { token = PIPE2; break; }
"|" { token = PIPE; break; }
"include" { token = INCLUDE; break; }
"subninja" { token = SUBNINJA; break; }
varname { token = IDENT; break; }
nul { token = TEOF; break; }
[^] { token = ERROR; break; }
*/
}
last_token_ = start;
ofs_ = p;
if (token != NEWLINE && token != TEOF)
EatWhitespace();
return token;
}
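
// If the next token is |token|, consume it and return true; otherwise
// leave the read position where it was and return false.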
bool Lexer::PeekToken(Token token) {
Token t = ReadToken();
if (t == token)
return true;
UnreadToken();
return false;
}
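
// Skip spaces and "$"-escaped newlines; stop at anything else.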
void Lexer::EatWhitespace() {
const char* p = ofs_;
const char* q;
for (;;) {
ofs_ = p;
/*!re2c
[ ]+ { continue; }
"$\r\n" { continue; }
"$\n" { continue; }
nul { break; }
[^] { break; }
*/
}
}
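
// Read a variable or rule name into *out.  Returns false without
// consuming input if the next character cannot start an identifier;
// on success, trailing whitespace is also consumed.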
bool Lexer::ReadIdent(string* out) {
const char* p = ofs_;
for (;;) {
const char* start = p;
/*!re2c
varname {
out->assign(start, p - start);
break;
}
[^] { return false; }
*/
}
ofs_ = p;
EatWhitespace();
return true;
}
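
// Read a (possibly $-escaped) string into |eval| as a mix of literal
// text and variable references.  If |path| is true, stop before an
// unescaped space, ':', '|' or newline without consuming it; otherwise
// read through the end of the line.  Handles the "$$", "$ ", "$:" and
// "$\n" (line continuation) escapes as well as $var and ${var}.  On
// error, reports it via Error() and returns false.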
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
const char* p = ofs_;
const char* q;
const char* start;
for (;;) {
start = p;
/*!re2c
[^$ :\r\n|\000]+ {
eval->AddText(StringPiece(start, p - start));
continue;
}
"\r\n" {
if (path)
p = start;
break;
}
[ :|\n] {
if (path) {
p = start;
break;
} else {
if (*start == '\n')
break;
eval->AddText(StringPiece(start, 1));
continue;
}
}
"$$" {
eval->AddText(StringPiece("$", 1));
continue;
}
"$ " {
eval->AddText(StringPiece(" ", 1));
continue;
}
"$\r\n"[ ]* {
continue;
}
"$\n"[ ]* {
continue;
}
"${"varname"}" {
eval->AddSpecial(StringPiece(start + 2, p - start - 3));
continue;
}
"$"simple_varname {
eval->AddSpecial(StringPiece(start + 1, p - start - 1));
continue;
}
"$:" {
eval->AddText(StringPiece(":", 1));
continue;
}
"$". {
last_token_ = start;
return Error("bad $-escape (literal $ must be written as $$)", err);
}
nul {
last_token_ = start;
return Error("unexpected EOF", err);
}
[^] {
last_token_ = start;
return Error(DescribeLastError(), err);
}
*/
}
last_token_ = start;
ofs_ = p;
if (path)
EatWhitespace();
// Non-path strings end in newlines, so there's no whitespace to eat.
return true;
}