blob: 8ad036708b2ee8afb074f199b4c524da5f7d2e9d [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "garnet/bin/zxdb/expr/expr_tokenizer.h"
#include "lib/fxl/logging.h"
#include "lib/fxl/strings/string_printf.h"
namespace zxdb {
namespace {

// Returns true if |c| may begin a name: an ASCII letter or an underscore.
bool IsNameFirstChar(char c) {
  if (c == '_')
    return true;
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}

// Returns true if |c| may appear after the first character of a name:
// anything that can start a name, plus the decimal digits.
bool IsNameContinuingChar(char c) {
  const bool is_digit = '0' <= c && c <= '9';
  return is_digit || IsNameFirstChar(c);
}

// Returns true if |c| may begin an integer literal (a decimal digit).
bool IsIntegerFirstChar(char c) {
  return '0' <= c && c <= '9';
}

// Returns true if |c| may appear after the first character of an integer
// literal. Besides decimal digits this accepts a lowercase 'x' and the hex
// digits 'A'-'F' / 'a'-'f' so hexadecimal numbers stay in one token. No
// validation is done here; the number is validated and interpreted later.
bool IsIntegerContinuingChar(char c) {
  if (IsIntegerFirstChar(c) || c == 'x')
    return true;
  const bool is_upper_hex = 'A' <= c && c <= 'F';
  const bool is_lower_hex = 'a' <= c && c <= 'f';
  return is_upper_hex || is_lower_hex;
}

}  // namespace
// Constructs a tokenizer over |input|. Nothing is tokenized until Tokenize()
// is called.
ExprTokenizer::ExprTokenizer(const std::string& input)
    : input_(input) {
}
bool ExprTokenizer::Tokenize() {
while (!done()) {
AdvanceToNextToken();
if (done())
break;
ExprToken::Type type = ClassifyCurrent();
if (has_error())
break;
size_t token_begin = cur_;
AdvanceToEndOfToken(type);
if (has_error())
break;
size_t token_end = cur_;
std::string token_value(&input_[token_begin], token_end - token_begin);
tokens_.emplace_back(type, token_value, token_begin);
}
return !has_error();
}
// static
//
// Builds a two-line string for error messages: the first line is |input|
// with a leading pad, the second line is a '^' positioned under the
// character at |byte_offset|.
std::string ExprTokenizer::GetErrorContext(const std::string& input,
                                           size_t byte_offset) {
  // The offset must fall inside the input; pointing one character past the
  // end is also permitted.
  FXL_DCHECK(byte_offset <= input.size());

  // Future enhancements:
  //  - If multiline expressions are ever allowed, the returned context
  //    should not cross newlines or it will be messed up.
  //  - Input longer than 80 chars should be clipped to guarantee it doesn't
  //    wrap.
  std::string context = " " + input + "\n ";
  context += std::string(byte_offset, ' ');
  context += '^';
  return context;
}
// Moves the cursor forward by |n| characters. No bounds checking is done
// here.
void ExprTokenizer::AdvanceChars(int n) {
  cur_ += n;
}
// Moves the cursor forward by exactly one character.
void ExprTokenizer::AdvanceOneChar() {
  ++cur_;
}
// Skips whitespace starting at the cursor. On return the cursor is either at
// the end of the input or on the first non-whitespace character.
void ExprTokenizer::AdvanceToNextToken() {
  while (!at_end()) {
    if (!IsCurrentWhitespace())
      return;
    AdvanceOneChar();
  }
}
// Moves the cursor from the first character of a token of the given |type|
// to one past its last character.
//
// Integers and names consume characters for as long as they are valid
// continuing characters; every other token type has a fixed length. Passing
// kInvalid or kNumTypes is a programming error and records an internal
// parser error at the current position.
void ExprTokenizer::AdvanceToEndOfToken(ExprToken::Type type) {
  switch (type) {
    // Variable-length tokens: always take the first character, then keep
    // going while the following characters still belong to the token.
    case ExprToken::kInteger:
      AdvanceOneChar();
      while (!at_end() && IsIntegerContinuingChar(cur_char()))
        AdvanceOneChar();
      break;

    case ExprToken::kName:
      AdvanceOneChar();
      while (!at_end() && IsNameContinuingChar(cur_char()))
        AdvanceOneChar();
      break;

    // Two-character tokens. The classification code should already have
    // validated that both characters are available.
    case ExprToken::kArrow:
    case ExprToken::kColonColon:
    case ExprToken::kEquality:
      AdvanceChars(2);
      break;

    // Single-character tokens.
    case ExprToken::kEquals:
    case ExprToken::kDot:
    case ExprToken::kComma:
    case ExprToken::kStar:
    case ExprToken::kAmpersand:
    case ExprToken::kLeftSquare:
    case ExprToken::kRightSquare:
    case ExprToken::kLeftParen:
    case ExprToken::kRightParen:
    case ExprToken::kLess:
    case ExprToken::kGreater:
    case ExprToken::kMinus:
    case ExprToken::kPlus:
      AdvanceOneChar();
      break;

    // Keywords: skip the length of the literal text.
    case ExprToken::kTrue:
      AdvanceChars(4);  // "true"
      break;
    case ExprToken::kFalse:
      AdvanceChars(5);  // "false"
      break;

    // These are not real token types and should never be passed in.
    case ExprToken::kInvalid:
    case ExprToken::kNumTypes:
      FXL_NOTREACHED();
      error_location_ = cur_;
      err_ = Err("Internal parser error.");
      break;
  }
}
// Returns true if the input, starting at the cursor, begins with exactly the
// characters of |s|. Nothing is required of the character that follows the
// match.
bool ExprTokenizer::IsCurrentString(std::string_view s) const {
  // An empty string trivially matches at any position. Handling it up front
  // also keeps the unsigned "s.size() - 1" below from wrapping around.
  if (s.empty())
    return true;

  // The last character of |s| would land at cur_ + s.size() - 1; give up if
  // the input is too short to hold it.
  if (!can_advance(s.size() - 1))
    return false;

  // Compare the s.size() characters starting at the cursor against |s|.
  return input_.compare(cur_, s.size(), s) == 0;
}
// Returns true if the input at the cursor is |s| as a complete word: the
// text matches and is followed by either the end of the input or a
// character that cannot continue a name. This keeps e.g. "trueish" from
// matching "true".
bool ExprTokenizer::IsCurrentName(std::string_view s) const {
  if (!IsCurrentString(s))
    return false;

  const size_t after = cur_ + s.size();
  if (after == input_.size())
    return true;  // The match runs to the end of the buffer.
  return !IsNameContinuingChar(input_[after]);
}
// Returns true if the character at the cursor is a line feed, a carriage
// return, or a space. Must not be called at the end of the input.
bool ExprTokenizer::IsCurrentWhitespace() const {
  FXL_DCHECK(!at_end());
  switch (input_[cur_]) {
    case 0x0A:  // Line feed.
    case 0x0D:  // Carriage return.
    case 0x20:  // Space.
      return true;
    default:
      return false;
  }
}
// Determines the type of the token that starts at the cursor without moving
// the cursor. Must not be called at the end of the input.
//
// On an unrecognized character, or a ':' that is not part of "::", this
// records an error with its location and returns ExprToken::kInvalid.
ExprToken::Type ExprTokenizer::ClassifyCurrent() {
  FXL_DCHECK(!at_end());
  const char first = cur_char();

  // One-character lookahead: true when a character follows the current one
  // and it equals |expected|.
  const auto followed_by = [this](char expected) {
    return can_advance() && input_[cur_ + 1] == expected;
  };

  // Numbers.
  if (IsIntegerFirstChar(first))
    return ExprToken::kInteger;

  // Words: the boolean keywords are checked before falling back to a name.
  if (IsNameFirstChar(first)) {
    if (IsCurrentName("true"))
      return ExprToken::kTrue;
    if (IsCurrentName("false"))
      return ExprToken::kFalse;
    return ExprToken::kName;
  }

  // Punctuation.
  switch (first) {
    // Characters that may start a two-character token.
    case '-':
      // "->" is an arrow; anything else is a standalone hyphen.
      return followed_by('>') ? ExprToken::kArrow : ExprToken::kMinus;
    case '=':
      // "==" is equality; anything else is a plain equals sign.
      return followed_by('=') ? ExprToken::kEquality : ExprToken::kEquals;
    case ':':
      // Colons are currently only supported as part of "::".
      if (followed_by(':'))
        return ExprToken::kColonColon;
      err_ = Err("Invalid standalone ':' in expression.\n" +
                 GetErrorContext(input_, cur_));
      error_location_ = cur_;
      return ExprToken::kInvalid;

    // Single-character tokens.
    case '.':
      return ExprToken::kDot;
    case ',':
      return ExprToken::kComma;
    case '*':
      return ExprToken::kStar;
    case '&':
      return ExprToken::kAmpersand;
    case '[':
      return ExprToken::kLeftSquare;
    case ']':
      return ExprToken::kRightSquare;
    case '(':
      return ExprToken::kLeftParen;
    case ')':
      return ExprToken::kRightParen;
    case '<':
      return ExprToken::kLess;
    case '>':
      return ExprToken::kGreater;

    // Anything else is not part of the expression language.
    default:
      err_ = Err(
          fxl::StringPrintf("Invalid character '%c' in expression.\n", first) +
          GetErrorContext(input_, cur_));
      error_location_ = cur_;
      return ExprToken::kInvalid;
  }
}
} // namespace zxdb