blob: 1a66f1b2f71e0f60fdfc242c72d97b2ab307134b [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/developer/debug/zxdb/console/command_parser.h"
#include <lib/syslog/cpp/macros.h>
#include <stdio.h>
#include <charconv>
#include <map>
#include <set>
#include <system_error>
#include "src/developer/debug/zxdb/common/err.h"
#include "src/developer/debug/zxdb/console/command.h"
#include "src/developer/debug/zxdb/console/nouns.h"
#include "src/developer/debug/zxdb/expr/parse_string.h"
namespace zxdb {
namespace {
// Returns true when |c| splits command tokens. Only the plain space character counts; tabs and
// other whitespace are treated as ordinary token characters.
bool IsTokenSeparator(char c) {
  constexpr char kSeparator = ' ';
  return c == kSeparator;
}
// Looks up the record for a long switch token (the token includes the two leading dashes).
// Returns null if no record has a matching name.
//
// The token can contain an equals sign. In that case only the text between the dashes and the
// equals sign is used as the switch name, and the position of the equals sign is written to
// *equals_index. When there is no equals sign, *equals_index is set to std::string::npos. This
// supports both spellings of a long switch with a value: "--foo=bar" and "--foo bar".
const SwitchRecord* FindLongSwitch(const std::string& str,
                                   const std::vector<SwitchRecord>& switches,
                                   size_t* equals_index) {
  // Should have two leading dashes.
  FX_DCHECK(str.size() >= 2 && str.substr(0, 2) == "--");

  // The name starts after the dashes and runs either to the '=' or to the end of the token.
  *equals_index = str.find('=');
  const size_t name_length =
      (*equals_index == std::string::npos) ? std::string::npos : *equals_index - 2;
  const std::string wanted_name = str.substr(2, name_length);

  for (const auto& candidate : switches) {
    if (candidate.name == wanted_name)
      return &candidate;
  }
  return nullptr;
}
// Looks up the record whose single-character form is |ch|. Returns a pointer into |switches| for
// the first match, or null when no record uses that character.
const SwitchRecord* FindSwitch(char ch, const std::vector<SwitchRecord>& switches) {
  for (size_t i = 0; i < switches.size(); ++i) {
    const SwitchRecord& candidate = switches[i];
    if (candidate.ch == ch)
      return &candidate;
  }
  return nullptr;
}
// Returns true when |str| begins with |starts_with|. An empty |starts_with| matches every string.
bool StartsWith(const std::string& str, const std::string& starts_with) {
  const size_t prefix_length = starts_with.size();
  if (prefix_length > str.size())
    return false;
  return str.compare(0, prefix_length, starts_with) == 0;
}
// Returns true if the token consists solely of the decimal digits '0'-'9', which marks it as an
// index token.
//
// An empty token (which the tokenizer can produce for an empty quoted string) is not an index, so
// it returns false rather than vacuously passing the all-digits test.
bool IsIndexToken(const std::string& token) {
  if (token.empty())
    return false;
  for (char c : token) {
    if (c < '0' || c > '9')
      return false;
  }
  return true;
}
class Parser {
public:
Parser(const std::string& input, Command* command, const FillCommandContextCallback& fill_context)
: input_(input), fill_context_(fill_context), command_(command) {}
const Err& Parse() {
if (!Tokenize(input_)) {
return err_;
}
while (Advance())
;
return err_;
}
const Err& Complete(std::vector<std::string>* results) {
FX_DCHECK(results->empty());
if (!Tokenize(input_)) {
return err_;
}
std::string to_complete;
if (!tokens_.empty() && input_.back() != ' ') {
to_complete = tokens_.back().str;
tokens_.pop_back();
}
while (Advance())
;
if (!at_end()) {
return err_;
}
for (const auto& state : states_at_pos_) {
(this->*(state->complete))(to_complete, results);
}
std::string prefix = input_.substr(0, input_.size() - to_complete.size());
for (auto& result : *results) {
result = prefix + result;
}
err_ = Err();
return err_;
}
Err err() { return err_; }
bool at_end() { return tokens_.size() == pos_; }
private:
struct State {
bool (Parser::*advance)();
// Completer callback. The first argument is a prefix we expect all
// completions to share. The second is a vector to which we should append
// full single tokens which would be valid completions.
void (Parser::*complete)(const std::string&, std::vector<std::string>*);
State(bool (Parser::*advance)(),
void (Parser::*complete)(const std::string&, std::vector<std::string>*))
: advance(advance), complete(complete) {}
};
const std::string& token_str() { return tokens_[pos_].str; }
bool Advance() {
const State* start = state_;
size_t start_pos = pos_;
bool result = (this->*(state_->advance))();
if (start_pos != pos_) {
states_at_pos_.clear();
} else {
states_at_pos_.push_back(start);
}
return result;
}
bool Consume(const State& state) {
state_ = &state;
return Consume();
}
bool GoTo(const State& state) {
state_ = &state;
return true;
}
bool Consume() {
pos_++;
return true;
}
bool Fail(const std::string& msg) {
err_ = Err(msg);
return false;
}
bool Accept() { return false; }
bool DoNounState();
bool DoNounIndexState();
bool DoVerbState();
bool DoSwitchesState();
bool DoSwitchState();
bool DoLongSwitchState();
bool DoSwitchArgState();
bool DoArgState();
void NoComplete(const std::string&, std::vector<std::string>*) {}
void DoCompleteNoun(const std::string& to_complete, std::vector<std::string>* result);
void DoCompleteSwitches(const std::string& to_complete, std::vector<std::string>* result);
void DoCompleteVerb(const std::string& to_complete, std::vector<std::string>* result);
void DoCompleteArgs(const std::string& to_complete, std::vector<std::string>* result);
static const State kNounState;
static const State kNounIndexState;
static const State kVerbState;
static const State kSwitchesState;
static const State kSwitchState;
static const State kLongSwitchState;
static const State kSwitchArgState;
static const State kArgState;
bool Tokenize(const std::string& input) {
FX_DCHECK(!err_.has_error() && pos_ == 0 && tokens_.size() == 0);
err_ = TokenizeCommand(input, &tokens_);
return !err_.has_error();
}
const std::string input_;
// Used for completions. Optionally set to fill in the noun information for
// a given command so the completion code can use them.
const FillCommandContextCallback& fill_context_;
Command* command_ = nullptr;
const State* state_ = &kNounState;
Err err_;
// The current parse position within the token stream.
size_t pos_ = 0;
// This is set by NounState and can be read from NounIndexState.
Noun noun_;
// As soon as we parse a valid verb, this points to its VerbRecord.
const VerbRecord* verb_record_ = nullptr;
// This is set to the record for the found switch by SwitchState, and should
// be readable from SwitchArgState.
const SwitchRecord* sw_record_ = nullptr;
// This is set to the text of the switch by SwitchState, and can be read from
// SwitchArgState for error-reporting purposes.
std::string sw_name_;
// This is set by SwitchState when we have an argument that is in the same
// token as the switch itself (i.e. `--foo=1` or `-f1`). It is read from
// SwitchArgState, and if it is populated, we take the argument from there
// instead of consuming another token.
std::optional<std::string> sw_value_ = std::nullopt;
std::vector<CommandToken> tokens_;
// States we've been through without advancing. This is important for
// completion.
//
// In essence this is a list of every state we've been through since the last
// time pos_ changed. If we are completing, and thus at the end of the
// stream, these are potentially all states that would like to have consumed
// another token but could not, and so passed control to another state or
// halted. Completion means going back to each of them and asking what we
// would like to have seen.
//
// An example. Suppose we had just the noun token "thread". We will parse
// that from the noun state, then enter the NounIndex state, which will
// move us back to the Noun state without parsing anything, and so will be
// added to this list. The Noun state will find no further tokens and thus
// pass control to the Verb state, and thus also get added to this list. The
// verb state will then accept without parsing, and it too will be added to
// this list, so at halt we will have the Noun, NounIndex, and Verb states,
// and on completion we will try to complete with either a Noun, an index,
// or a Verb.
//
// It's important to note that any time the parser advances, this list is
// cleared, so we end with only states that tried to match at the end but
// found no token.
std::vector<const State*> states_at_pos_;
};
// State table: each state pairs the handler that advances the parse with the handler that
// contributes completions when the parse halts in that state. States that have nothing to suggest
// use NoComplete.
const Parser::State Parser::kNounState{&Parser::DoNounState, &Parser::DoCompleteNoun};
const Parser::State Parser::kNounIndexState{&Parser::DoNounIndexState, &Parser::NoComplete};
const Parser::State Parser::kVerbState{&Parser::DoVerbState, &Parser::DoCompleteVerb};
const Parser::State Parser::kSwitchesState{&Parser::DoSwitchesState, &Parser::DoCompleteSwitches};
const Parser::State Parser::kSwitchState{&Parser::DoSwitchState, &Parser::NoComplete};
const Parser::State Parser::kLongSwitchState{&Parser::DoLongSwitchState, &Parser::NoComplete};
const Parser::State Parser::kSwitchArgState{&Parser::DoSwitchArgState, &Parser::NoComplete};
const Parser::State Parser::kArgState{&Parser::DoArgState, &Parser::DoCompleteArgs};
// Tries to read a noun. A recognized noun is remembered in noun_ and consumed, and the optional
// index is parsed next. Anything else is handed to the switch state (when it starts with a dash
// and is not the first token) or to the verb state. Naming the same noun twice is an error.
bool Parser::DoNounState() {
  if (at_end())
    return GoTo(Parser::kVerbState);

  const std::string& token = token_str();
  const auto& noun_map = GetStringNounMap();
  const auto match = noun_map.find(token);

  if (match != noun_map.end()) {
    noun_ = match->second;
    if (command_->HasNoun(noun_))
      return Fail("Noun \"" + NounToString(noun_) + "\" specified twice.");
    return Consume(Parser::kNounIndexState);
  }

  // Not a noun. A leading dash is only interpreted as a switch after at least one token has
  // already been consumed.
  const bool looks_like_switch = pos_ > 0 && token[0] == '-';
  return GoTo(looks_like_switch ? Parser::kSwitchesState : Parser::kVerbState);
}
// Consumes the optional index following a noun if the token is all digits. For example, the input
// could be "process 2 run" (with index) or "process run" (without).
//
// Without an index the noun is recorded with Command::kNoIndex and the current token is left for
// the noun state. A token that passes the digit check but cannot be converted, including a number
// too large for size_t, fails the parse.
bool Parser::DoNounIndexState() {
  if (at_end() || !IsIndexToken(token_str())) {
    command_->SetNoun(noun_, Command::kNoIndex);
    return GoTo(Parser::kNounState);
  }

  // std::from_chars reports out-of-range input through its error code, whereas the behavior of
  // sscanf() is undefined when the converted value does not fit into the destination.
  const std::string& token = token_str();
  const char* const token_end = token.data() + token.size();
  size_t noun_index = Command::kNoIndex;
  const auto conversion = std::from_chars(token.data(), token_end, noun_index);
  if (conversion.ec != std::errc() || conversion.ptr != token_end) {
    return Fail("Invalid index \"" + token + "\" for \"" + NounToString(noun_) + "\".");
  }

  command_->SetNoun(noun_, noun_index);
  return Consume(Parser::kNounState);
}
// Dispatches on the shape of the current token: tokens that do not start with a dash (or the end
// of input) start the arguments, a bare "--" is consumed and starts the arguments, a lone "-" is
// an error, and everything else is routed to the long or short switch state.
bool Parser::DoSwitchesState() {
  if (at_end() || token_str()[0] != '-')
    return GoTo(Parser::kArgState);

  const std::string& token = token_str();
  if (token == "--")
    return Consume(Parser::kArgState);
  if (token.size() == 1)
    return Fail("Invalid switch \"-\".");

  // The second character tells "--long" and "-s" switches apart.
  return GoTo(token[1] == '-' ? Parser::kLongSwitchState : Parser::kSwitchState);
}
// Handles a two-hyphen (--) switch token. The switch is looked up in the verb's switches when a
// verb has been parsed and in the noun switches otherwise. A value attached with '=' is stored in
// sw_value_ for the switch argument state; without one sw_value_ is cleared.
bool Parser::DoLongSwitchState() {
  const std::string& token = token_str();
  const std::vector<SwitchRecord>& candidates =
      verb_record_ ? verb_record_->switches : GetNounSwitches();

  size_t equals_pos = std::string::npos;
  sw_record_ = FindLongSwitch(token, candidates, &equals_pos);
  if (!sw_record_)
    return Fail("Unknown switch \"" + token + "\".");

  sw_name_ = std::string("--") + sw_record_->name;

  if (equals_pos == std::string::npos) {
    sw_value_ = std::nullopt;
  } else {
    // Everything after the equals sign is the value.
    sw_value_ = token.substr(equals_pos + 1);
  }

  return Consume(Parser::kSwitchArgState);
}
// Handles a single-dash switch token such as "-f" or "-f1". The character after the dash selects
// the switch, which is looked up in the verb's switches when a verb has been parsed and in the
// noun switches otherwise. Any remaining characters in the token are the switch's value.
bool Parser::DoSwitchState() {
  const std::vector<SwitchRecord>& switches =
      verb_record_ ? verb_record_->switches : GetNounSwitches();

  // Single-dash token means one character. The switches state only routes tokens with at least
  // two characters here.
  const std::string& token = token_str();
  char switch_char = token[1];
  sw_record_ = FindSwitch(switch_char, switches);
  if (!sw_record_) {
    return Fail(std::string("Unknown switch \"-") + switch_char + "\".");
  }

  sw_name_ = std::string("-") + sw_record_->ch;

  if (token.size() > 2) {
    sw_value_ = token.substr(2);
  } else {
    // Clear any value left behind by a previously parsed switch. Otherwise the switch argument
    // state would mistake the stale value for one attached to this switch.
    sw_value_ = std::nullopt;
  }

  return Consume(Parser::kSwitchArgState);
}
// Records the switch found by the previous switch state on the command, together with its value.
//
// A switch without a value is stored with an empty string, and attaching a value to it is an
// error. For a switch with a value, the value attached to the switch token (sw_value_) is used
// when present; otherwise the next token is consumed as the value, and it is an error if there is
// none.
bool Parser::DoSwitchArgState() {
  if (!sw_record_->has_value) {
    if (sw_value_) {
      // NOTE: the message always names the long form, even if the short form was typed.
      return Fail(std::string("--") + sw_record_->name + " takes no argument.");
    }
    command_->SetSwitch(sw_record_->id, "");
    return GoTo(Parser::kSwitchesState);
  }

  if (sw_value_) {
    command_->SetSwitch(sw_record_->id, std::move(*sw_value_));
    // Moving the string out leaves the optional engaged. Reset it so the consumed value cannot be
    // picked up again by a later switch.
    sw_value_ = std::nullopt;
    return GoTo(Parser::kSwitchesState);
  }

  if (at_end()) {
    return Fail("Argument needed for \"" + sw_name_ + "\".");
  }

  command_->SetSwitch(sw_record_->id, token_str());
  return Consume(Parser::kSwitchesState);
}
// Reads the verb. Input that ends before a verb is accepted as is. An unknown verb string fails
// the parse. A known one is stored on the command, its VerbRecord is remembered in verb_record_,
// and parsing continues with the switches.
bool Parser::DoVerbState() {
  if (at_end())
    return Accept();

  // Map the token to a verb.
  const auto& string_to_verb = GetStringVerbMap();
  const auto verb_match = string_to_verb.find(token_str());
  if (verb_match == string_to_verb.end())
    return Fail("The string \"" + token_str() + "\" is not a valid verb.");
  command_->set_verb(verb_match->second);

  // Look up the record that describes the verb.
  const auto& verbs = GetVerbs();
  auto found_verb = verbs.find(command_->verb());
  FX_DCHECK(found_verb != verbs.end());  // Valid verb should always be found.
  verb_record_ = &found_verb->second;

  return Consume(Parser::kSwitchesState);
}
bool Parser::DoArgState() {
if (pos_ == tokens_.size()) {
return Accept();
}
std::vector<std::string> args;
if (verb_record_ && verb_record_->param_type == VerbRecord::kOneParam) {
// Treat all arguments as one giant parameter rather than whitespace separated ones.
args.push_back(input_.substr(tokens_[pos_].offset));
} else {
for (size_t i = pos_; i < tokens_.size(); i++)
args.push_back(tokens_[i].str);
}
command_->set_args(std::move(args));
pos_ = tokens_.size();
return Accept();
}
// Suggests nouns that begin with |to_complete|. Nouns already present on the command are skipped,
// and each remaining noun contributes at most one entry: its first alias that matches.
void Parser::DoCompleteNoun(const std::string& to_complete, std::vector<std::string>* result) {
  for (const auto& [noun, record] : GetNouns()) {
    if (command_->HasNoun(noun))
      continue;

    for (const auto& alias : record.aliases) {
      if (!StartsWith(alias, to_complete))
        continue;
      result->push_back(alias);
      break;
    }
  }
}
// Suggests the long ("--name") form of every applicable switch that begins with |to_complete|.
// The verb's switches apply when a verb has been parsed, the noun switches otherwise.
void Parser::DoCompleteSwitches(const std::string& to_complete, std::vector<std::string>* result) {
  const std::vector<SwitchRecord>& candidates =
      verb_record_ ? verb_record_->switches : GetNounSwitches();

  for (const auto& candidate : candidates) {
    const std::string long_form = std::string("--") + candidate.name;
    if (!StartsWith(long_form, to_complete))
      continue;
    result->push_back(long_form);
  }
}
// Suggests verbs that begin with |to_complete|. Nothing is suggested once a verb has been parsed.
// Each verb contributes at most one entry: its first alias that matches.
void Parser::DoCompleteVerb(const std::string& to_complete, std::vector<std::string>* result) {
  if (verb_record_)
    return;

  for (const auto& entry : GetVerbs()) {
    for (const auto& alias : entry.second.aliases) {
      if (!StartsWith(alias, to_complete))
        continue;
      result->push_back(alias);
      break;
    }
  }
}
// Delegates argument completion to the completer of the parsed verb. Does nothing when no verb
// has been parsed or the verb has no completer.
void Parser::DoCompleteArgs(const std::string& to_complete, std::vector<std::string>* result) {
  if (!verb_record_ || !verb_record_->complete)
    return;

  // Give the caller a chance to fill in the noun context before the completion routine runs.
  if (fill_context_)
    fill_context_(command_);

  verb_record_->complete(*command_, to_complete, result);
}
} // namespace
// Splits |input| into space-separated tokens and stores them in |result|, replacing its previous
// contents. Each token records the offset in |input| where it begins and its text. Double-quoted
// string literals inside a token are parsed and contribute their parsed value, so they can
// contain spaces. Returns the literal parser's error if a string literal is malformed.
Err TokenizeCommand(const std::string& input, std::vector<CommandToken>* result) {
  result->clear();

  const size_t length = input.size();
  size_t pos = 0;
  for (;;) {
    // Skip the separators in front of the next token.
    while (pos < length && IsTokenSeparator(input[pos]))
      pos++;
    if (pos == length)
      break;

    const size_t start = pos;
    std::string text;

    // Accumulate characters up to the next separator or the end of the input.
    while (pos < length && !IsTokenSeparator(input[pos])) {
      // Allow quoting. Only double-quoted strings are treated as quoted text. This handling of
      // quotes prohibits the use of strings in expressions. If that's needed, we may want to add
      // a mode where the "treat everything as one parameter" handling that the "print" command
      // uses doesn't do any quote handling.
      auto literal = DoesBeginStringOrCharLiteral(ExprLanguage::kC, input, pos);
      const bool is_string = literal && literal->token_type == ExprTokenType::kStringLiteral;

      if (!is_string) {
        // All other characters are taken literally. We may want backslash-escaping here in the
        // future.
        text.push_back(input[pos]);
        pos++;
        continue;
      }

      size_t error_location = 0;
      auto parsed = ParseStringOrCharLiteral(input, *literal, &pos, &error_location);
      if (parsed.has_error())
        return parsed.err();
      text += parsed.value();
    }

    if (pos == start)
      break;  // Got to end of input.

    // Emit token.
    result->emplace_back(start, std::move(text));
  }

  // This returns an Err() to allow for adding escaping errors in the future.
  return Err();
}
// Parses |input| into |output|, which is reset first. Returns the parser's error state.
Err ParseCommand(const std::string& input, Command* output) {
  *output = Command();

  // The parser keeps a reference to the callback it is given, so the (empty) callback must be a
  // named object that outlives the parser. A temporary would be destroyed as soon as the
  // constructor returns and leave the parser with a dangling reference.
  FillCommandContextCallback no_fill_context;
  Parser parser(input, output, no_fill_context);

  parser.Parse();

  // A parse without an error must have consumed every token.
  FX_DCHECK(parser.err().has_error() || parser.at_end());
  return parser.err();
}
// Returns the completions for |input|, as computed by the parser's completion pass over a
// scratch command. The result is empty when there is nothing to suggest.
//
// |fill_context| is forwarded to the parser by reference. It only needs to stay alive for the
// duration of this call because the parser does not outlive it.
std::vector<std::string> GetCommandCompletions(const std::string& input,
                                               const FillCommandContextCallback& fill_context) {
  Command temp;

  // No std::move() here: |fill_context| is a const reference, so moving from it would be a no-op
  // that only suggests a transfer of ownership which does not happen.
  Parser parser(input, &temp, fill_context);

  std::vector<std::string> result;
  parser.Complete(&result);
  return result;
}
} // namespace zxdb