|  | // Copyright 2019 The Fuchsia Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "src/developer/debug/zxdb/expr/parse_string.h" | 
|  |  | 
|  | #include <ctype.h> | 
|  | #include <lib/syslog/cpp/macros.h> | 
|  | #include <stdlib.h> | 
|  |  | 
|  | namespace zxdb { | 
|  |  | 
|  | namespace { | 
|  |  | 
// Returns true if |c| may appear in the delimiter of a C++ raw string literal (the characters
// between the opening quote and the opening parenthesis in R"delim(...)delim").
//
// A delimiter character is any source character except for parentheses, backslash and whitespace.
bool IsValidCRawStringDelimeter(char c) {
  if (c == '(' || c == ')' || c == '\\')
    return false;

  // The <ctype.h> functions require a value representable as unsigned char (or EOF). Passing a
  // negative plain char (any non-ASCII byte when char is signed) is undefined behavior, so convert
  // first.
  return !isspace(static_cast<unsigned char>(c));
}
|  |  | 
|  | std::optional<StringLiteralBegin> DoesBeginRawCStringLiteral(std::string_view input, size_t begin) { | 
|  | // This only supports raw string literals and not the various flavors of Unicode prefixes. | 
|  | if (input.size() <= begin + 2 || input[begin] != 'R' || input[begin + 1] != '"') | 
|  | return std::nullopt; | 
|  |  | 
|  | // Skip over the delimiter. | 
|  | size_t cur = begin + 2; | 
|  | while (input.size() > cur && IsValidCRawStringDelimeter(input[cur])) | 
|  | cur++; | 
|  |  | 
|  | // Expecting a paren to begin the string. | 
|  | if (cur == input.size() || input[cur] != '(') | 
|  | return std::nullopt; | 
|  |  | 
|  | StringLiteralBegin info; | 
|  | info.language = ExprLanguage::kC; | 
|  | info.is_raw = true; | 
|  | info.raw_marker = input.substr(begin + 2, cur - begin - 2); | 
|  | info.string_begin = begin; | 
|  | info.contents_begin = cur + 1; | 
|  |  | 
|  | return info; | 
|  | } | 
|  |  | 
|  | // Rust raw strings start with 'r', some number of '#' characters, and a quote. | 
|  | std::optional<StringLiteralBegin> DoesBeginRawRustStringLiteral(std::string_view input, | 
|  | size_t begin) { | 
|  | // This only supports "raw" strings, not "byte" strings. It could be enhanced in the future. | 
|  | if (input.size() <= begin + 2 || input[begin] != 'r' || input[begin + 1] != '#') | 
|  | return std::nullopt; | 
|  |  | 
|  | size_t cur = begin + 1; | 
|  | while (input.size() > cur && input[cur] == '#') | 
|  | cur++; | 
|  |  | 
|  | if (cur == input.size() || input[cur] != '"') | 
|  | return std::nullopt; | 
|  |  | 
|  | StringLiteralBegin info; | 
|  | info.language = ExprLanguage::kRust; | 
|  | info.is_raw = true; | 
|  | info.raw_marker = input.substr(begin + 1, cur - begin - 1); | 
|  | info.string_begin = begin; | 
|  | info.contents_begin = cur + 1; | 
|  |  | 
|  | return info; | 
|  | } | 
|  |  | 
|  | // Determines if the current index marks the beginning of the end of the string. If it does, | 
|  | // returns the index of the character immediately following the string (which might point to | 
|  | // one-past-the-end of the input). Otherwise returns 0. | 
|  | size_t EndsStringLiteral(std::string_view input, const StringLiteralBegin& info, size_t cur) { | 
|  | FX_DCHECK(cur < input.size()); | 
|  |  | 
|  | if (!info.is_raw) { | 
|  | if (input[cur] == '"') | 
|  | return cur + 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | switch (info.language) { | 
|  | case ExprLanguage::kC: | 
|  | if (input.size() - cur >= info.raw_marker.size() + 2) { | 
|  | if (input[cur] == ')' && input[cur + info.raw_marker.size() + 1] == '"' && | 
|  | input.substr(cur + 1, info.raw_marker.size()) == info.raw_marker) | 
|  | return cur + info.raw_marker.size() + 2; | 
|  | } | 
|  | break; | 
|  | case ExprLanguage::kRust: | 
|  | if (input.size() - cur >= info.raw_marker.size() + 1) { | 
|  | if (input[cur] == '"' && input.substr(cur + 1, info.raw_marker.size()) == info.raw_marker) | 
|  | return cur + info.raw_marker.size() + 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
// Returns true for the ASCII hexadecimal digits 0-9, a-f, and A-F.
bool IsHexDigit(char c) {
  if ('0' <= c && c <= '9')
    return true;
  if ('a' <= c && c <= 'f')
    return true;
  return 'A' <= c && c <= 'F';
}
// Returns true for the ASCII octal digits 0-7.
bool IsOctalDigit(char c) {
  const bool out_of_range = c < '0' || c > '7';
  return !out_of_range;
}
|  |  | 
|  | // See HandleEscaped() below for the parameter description. |*cur| should point to the first hex | 
|  | // digit. | 
|  | Err HandleHexEscaped(std::string_view input, const StringLiteralBegin& info, size_t* cur, | 
|  | size_t* error_location, std::string* result) { | 
|  | if (!IsHexDigit(input[*cur])) { | 
|  | *error_location = *cur; | 
|  | return Err("Expecting hex escape sequence."); | 
|  | } | 
|  |  | 
|  | std::string hex_digits; | 
|  | switch (info.language) { | 
|  | case ExprLanguage::kC: | 
|  | // C reads hex digits until there are no more. | 
|  | for (size_t i = *cur; i < input.size() && IsHexDigit(input[i]); i++) | 
|  | hex_digits.push_back(input[i]); | 
|  | break; | 
|  | case ExprLanguage::kRust: | 
|  | // Rust requires exactly two characters. | 
|  | if (*cur + 1 >= input.size() || !IsHexDigit(input[*cur + 1])) { | 
|  | *error_location = *cur; | 
|  | return Err("Expecting two hex digits."); | 
|  | } | 
|  | hex_digits.push_back(input[*cur]); | 
|  | hex_digits.push_back(input[*cur + 1]); | 
|  | break; | 
|  | } | 
|  |  | 
|  | char* endptr = nullptr; | 
|  | unsigned long value = strtoul(hex_digits.c_str(), &endptr, 16); | 
|  | if (endptr != hex_digits.data() + hex_digits.size()) { | 
|  | *error_location = *cur; | 
|  | return Err("Unexpected hex input."); | 
|  | } | 
|  |  | 
|  | (*cur) += hex_digits.size(); | 
|  | result->push_back(static_cast<unsigned char>(value)); | 
|  | return Err(); | 
|  | } | 
|  |  | 
|  | // See HandleEscaped() below for the parameter description. |*cur| should point to the first octal | 
|  | // digit. | 
|  | Err HandleOctalEscaped(std::string_view input, const StringLiteralBegin& info, size_t* cur, | 
|  | size_t* error_location, std::string* result) { | 
|  | if (!IsOctalDigit(input[*cur])) { | 
|  | *error_location = *cur; | 
|  | return Err("Expecting hex escape sequence."); | 
|  | } | 
|  |  | 
|  | std::string octal_digits; | 
|  | for (size_t i = *cur; i < input.size() && IsOctalDigit(input[i]); i++) | 
|  | octal_digits.push_back(input[i]); | 
|  |  | 
|  | char* endptr = nullptr; | 
|  | unsigned long value = strtoul(octal_digits.c_str(), &endptr, 8); | 
|  | if (endptr != octal_digits.data() + octal_digits.size()) { | 
|  | *error_location = *cur; | 
|  | return Err("Unexpected octal input."); | 
|  | } | 
|  |  | 
|  | (*cur) += octal_digits.size(); | 
|  | result->push_back(static_cast<unsigned char>(value)); | 
|  | return Err(); | 
|  | } | 
|  |  | 
|  | // On input, |*cur| should point to a valid character in |input| immediately following a backslash. | 
|  | // On success, |*cur| will be updated to point to the character immediately following the escape. | 
|  | Err HandleEscaped(std::string_view input, const StringLiteralBegin& info, size_t* cur, | 
|  | size_t* error_location, std::string* result) { | 
|  | // Shared C/Rust escape sequences. | 
|  | switch (input[*cur]) { | 
|  | // clang-format off | 
|  | case 'n':  result->push_back('\n'); ++(*cur); return Err(); | 
|  | case 'r':  result->push_back('\r'); ++(*cur); return Err(); | 
|  | case 't':  result->push_back('\t'); ++(*cur); return Err(); | 
|  | case '\\': result->push_back('\\'); ++(*cur); return Err(); | 
|  | case '\'': result->push_back('\''); ++(*cur); return Err(); | 
|  | case '"':  result->push_back('"');  ++(*cur); return Err(); | 
|  | default: break; | 
|  | // clang-format on | 
|  | } | 
|  |  | 
|  | if (input[*cur] == 'x') { | 
|  | // Hex digit. | 
|  | ++(*cur); | 
|  | if (*cur == input.size()) { | 
|  | *error_location = *cur - 2;  // Point to backslash. | 
|  | return Err("End of input found in hex escape."); | 
|  | } | 
|  | return HandleHexEscaped(input, info, cur, error_location, result); | 
|  | } | 
|  |  | 
|  | if (info.language == ExprLanguage::kC) { | 
|  | // C-specific escape sequences. | 
|  | switch (input[*cur]) { | 
|  | // clang-format off | 
|  | case '?':  result->push_back('?'); ++(*cur); return Err(); | 
|  | case 'a':  result->push_back('\a'); ++(*cur); return Err(); | 
|  | case 'b':  result->push_back('\b'); ++(*cur); return Err(); | 
|  | case 'f':  result->push_back('\f'); ++(*cur); return Err(); | 
|  | case 'v':  result->push_back('\v'); ++(*cur); return Err(); | 
|  | default: break; | 
|  | // clang-format on | 
|  | } | 
|  |  | 
|  | if (input[*cur] == 'u' || input[*cur] == 'U') | 
|  | return Err("Unicode escape sequences are not supported."); | 
|  |  | 
|  | if (IsOctalDigit(input[*cur])) { | 
|  | // Octal. | 
|  | return HandleOctalEscaped(input, info, cur, error_location, result); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (info.language == ExprLanguage::kRust) { | 
|  | // Rust-specific escape sequences. | 
|  | if (input[*cur] == '0') { | 
|  | // Null. | 
|  | result->push_back(0); | 
|  | ++(*cur); | 
|  | return Err(); | 
|  | } | 
|  |  | 
|  | if (input[*cur] == 'u') | 
|  | return Err("Unicode escape sequences are not supported."); | 
|  | } | 
|  |  | 
|  | *error_location = *cur - 1;  // Point to backslash. | 
|  | return Err("Unknown escape sequence."); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | std::optional<StringLiteralBegin> DoesBeginStringLiteral(ExprLanguage lang, std::string_view input, | 
|  | size_t cur) { | 
|  | if (cur >= input.size()) | 
|  | return std::nullopt;  // No room. | 
|  |  | 
|  | StringLiteralBegin info; | 
|  | info.language = lang; | 
|  |  | 
|  | if (input[cur] == '"') { | 
|  | // Regular literal string. Leave the raw string marker empty. | 
|  | info.string_begin = cur; | 
|  | info.contents_begin = cur + 1; | 
|  | return info; | 
|  | } | 
|  |  | 
|  | switch (lang) { | 
|  | case ExprLanguage::kC: | 
|  | return DoesBeginRawCStringLiteral(input, cur); | 
|  | case ExprLanguage::kRust: | 
|  | return DoesBeginRawRustStringLiteral(input, cur); | 
|  | } | 
|  |  | 
|  | FX_NOTREACHED(); | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | ErrOr<std::string> ParseStringLiteral(std::string_view input, const StringLiteralBegin& info, | 
|  | size_t* in_out_cur, size_t* error_location) { | 
|  | FX_DCHECK(info.contents_begin <= input.size()); | 
|  |  | 
|  | std::string result; | 
|  | size_t cur = info.contents_begin; | 
|  |  | 
|  | while (cur < input.size()) { | 
|  | if (size_t end = EndsStringLiteral(input, info, cur)) { | 
|  | *in_out_cur = end; | 
|  | return result; | 
|  | } | 
|  |  | 
|  | if (!info.is_raw && input[cur] == '\\') { | 
|  | cur++;  // Advance over backslash. | 
|  | if (cur == input.size()) { | 
|  | *error_location = cur - 1; | 
|  | return Err("Hit end of input before the end of the escape sequence."); | 
|  | } | 
|  |  | 
|  | Err err = HandleEscaped(input, info, &cur, error_location, &result); | 
|  | if (err.has_error()) | 
|  | return err; | 
|  | } else { | 
|  | // Non-escaped. | 
|  | result.push_back(input[cur]); | 
|  | cur++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Hit the end without an end-of-string. | 
|  | *error_location = info.string_begin; | 
|  | return Err("Hit end of input before the end of the string."); | 
|  | } | 
|  |  | 
|  | }  // namespace zxdb |