// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/developer/debug/zxdb/console/format_context.h"

#include <algorithm>
#include <vector>

#include "src/developer/debug/zxdb/client/arch_info.h"
#include "src/developer/debug/zxdb/client/disassembler.h"
#include "src/developer/debug/zxdb/client/memory_dump.h"
#include "src/developer/debug/zxdb/client/process.h"
#include "src/developer/debug/zxdb/client/session.h"
#include "src/developer/debug/zxdb/client/setting_schema_definition.h"
#include "src/developer/debug/zxdb/common/file_util.h"
#include "src/developer/debug/zxdb/common/string_util.h"
#include "src/developer/debug/zxdb/console/command_utils.h"
#include "src/developer/debug/zxdb/console/console.h"
#include "src/developer/debug/zxdb/console/format_location.h"
#include "src/developer/debug/zxdb/console/format_table.h"
#include "src/developer/debug/zxdb/console/output_buffer.h"
#include "src/developer/debug/zxdb/console/string_util.h"
#include "src/developer/debug/zxdb/expr/expr_tokenizer.h"
#include "src/developer/debug/zxdb/expr/keywords.h"
#include "src/developer/debug/zxdb/symbols/input_location.h"
#include "src/developer/debug/zxdb/symbols/loaded_module_symbols.h"
#include "src/developer/debug/zxdb/symbols/location.h"
#include "src/developer/debug/zxdb/symbols/module_symbols.h"
#include "src/developer/debug/zxdb/symbols/process_symbols.h"
#include "src/developer/debug/zxdb/symbols/resolve_options.h"
#include "src/developer/debug/zxdb/symbols/source_file_provider.h"
#include "src/developer/debug/zxdb/symbols/source_util.h"
#include "src/lib/files/file.h"
#include "src/lib/fxl/strings/string_printf.h"

namespace zxdb {

namespace {

// One line of source: the line number paired with the text of that line.
using LineInfo = std::pair<int, std::string>;  // Line #, Line contents.
// A sequence of LineInfo entries.
using LineVector = std::vector<LineInfo>;

// Formats one line of source text without doing any tokenization.
//
// Lines other than the highlight line are emitted in one piece, dimmed when opts.dim_others is set.
// On the highlight line, the text before the 1-based opts.highlight_column stays normal and the
// text from that column onward is emphasized. A column at or before the first character
// emphasizes the whole line.
OutputBuffer FormatSourceLineNoSyntax(const FormatSourceOpts& opts, bool is_highlight_line,
                                      const std::string& line) {
  if (!is_highlight_line)
    return OutputBuffer(opts.dim_others ? Syntax::kComment : Syntax::kNormal, line);

  // The column comes from the symbols and could be invalid, so convert it to a 0-based index that
  // is clamped to the bounds of the line.
  const int length = static_cast<int>(line.size());
  const int split = std::clamp(opts.highlight_column - 1, 0, length);

  OutputBuffer result;
  if (split == 0) {
    // Nothing precedes the column: the entire line is emphasized.
    result.Append(Syntax::kHeading, line);
    return result;
  }

  result.Append(Syntax::kNormal, line.substr(0, split));
  if (split < length)
    result.Append(Syntax::kHeading, line.substr(split));
  return result;
}

struct SyntaxVariants {
  SyntaxVariants(Syntax normal_in, Syntax dim_in, Syntax bold_in)
      : normal(normal_in), dim(dim_in), bold(bold_in) {}
  Syntax normal;
  Syntax dim;
  Syntax bold;
};

// Maps a token type to the normal/dim/bold syntax triple used to draw it.
SyntaxVariants SyntaxForTokenType(ExprTokenType type) {
  switch (type) {
    // Plain names and anything unclassified.
    case ExprTokenType::kInvalid:
    case ExprTokenType::kName:
      return SyntaxVariants(Syntax::kNormal, Syntax::kComment, Syntax::kHeading);

    // Numeric literals. The boolean literals are drawn like numbers too.
    case ExprTokenType::kFloat:
    case ExprTokenType::kInteger:
    case ExprTokenType::kTrue:
    case ExprTokenType::kFalse:
      return SyntaxVariants(Syntax::kNumberNormal, Syntax::kNumberDim, Syntax::kNumberBold);

    // String literals.
    case ExprTokenType::kStringLiteral:
      return SyntaxVariants(Syntax::kStringNormal, Syntax::kStringDim, Syntax::kStringBold);

    // Comments look the same in every variant.
    case ExprTokenType::kComment:
      return SyntaxVariants(Syntax::kComment, Syntax::kComment, Syntax::kComment);

    default:
      break;
  }

  // Any remaining alphanumeric token is taken to be a keyword. Rust lifetimes are special language
  // constructs, so they get the keyword treatment as well.
  const ExprTokenRecord& record = RecordForTokenType(type);
  const bool is_keyword = record.is_alphanum || type == ExprTokenType::kRustLifetime;
  if (is_keyword)
    return SyntaxVariants(Syntax::kKeywordNormal, Syntax::kKeywordDim, Syntax::kKeywordBold);

  // Whatever is left is an operator.
  return SyntaxVariants(Syntax::kOperatorNormal, Syntax::kOperatorDim, Syntax::kOperatorBold);
}

// Formats one line of source with syntax highlighting driven by the tokens produced for that line.
// Requires a nonempty token list and opts.language to be set.
OutputBuffer FormatSourceLineWithTokens(const FormatSourceOpts& opts, bool is_highlight_line,
                                        const std::string& line,
                                        const std::vector<ExprToken>& tokens) {
  FX_DCHECK(!tokens.empty());
  FX_DCHECK(opts.language);

  // All emitted text is sliced out of the original source line rather than taken from the token
  // values, so the user always sees the literal source (normally the two are the same).
  OutputBuffer out;

  // C preprocessor lines and Rust attributes are assumed to begin with "#". The whole line is
  // emitted as one comment-styled run and the rest of the tokens are ignored. Formatting is
  // line-based anyway, so not handling multiline forms of these constructs is no extra limitation.
  if (tokens.front().type() == ExprTokenType::kOctothorpe) {
    out.Append(Syntax::kComment, line);
    return out;
  }

  // Build a list of (start offset, token type) boundaries. Each run extends to the start of the
  // following boundary. A sentinel at the end of the line closes the final run.
  using Boundary = std::pair<size_t, ExprTokenType>;
  std::vector<Boundary> boundaries;
  if (tokens.front().byte_offset() > 0) {
    // Text ahead of the first token (normally whitespace).
    boundaries.emplace_back(0, ExprTokenType::kInvalid);
  }

  const std::set<std::string>& keywords = AllKeywordsForLanguage(*opts.language, true);
  for (const ExprToken& token : tokens) {
    const ExprTokenType type = token.type();

    if (type == ExprTokenType::kCommentBlockEnd) {
      // A "*/" on this line: assume the opening "/*" was on an earlier line and treat everything
      // seen so far as comment text.
      boundaries.clear();
      boundaries.emplace_back(0, ExprTokenType::kComment);
      continue;
    }

    // The tokenizer doesn't have tokens for all keywords, so names are also checked against the
    // language's keyword list. There is no generic "keyword" token type, so matches are recorded
    // as the "if" token, which selects the keyword formatting.
    const bool is_keyword_name =
        type == ExprTokenType::kName && keywords.find(token.value()) != keywords.end();
    boundaries.emplace_back(token.byte_offset(), is_keyword_name ? ExprTokenType::kIf : type);
  }
  boundaries.emplace_back(line.size(), ExprTokenType::kInvalid);  // End sentinel.

  // Emit each run. The sentinel only supplies the end offset of the last real run, so iteration
  // stops one short of it.
  for (size_t i = 0; i + 1 < boundaries.size(); ++i) {
    const size_t begin = boundaries[i].first;
    const size_t end = boundaries[i + 1].first;
    if (begin == end)
      continue;  // Empty run, nothing to output.

    const SyntaxVariants variants = SyntaxForTokenType(boundaries[i].second);
    Syntax syntax = variants.normal;
    if (is_highlight_line) {
      // On the highlighted line, runs starting at or past the (1-based) column are bolded and
      // earlier runs stay normal.
      if (static_cast<int>(begin) >= opts.highlight_column - 1)
        syntax = variants.bold;
    } else if (opts.dim_others) {
      // Other lines are either regular or dimmed.
      syntax = variants.dim;
    }

    out.Append(syntax, line.substr(begin, end - begin));
  }

  return out;
}

// Looks up the module containing the location's address and returns a weak pointer to its
// ModuleSymbols, or a null weak pointer when no module covers that address. The result is used to
// pick the module to consult for out-of-date file warnings.
fxl::WeakPtr<ModuleSymbols> GetWeakModuleForLocation(Process* process, const Location& location) {
  LoadedModuleSymbols* loaded = process->GetSymbols()->GetModuleForAddress(location.address());
  if (!loaded)
    return fxl::WeakPtr<ModuleSymbols>();
  return loaded->module_symbols()->GetWeakPtr();
}

// Generates the source listing that is interspersed with assembly code, covering the source
// leading up to and including |line|. The |prev_line| is the last source line that was output.
//
// At most kMaxContext lines are shown. When |prev_line| is an earlier line of the same file, the
// listing starts just after it so lines are not repeated. When execution moves backward in the
// same file (or to a different file), the default amount of context ending at |line| is shown.
//
// This re-opens and line splits the file for each block of source shown. This is very inefficient
// but normally disassembly is not performance sensitive. If needed this could be cached.
//
// The module_for_time_warning is an optional pointer to the module corresponding to this source
// file so we can show warnings if the build is out-of-date.
//
// If the source can't be formatted, the file/line of |line| is returned instead.
OutputBuffer FormatAsmSourceForRange(Process* process,
                                     fxl::WeakPtr<ModuleSymbols> module_for_time_warning,
                                     const SourceFileProvider& file_provider,
                                     const FileLine& prev_line, const FileLine& line) {
  // Maximum number of lines of source we'll include.
  constexpr int kMaxContext = 4;

  int first_num = line.line() - kMaxContext + 1;  // Most context we'll show.

  // Same file and moving forward: start right after the last line shown. This is skipped for
  // backward jumps because prev_line.line() + 1 would be past |line|, producing an inverted range
  // with nothing to show.
  if (prev_line.file() == line.file() && prev_line.line() < line.line())
    first_num = std::max(prev_line.line() + 1, first_num);
  first_num = std::max(1, first_num);  // Clamp to beginning of file.

  FormatSourceOpts opts;
  opts.first_line = first_num;
  opts.last_line = line.line();
  opts.left_indent = 2;
  opts.dim_others = true;  // Dim everything (we didn't specify an active line).
  opts.module_for_time_warning = std::move(module_for_time_warning);

  // Only the file and compilation directory of the FileLine are used to locate the source. The
  // range of lines to show comes from |opts|.
  OutputBuffer out;
  if (FormatSourceFileContext(line, file_provider, opts, &out).ok()) {
    // The formatted table will end in a newline which will combine with our table's newline and
    // insert a blank below the source code. Trim the embedded newline so we only get one.
    out.TrimTrailingNewlines();
    return out;
  }

  // Some error getting the source code, show the location file/line number instead. This reports
  // |line| itself (the line the following instructions belong to), not the first context line.
  return FormatFileLine(line, process->GetSymbols()->target_symbols());
}

// Describes the destination for the given call destination, formatted as for a disassembly. The
// process may be null which will mean only addresses will be printed, no symbols.
//
// The result is a comment-styled right arrow followed by the formatted location. If symbol
// resolution unexpectedly yields nothing, the raw address is shown instead.
OutputBuffer DescribeAsmCallDest(Process* process, uint64_t call_dest) {
  OutputBuffer result(Syntax::kComment, GetRightArrow() + " ");

  std::vector<Location> resolved;
  if (process) {
    // If there are multiple symbols starting at the given location (like nested inline calls), use
    // the outermost one since this is a jump *to* that location.
    ResolveOptions options;
    options.ambiguous_inline = ResolveOptions::AmbiguousInline::kOuter;

    resolved = process->GetSymbols()->ResolveInputLocation(InputLocation(call_dest), options);
    FX_DCHECK(resolved.size() == 1);  // Addresses should always match one location.
  }

  // Either there is no process to symbolize with, or the lookup produced nothing (only reachable
  // when the DCHECK above is compiled out). Use the bare address so that indexing the first element
  // below is always valid.
  if (resolved.empty())
    resolved.emplace_back(Location(Location::State::kAddress, call_dest));

  FormatLocationOptions opts;
  if (process)
    opts = FormatLocationOptions(process->GetTarget());
  opts.always_show_addresses = false;
  opts.show_file_line = false;

  result.Append(FormatLocation(resolved[0], opts));
  return result;
}

}  // namespace

// Sets |language| from the given file name, defaulting to C when FileNameToLanguage() does not
// report a language.
void FormatSourceOpts::SetLanguageFromFileName(const std::string& file_name) {
  language = FileNameToLanguage(file_name);
  if (language)
    return;

  // Still unknown: C should give reasonable highlighting for most languages.
  language = ExprLanguage::kC;
}

// Writes the context around |location| to the console.
//
// When the location has a valid file/line and the affinity is not assembly, a few lines of source
// around the location are formatted and output synchronously. Any error from that formatting is
// returned.
//
// Otherwise this falls back to disassembly: memory at the location is requested asynchronously
// and the disassembly (or the error) is written to the console when the read completes. In that
// case this function returns success immediately. Nothing is output if the process has gone away
// by the time the read completes.
Err OutputSourceContext(Process* process, std::unique_ptr<SourceFileProvider> file_provider,
                        const Location& location, SourceAffinity source_affinity) {
  if (source_affinity != SourceAffinity::kAssembly && location.file_line().is_valid()) {
    // Synchronous source output.
    FormatSourceOpts source_opts;
    source_opts.active_line = location.file_line().line();
    source_opts.highlight_line = source_opts.active_line;
    source_opts.highlight_column = location.column();
    source_opts.first_line = source_opts.active_line - 2;
    source_opts.last_line = source_opts.active_line + 2;
    source_opts.dim_others = true;
    source_opts.module_for_time_warning = GetWeakModuleForLocation(process, location);

    if (const Symbol* sym = location.symbol().Get())
      source_opts.language = DwarfLangToExprLanguage(sym->GetLanguage());

    OutputBuffer out;
    Err err = FormatSourceFileContext(location.file_line(), *file_provider, source_opts, &out);
    if (err.has_error())
      return err;

    Console::get()->Output(out);
  } else {
    // Fall back to disassembly.
    FormatAsmOpts options;
    options.emit_addresses = true;
    options.emit_bytes = false;
    options.include_source = true;
    options.active_address = location.address();

    uint64_t start_address = 0;
    const ArchInfo& arch_info = process->session()->arch_info();
    if (arch_info.is_fixed_instr()) {
      // Fixed instruction length, back up 2 instructions to provide context. Addresses are
      // unsigned, so clamp at 0 rather than wrapping around to the top of the address space when
      // the location is within two instructions of address 0 (e.g. a jump through a null pointer).
      const uint64_t backup = 2 * arch_info.max_instr_len();
      start_address = location.address() >= backup ? location.address() - backup : 0;
      options.max_instructions = 5;
    } else {
      // Variable length instructions. Since this code path is triggered when
      // there are no symbols, we can't back up reliably. Just disassemble
      // starting from the current location.
      //
      // In the future it might be nice to keep some record of recently stepped
      // instructions since usually this address will be the one after the one
      // that was just stepped.
      start_address = location.address();
      options.max_instructions = 4;
    }

    // Request enough bytes to hold the desired number of maximum-length instructions.
    size_t size = options.max_instructions * arch_info.max_instr_len();

    // The callback owns the file provider and holds the process weakly since it may run after the
    // process is gone.
    process->ReadMemory(
        start_address, size,
        [options, weak_process = process->GetWeakPtr(), file_provider = std::move(file_provider)](
            const Err& in_err, MemoryDump dump) {
          if (!weak_process)
            return;  // Give up when the process went away.

          Console* console = Console::get();
          if (in_err.has_error()) {
            console->Output(in_err);
            return;
          }
          OutputBuffer out;
          Err err = FormatAsmContext(weak_process->session()->arch_info(), dump, options,
                                     weak_process.get(), *file_provider, &out);
          if (err.has_error())
            console->Output(err);
          else
            console->Output(out);
        });
  }
  return Err();
}

// Fetches the file named by |file_line| from the provider and formats the lines selected by |opts|
// into |out|. A failure to get the file data is returned as the error.
//
// When opts.module_for_time_warning is set and the source file is known to be newer than the
// module, a warning is prepended. The warning is emitted only once for each file per module.
//
// The file contents are not cached. Caching might help performance, but care would be needed to
// always pick up the latest version since the file can get updated.
Err FormatSourceFileContext(const FileLine& file_line, const SourceFileProvider& file_provider,
                            const FormatSourceOpts& opts, OutputBuffer* out) {
  auto data_or = file_provider.GetFileData(file_line.file(), file_line.comp_dir());
  if (data_or.has_error())
    return data_or.err();
  const auto& file_data = data_or.value();

  // Compare modification times to warn about out-of-date builds.
  if (opts.module_for_time_warning) {
    // A time of 0 means there was an error getting it, in which case the check is skipped.
    const std::time_t module_time = opts.module_for_time_warning->GetModificationTime();
    const std::time_t file_time = file_data.modification_time;
    const bool known_stale = module_time && file_time && file_time > module_time;

    // The insertion reports whether this file is new to the module's already-warned set.
    if (known_stale &&
        opts.module_for_time_warning->newer_files_warned().insert(file_line.file()).second) {
      out->Append(Syntax::kWarning, GetExclamation() + " Warning:");
      out->Append(" Source file is newer than the binary. The build may be out-of-date.\n");
    }
  }

  return FormatSourceContext(file_data.full_path, file_data.contents, opts, out);
}

// Formats lines opts.first_line through opts.last_line of |file_contents| as a table appended to
// |out|. Each row has a margin (indent plus breakpoint / current-line markers), the line number,
// and the formatted source text.
//
// The requested range may extend past either end of the file; it is clamped. An error is returned
// when no lines fall inside the range, or when opts.require_active_line is set and the (nonzero)
// opts.active_line is not among the extracted lines. |file_name_for_display| is used only for the
// optional file name heading and for error messages.
Err FormatSourceContext(const std::string& file_name_for_display, const std::string& file_contents,
                        const FormatSourceOpts& opts, OutputBuffer* out) {
  FX_DCHECK(opts.active_line == 0 || !opts.require_active_line ||
            (opts.active_line >= opts.first_line && opts.active_line <= opts.last_line));

  // Allow the beginning to be out-of-range. This mirrors the end handling
  // (clamped to end-of-file) so callers can blindly create offsets from
  // a current line without clamping.
  int first_line = std::max(1, opts.first_line);

  std::vector<std::string> context = ExtractSourceLines(file_contents, first_line, opts.last_line);
  if (context.empty()) {
    // No source found for this location. If highlight_line exists, assume
    // it's the one the user cares about.
    int err_line = opts.highlight_line ? opts.highlight_line : first_line;
    return Err(fxl::StringPrintf("There is no line %d in the file %s", err_line,
                                 file_name_for_display.c_str()));
  }

  // The extracted lines are numbered first_line through first_line + context.size() - 1, so
  // |end_line| is one past the last line available. An active line at or beyond it is not present.
  const int end_line = first_line + static_cast<int>(context.size());
  if (opts.active_line != 0 && opts.require_active_line && end_line <= opts.active_line) {
    return Err(fxl::StringPrintf("There is no line %d in the file %s", opts.active_line,
                                 file_name_for_display.c_str()));
  }

  // Optional file name.
  if (opts.show_file_name) {
    out->Append("📄 ");
    out->Append(Syntax::kFileName, file_name_for_display);
    out->Append("\n");
  }

  // String to put at the beginning of each line.
  std::string indent(opts.left_indent, ' ');

  std::vector<std::vector<OutputBuffer>> rows;
  for (size_t i = 0; i < context.size(); i++) {
    int line_number = first_line + i;

    rows.emplace_back();
    std::vector<OutputBuffer>& row = rows.back();

    // Compute markers in the left margin. The marker area is two columns wide: a breakpoint marker
    // and/or the current-row marker, padded with spaces.
    OutputBuffer margin;
    if (opts.left_indent)
      margin.Append(indent);

    auto found_bp = opts.bp_lines.find(line_number);
    if (found_bp != opts.bp_lines.end()) {
      // The mapped value selects between the enabled and disabled breakpoint markers.
      std::string breakpoint_marker =
          found_bp->second ? GetBreakpointMarker() : GetDisabledBreakpointMarker();

      if (line_number == opts.active_line) {
        // Active + breakpoint.
        margin.Append(Syntax::kError, breakpoint_marker);
        margin.Append(Syntax::kHeading, GetCurrentRowMarker());
      } else {
        // Breakpoint.
        margin.Append(Syntax::kError, " " + breakpoint_marker);
      }
    } else {
      if (line_number == opts.active_line) {
        // Active line.
        margin.Append(Syntax::kHeading, " " + GetCurrentRowMarker());
      } else {
        // Inactive line with no breakpoint.
        margin.Append("  ");
      }
    }
    row.push_back(std::move(margin));

    std::string number = std::to_string(line_number);
    if (line_number == opts.highlight_line) {
      // This is the line to mark.
      row.emplace_back(Syntax::kHeading, std::move(number));
      row.push_back(FormatSourceLine(opts, true, context[i]));
    } else {
      // Normal context line.
      Syntax syntax = opts.dim_others ? Syntax::kComment : Syntax::kNormal;
      row.emplace_back(syntax, std::move(number));
      row.push_back(FormatSourceLine(opts, false, context[i]));
    }
  }

  // Columns: margin, right-aligned line number, source text.
  FormatTable(
      {ColSpec(Align::kLeft), ColSpec(Align::kRight), ColSpec(Align::kLeft, 0, std::string(), 0)},
      rows, out);
  return Err();
}

// Disassembles |dump| (at most opts.max_instructions instructions starting at the dump's address)
// and appends the result to |out| as a table. Each instruction row has a margin with breakpoint /
// current-address markers, the optional address and byte columns, the operation, its parameters,
// and a trailing comment or call destination.
//
// When opts.include_source is set and |process| is non-null, the corresponding source lines are
// interleaved as single-column rows whenever the file/line of an instruction differs from the
// last one printed. |process| may be null, in which case no source is interleaved and call
// destinations are described without symbols.
//
// Returns the error from initializing the disassembler, if any.
Err FormatAsmContext(const ArchInfo& arch_info, const MemoryDump& dump, const FormatAsmOpts& opts,
                     Process* process, const SourceFileProvider& file_provider, OutputBuffer* out) {
  // Make the disassembler.
  Disassembler disassembler;
  Err my_err = disassembler.Init(&arch_info);
  if (my_err.has_error())
    return my_err;

  Disassembler::Options options;

  std::vector<Disassembler::Row> rows;
  disassembler.DisassembleDump(dump, dump.address(), options, opts.max_instructions, &rows);

  FileLine prev_file_line;  // Last source line printed.

  std::vector<std::vector<OutputBuffer>> table;
  for (auto& row : rows) {
    // Source lookup needs the process's symbols, so it is skipped when there is no process.
    if (opts.include_source && process) {
      // Output source code if necessary.
      std::vector<Location> loc =
          process->GetSymbols()->ResolveInputLocation(InputLocation(row.address));
      if (!loc.empty() && loc[0].file_line().is_valid() && prev_file_line != loc[0].file_line()) {
        std::vector<OutputBuffer>& out_row = table.emplace_back();
        out_row.push_back(
            FormatAsmSourceForRange(process, GetWeakModuleForLocation(process, loc[0]),
                                    file_provider, prev_file_line, loc[0].file_line()));

        prev_file_line = loc[0].file_line();
      }
    }

    std::vector<OutputBuffer>& out_row = table.emplace_back();

    // Compute markers in the left margin.
    OutputBuffer margin;
    auto found_bp = opts.bp_addrs.find(row.address);
    if (found_bp != opts.bp_addrs.end()) {
      // The mapped value selects between the enabled and disabled breakpoint markers.
      std::string breakpoint_marker =
          found_bp->second ? GetBreakpointMarker() : GetDisabledBreakpointMarker();

      if (row.address == opts.active_address) {
        // Active + breakpoint.
        margin.Append(Syntax::kError, breakpoint_marker);
        margin.Append(Syntax::kHeading, GetCurrentRowMarker());
      } else {
        // Breakpoint.
        margin.Append(Syntax::kError, " " + breakpoint_marker);
      }
    } else {
      if (row.address == opts.active_address) {
        // Active line.
        margin.Append(Syntax::kHeading, " " + GetCurrentRowMarker());
      } else {
        // Inactive line with no breakpoint.
        margin.Append("  ");
      }
    }
    out_row.push_back(std::move(margin));

    if (opts.emit_addresses)
      out_row.emplace_back(Syntax::kComment, to_hex_string(row.address));
    if (opts.emit_bytes) {
      // Space-separated two-digit hex for each byte of the instruction.
      std::string bytes_str;
      for (size_t i = 0; i < row.bytes.size(); i++) {
        if (i > 0)
          bytes_str.push_back(' ');
        bytes_str.append(fxl::StringPrintf("%2.2x", row.bytes[i]));
      }
      out_row.emplace_back(Syntax::kComment, std::move(bytes_str));
    }

    // The instruction at the active address is emphasized.
    Syntax op_param_syntax =
        row.address == opts.active_address ? Syntax::kHeading : Syntax::kNormal;
    out_row.emplace_back(op_param_syntax, std::move(row.op));
    out_row.emplace_back(op_param_syntax, std::move(row.params));

    // If there's a call destination, include that. Otherwise use the disassembler-generated comment
    // if present.
    if (row.call_dest) {
      out_row.push_back(DescribeAsmCallDest(process, *row.call_dest));
    } else {
      out_row.emplace_back(Syntax::kComment, std::move(row.comment));
    }
  }

  // The column specs must line up with the cells pushed for each instruction row above.
  std::vector<ColSpec> spec;
  spec.emplace_back(Align::kLeft);  // Margin.
  if (opts.emit_addresses)
    spec.emplace_back(Align::kRight);
  if (opts.emit_bytes) {
    // Max out the bytes @ 17 cols (holds 6 bytes) to keep it from pushing
    // things too far over in the common case.
    spec.emplace_back(Align::kLeft, 17, std::string(), 1);
  }

  // When there was an address or byte listing, put 1 extra column of space
  // to separate the opcode. Otherwise keep it by the left margin.
  if (spec.size() > 1)
    spec.emplace_back(Align::kLeft, 0, std::string(), 1);  // Instructions.
  else
    spec.emplace_back(Align::kLeft, 0, std::string(), 0);  // Instructions.

  // Params. Some can be very long so provide a max so the comments don't get
  // pushed too far out.
  spec.emplace_back(Align::kLeft, 10, std::string(), 1);
  spec.emplace_back(Align::kLeft);  // Comments.

  FormatTable(spec, table, out);
  return Err();
}

// Formats the source around a breakpoint location into |out|: the breakpoint's line plus one line
// of context on each side, with the breakpoint line highlighted and marked as enabled or disabled.
// Fails when the location has no symbols, or when the source can't be formatted.
Err FormatBreakpointContext(const Location& location, const SourceFileProvider& file_provider,
                            bool enabled, OutputBuffer* out) {
  if (!location.has_symbols())
    return Err("No symbols for this location.");

  // Number of lines shown above and below the breakpoint line.
  constexpr int kBreakpointContext = 1;

  const FileLine& where = location.file_line();
  const int bp_line = where.line();

  FormatSourceOpts opts;
  opts.SetLanguageFromFileName(where.file());
  opts.first_line = bp_line - kBreakpointContext;
  opts.last_line = bp_line + kBreakpointContext;
  opts.highlight_line = bp_line;
  opts.bp_lines[bp_line] = enabled;

  return FormatSourceFileContext(where, file_provider, opts, out);
}

// Formats a single line of source. Syntax highlighting is applied when opts.language is set and
// the line tokenizes successfully into at least one token; otherwise the line is formatted without
// syntax information.
OutputBuffer FormatSourceLine(const FormatSourceOpts& opts, bool is_highlight_line,
                              const std::string& line) {
  if (!opts.language)
    return FormatSourceLineNoSyntax(opts, is_highlight_line, line);

  ExprTokenizer tokenizer(line, *opts.language);
  const bool have_tokens = tokenizer.Tokenize() && !tokenizer.tokens().empty();
  if (!have_tokens)
    return FormatSourceLineNoSyntax(opts, is_highlight_line, line);

  return FormatSourceLineWithTokens(opts, is_highlight_line, line, tokenizer.tokens());
}

}  // namespace zxdb
