|  | // Copyright 2018 The Fuchsia Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | // The implementation for the FormattingTreeVisitor that pretty-prints FIDL code. | 
|  | #include "fidl/formatter.h" | 
|  |  | 
|  | #include <locale> | 
|  | #include <map> | 
|  | #include <regex> | 
|  | #include <set> | 
|  | #include <string> | 
|  |  | 
|  | namespace fidl { | 
|  | namespace raw { | 
|  |  | 
|  | // Rules: | 
|  | //   No more than one blank line in a row. | 
|  | //   Keep blank lines before and after comments. | 
|  | //   Will add newlines before top level declarations later. | 
|  | void FormattingTreeVisitor::Segment::RemoveExtraBlankLines(bool respects_trailing_blankline) { | 
|  | std::set<int> blank_line_indices; | 
|  | std::set<int> comment_line_indices; | 
|  | std::map<int, int> line_offsets; | 
|  |  | 
|  | // First, find all of the blank lines and comment lines. | 
|  | int line_num = 0; | 
|  | line_offsets[line_num++] = 0; | 
|  | for (int i = 0; i < static_cast<int>(output_.size()); i++) { | 
|  | if (output_[i] == '\n' && i + 1 != static_cast<int>(output_.size())) { | 
|  | line_offsets[line_num] = i + 1; | 
|  | bool is_blank = true; | 
|  | bool is_comment = false; | 
|  | for (int j = i + 1; j < static_cast<int>(output_.size()) && output_[j] != '\n'; j++) { | 
|  | if (!utils::IsWhitespaceNoNewline(output_[j])) { | 
|  | is_blank = false; | 
|  | } | 
|  | if (utils::LineFromOffsetIsRegularComment(output_, j)) { | 
|  | is_comment = true; | 
|  | } | 
|  | } | 
|  | if (is_blank) { | 
|  | blank_line_indices.insert(line_num); | 
|  | } | 
|  | if (is_comment) { | 
|  | comment_line_indices.insert(line_num); | 
|  | } | 
|  | line_num++; | 
|  | } | 
|  | } | 
|  |  | 
|  | int last_line_num = line_num - 1; | 
|  |  | 
|  | // Next, get rid of any blank line that isn't next to a comment or, | 
|  | // if we are respecting trailing blank lines, right before the end. | 
|  | // Make an exception if the previous line is blank - i.e., coalesce | 
|  | // multiple blank lines.  Work backwards so we don't screw up line | 
|  | // numbers. | 
|  | auto unmodified_bli(blank_line_indices); | 
|  | for (auto i = unmodified_bli.rbegin(); i != unmodified_bli.rend(); ++i) { | 
|  | int line_num = *i; | 
|  | bool is_next_to_comment = comment_line_indices.count(line_num - 1) != 0 || | 
|  | comment_line_indices.count(line_num + 1) != 0; | 
|  | bool need_to_preserve_because_last = | 
|  | respects_trailing_blankline && line_num == last_line_num - 1; | 
|  | bool need_to_coalesce = blank_line_indices.count(line_num + 1) != 0; | 
|  | if ((!is_next_to_comment && !need_to_preserve_because_last) || need_to_coalesce) { | 
|  | int offset = line_offsets[line_num]; | 
|  | size_t next_newline = output_.find_first_of("\n", offset); | 
|  | if (next_newline == std::string::npos) { | 
|  | next_newline = output_.size(); | 
|  | } | 
|  | output_.erase(offset, next_newline - offset + 1); | 
|  | // This line doesn't exist anymore.  Erase it from the various | 
|  | // indices so that subsequent lines don't get confused about whether | 
|  | // they need to coalesce with a line that isn't there anymore. | 
|  | comment_line_indices.erase(line_num); | 
|  | blank_line_indices.erase(line_num); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Assumptions: Leading WS has been stripped. | 
|  | // Rules: | 
|  | //  - newlines after ';', '{' (unless before a comment) | 
|  | //  - newlines before top-level decls (unless after a comment). | 
|  | void FormattingTreeVisitor::Segment::InsertRequiredNewlines(bool is_top_level) { | 
|  | // Insert lines after ';' and '{', if not already present | 
|  | for (int i = 0; i < static_cast<int>(output_.size()); i++) { | 
|  | MaybeWindPastComment(output_, i); | 
|  | char ch = output_[i]; | 
|  | if (ch == ';' || ch == '{') { | 
|  | if (i == static_cast<int>(output_.size()) - 1) { | 
|  | output_.append("\n"); | 
|  | } else { | 
|  | size_t j = output_.find_first_not_of(utils::kWhitespaceNoNewlineChars, i + 1); | 
|  | // Unless the next thing is a comment. | 
|  | if (j != std::string::npos && !utils::LineFromOffsetIsRegularComment(output_, j)) { | 
|  | // Make the next thing a newline. | 
|  | if (utils::IsWhitespaceNoNewline(output_[i + 1])) { | 
|  | output_[i + 1] = '\n'; | 
|  | } else if (output_[i + 1] != '\n') { | 
|  | output_.insert(i + 1, "\n"); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Insert lines before top level decls. | 
|  | if (is_top_level) { | 
|  | // Right before the last word in this string, we need a blank line, | 
|  | // followed by some (possibly zero) number of comment lines.  So we | 
|  | // break the string into lines, and then work backwards. | 
|  |  | 
|  | std::stringstream ss(output_); | 
|  | std::string tmp; | 
|  | std::vector<std::string> lines; | 
|  |  | 
|  | while (std::getline(ss, tmp, '\n')) { | 
|  | lines.push_back(tmp); | 
|  | } | 
|  | std::string terminal = lines.back(); | 
|  | lines.pop_back(); | 
|  | if (lines.size() == 1) { | 
|  | lines[0].append("\n"); | 
|  | } else { | 
|  | // From the end of the list of lines, find the first line | 
|  | // that isn't a comment, and insert a blank line (if it | 
|  | // isn't already blank). | 
|  | int i = static_cast<int>(lines.size()) - 1; | 
|  | while (i >= 0 && utils::FirstLineIsRegularComment(lines[i])) { | 
|  | i--; | 
|  | } | 
|  |  | 
|  | if (!utils::FirstLineIsBlank(lines[i])) { | 
|  | lines.insert(lines.begin() + i + 1, ""); | 
|  | } | 
|  | } | 
|  | output_ = ""; | 
|  | for (auto line : lines) { | 
|  | output_ += line + "\n"; | 
|  | } | 
|  | output_ += terminal; | 
|  | } | 
|  | } | 
|  |  | 
|  | int FormattingTreeVisitor::Segment::EraseMultipleSpacesAt(int pos, int leave_this_many, | 
|  | bool incl_newline) { | 
|  | std::function<bool(char)> is_ws; | 
|  | if (incl_newline) { | 
|  | is_ws = [](char ch) { return utils::IsWhitespace(ch); }; | 
|  | } else { | 
|  | is_ws = [](char ch) { return utils::IsWhitespaceNoNewline(ch); }; | 
|  | } | 
|  | if (!is_ws(output_[pos])) { | 
|  | return 0; | 
|  | } | 
|  | int length_of_spaces = 0; | 
|  |  | 
|  | int start_pos = pos; | 
|  | int end_pos = pos; | 
|  | while (start_pos > 0 && is_ws(output_[start_pos - 1])) { | 
|  | start_pos--; | 
|  | } | 
|  |  | 
|  | // int_size - 2 can be negative, and output_.size() is unsigned, | 
|  | // cast to make the comparison work. | 
|  | int int_size = static_cast<int>(output_.size()); | 
|  | while (end_pos <= int_size - 2 && is_ws(output_[end_pos + 1])) { | 
|  | end_pos++; | 
|  | } | 
|  |  | 
|  | length_of_spaces = end_pos - start_pos + 1; | 
|  | int num_deleted_spaces = std::max(length_of_spaces - leave_this_many, 0); | 
|  | output_.erase(start_pos, num_deleted_spaces); | 
|  | return num_deleted_spaces; | 
|  | } | 
|  |  | 
|  | // Assumption: Trailing WS has been stripped, spaces have been changed to ' ' | 
|  | // Rules: | 
|  | //  - No non-' ' or '\n' whitespace | 
|  | //  - One ws token before / after every ws-requiring character | 
|  | //  - No non-newline ws before / after characters that don't want it. | 
|  | //  - "->" operators are never at the end of the line. | 
|  | void FormattingTreeVisitor::Segment::RegularizeSpaces(bool& ws_required_next, bool& no_ws_next) { | 
|  | assert(!(ws_required_next && no_ws_next)); | 
|  |  | 
|  | bool last_char_required_ws = false; | 
|  | bool last_char_no_ws = false; | 
|  |  | 
|  | if (ws_required_next && (output_.empty() || !utils::IsWhitespace(output_[0]))) { | 
|  | // The current string does not begin with the required whitespace, so insert it. | 
|  | output_.insert(0, " "); | 
|  | ws_required_next = false; | 
|  | } else if (no_ws_next) { | 
|  | EraseMultipleSpacesAt(0, 0); | 
|  | } | 
|  |  | 
|  | for (int i = 0; i < static_cast<int>(output_.size()); i++) { | 
|  | // If it is a comment, jump to EOL. | 
|  | MaybeWindPastComment(output_, i); | 
|  |  | 
|  | // If we see "->\n", change it to "\n->". | 
|  | const char arrow_nl[] = "->\n"; | 
|  | if (output_.compare(i, strlen(arrow_nl), arrow_nl) == 0) { | 
|  | output_.replace(i, strlen(arrow_nl), "\n->"); | 
|  | i -= EraseMultipleSpacesAt(i - 1, 0); | 
|  | } | 
|  |  | 
|  | // Erase multiple spaces | 
|  | EraseMultipleSpacesAt(i); | 
|  |  | 
|  | // Ensure whitespace around certain characters | 
|  | if (RequiresWSBeforeChar(output_[i])) { | 
|  | if (i == 0 || !utils::IsWhitespace(output_[i - 1])) { | 
|  | output_.insert(i, " "); | 
|  | i++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // This is a little weird.  '(' requires ws if it follows an | 
|  | // arrow, but not if it follows a method name.  Both of these | 
|  | // are in protocol method definitions, so this ends up being | 
|  | // slightly easier than having it positionally defined during | 
|  | // AST traversal. | 
|  | if (output_[i] == '(') { | 
|  | if (!last_char_required_ws && i > 0) { | 
|  | i -= EraseMultipleSpacesAt(i - 1, 0); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Ensure no whitespace around other characters | 
|  | if (NoSpacesBeforeChar(output_[i])) { | 
|  | if (i > 0) { | 
|  | i -= EraseMultipleSpacesAt(i - 1, 0, NoWSBeforeChar(output_[i])); | 
|  | } | 
|  | } | 
|  |  | 
|  | // We don't want whitespace after these characters... unless there is a | 
|  | // comment after the WS. | 
|  | int j; | 
|  | for (j = i + 1; | 
|  | j < static_cast<int>(output_.size()) && utils::IsWhitespaceNoNewline(output_[j]); j++) | 
|  | ; | 
|  | if (NoWSAfterChar(output_[i]) && !utils::LineFromOffsetIsRegularComment(output_, j)) { | 
|  | EraseMultipleSpacesAt(i + 1, 0); | 
|  | } | 
|  |  | 
|  | // The following clause is figuring out whether the next | 
|  | // iteration requires ws, so we need to keep it past anything | 
|  | // that uses that information in the loop. | 
|  | if (RequiresWSAfterChar(output_[i])) { | 
|  | if (i != static_cast<int>(output_.size()) - 1 && !utils::IsWhitespace(output_[i + 1])) { | 
|  | output_.insert(i + 1, " "); | 
|  | i++; | 
|  | } | 
|  | last_char_required_ws = true; | 
|  | } else { | 
|  | if (!utils::IsWhitespace(output_[i])) { | 
|  | last_char_required_ws = false; | 
|  | } | 
|  | last_char_no_ws = NoWSAfterChar(output_[i]); | 
|  | } | 
|  | } | 
|  | ws_required_next = last_char_required_ws; | 
|  | no_ws_next = last_char_no_ws; | 
|  | } | 
|  |  | 
|  | // Rules are mostly obvious, but see TrackProtocolMethodAlignment below. | 
|  | // Precondition: By now, everything should have had its leading ws | 
|  | // stripped, and } characters are the first things on their own lines. | 
|  | void FormattingTreeVisitor::Segment::Indent(int& current_nesting) { | 
|  | for (int i = 0; i < static_cast<int>(output_.size()); i++) { | 
|  | if (output_[i] == '\n') { | 
|  | // Don't indent a blank line. | 
|  | if (output_[i + 1] == '\n') { | 
|  | continue; | 
|  | } | 
|  | // If this is an outdent line, do that. | 
|  | if (output_[i + 1] == '}') { | 
|  | current_nesting--; | 
|  | } | 
|  | int indent = current_nesting * kIndentSpaces; | 
|  | if (visitor_->newline_means_indent_more_) { | 
|  | if (visitor_->protocol_method_alignment_ && | 
|  | visitor_->protocol_method_alignment_size_ > -1) { | 
|  | indent = visitor_->protocol_method_alignment_size_; | 
|  | } else { | 
|  | indent += kIndentSpaces; | 
|  | } | 
|  | } | 
|  | output_.insert(i + 1, indent, ' '); | 
|  | } | 
|  |  | 
|  | int pos = i; | 
|  | // Skip comments at this point, because we don't want to | 
|  | // increase nesting based on a '{' character in a comment. :) | 
|  | MaybeWindPastComment(output_, pos); | 
|  |  | 
|  | // 1 less than pos because i will be incremented on the next | 
|  | // iteration.  But that means it is a real character, so we need | 
|  | // to skip testing that character to see if it changes the | 
|  | // nesting level. | 
|  | if (pos != i) { | 
|  | i = pos - 1; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (output_[i] == '{') { | 
|  | current_nesting++; | 
|  | } | 
|  | if (output_[i] == ')') { | 
|  | visitor_->protocol_method_alignment_size_ = visitor_->offset_of_first_id_; | 
|  | } | 
|  | if (output_[i] == ']') { | 
|  | visitor_->protocol_method_alignment_size_ = visitor_->protocol_method_alignment_size_backup; | 
|  | visitor_->protocol_method_alignment_size_backup = -1; | 
|  | } | 
|  | if (output_[i] == ';') { | 
|  | visitor_->protocol_method_alignment_size_ = -1; | 
|  | visitor_->protocol_method_alignment_ = false; | 
|  | visitor_->newline_means_indent_more_ = false; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // The purpose of this method is to figure out what the indentation will be if | 
|  | // we encounter a newline.  The rule is : | 
|  | //  - If there isn't a parameter on the same line after the '(' character, +1 | 
|  | //    indent past the beginning of the method name. | 
|  | //  - If there is a parameter on the same line after the '(' character, | 
|  | //    align at the same vertical column as that parameter. | 
|  | //  - If there is a parameter attribute, +1 indent past the beginning of '[' | 
|  | void FormattingTreeVisitor::TrackProtocolMethodAlignment(const std::string& str) { | 
|  | static std::locale c_locale("C"); | 
|  | if (!protocol_method_alignment_) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | for (int i = 0; i < static_cast<int>(str.size()); i++) { | 
|  | MaybeWindPastComment(str, i); | 
|  |  | 
|  | char ch = str[i]; | 
|  | if (ch == '\n') { | 
|  | if (has_encountered_param_list_start_ && | 
|  | !is_param_list_first_param_on_same_line_.has_value()) { | 
|  | // The first parameter of the parameter list has not been encountered before a newline | 
|  | // character; thus, the first parameter is not on the same line. | 
|  | is_param_list_first_param_on_same_line_ = false; | 
|  | } | 
|  | distance_from_last_newline_ = 0; | 
|  | } else { | 
|  | distance_from_last_newline_++; | 
|  | } | 
|  |  | 
|  | // This figures out if we are supposed to align to the '(' or the | 
|  | // method name. | 
|  | if (ch == '(') { | 
|  | has_encountered_param_list_start_ = true; | 
|  | } | 
|  |  | 
|  | // Alignment for attributes. | 
|  | if (ch == '[') { | 
|  | if (has_encountered_param_list_start_ && | 
|  | !is_param_list_first_param_on_same_line_.has_value()) { | 
|  | is_param_list_first_param_on_same_line_ = true; | 
|  | protocol_method_alignment_size_ = distance_from_last_newline_ - 1; | 
|  | } | 
|  | protocol_method_alignment_size_backup = protocol_method_alignment_size_; | 
|  | protocol_method_alignment_size_ = distance_from_last_newline_; | 
|  | } | 
|  |  | 
|  | if (isalpha(ch, c_locale)) { | 
|  | if (protocol_method_alignment_size_ == -1) { | 
|  | // This should be the method identifier. | 
|  | offset_of_first_id_ = protocol_method_alignment_size_ = | 
|  | distance_from_last_newline_ + kIndentSpaces - 1; | 
|  | } else if (has_encountered_param_list_start_ && | 
|  | !is_param_list_first_param_on_same_line_.has_value()) { | 
|  | // This is the first element of the parameter list and its on the same line. | 
|  | is_param_list_first_param_on_same_line_ = true; | 
|  | protocol_method_alignment_size_ = distance_from_last_newline_ - 1; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void FormattingTreeVisitor::OnFile(std::unique_ptr<fidl::raw::File> const& element) { | 
|  | // Eat ws at the beginning of the file. | 
|  | fidl::Token real_start = element->start_; | 
|  | std::string_view start_view = real_start.previous_end().data(); | 
|  | const char* start_ptr = start_view.data(); | 
|  | size_t initial_length = start_view.size(); | 
|  | size_t offset = strspn(start_ptr, utils::kWhitespaceChars); | 
|  | std::string_view processed_file_start(start_ptr + offset, initial_length - offset); | 
|  | element->start_.set_previous_end( | 
|  | fidl::SourceSpan(processed_file_start, real_start.previous_end().source_file())); | 
|  |  | 
|  | DeclarationOrderTreeVisitor::OnFile(element); | 
|  | // Remove EOF whitespace | 
|  | size_t final_size = formatted_output_.size(); | 
|  | size_t last_char_index = final_size - 1; | 
|  | if (formatted_output_.at(last_char_index) != '\n' || | 
|  | strchr(utils::kWhitespaceChars, formatted_output_.at(last_char_index - 1)) != nullptr) { | 
|  | while (strchr(utils::kWhitespaceChars, formatted_output_.at(last_char_index)) != nullptr) { | 
|  | last_char_index--; | 
|  | } | 
|  | formatted_output_ = formatted_output_.substr(0, last_char_index + 1); | 
|  | formatted_output_.append("\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  | }  // namespace raw | 
|  | }  // namespace fidl |