| //===------------------------- SourceComparator.cpp ------------------------==// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "swift/Driver/SourceComparator.h" |
| |
| #include <tuple> |
| #include <unordered_map> |
| |
| using namespace swift; |
| using namespace incremental_ranges; |
| |
| //============================================================================== |
| // MARK: SourceComparator |
| //============================================================================== |
| |
| //============================================================================== |
| // MARK: initialization |
| //============================================================================== |
| |
| SourceComparator::SourceComparator(StringRef s1, StringRef s2) |
| : linesToCompare(splitIntoLines(s1), splitIntoLines(s2)), |
| regionsToCompare(LineSpan({0, 0}, linesToCompare.size())), |
| matches(linesToCompare.lhs().size(), None) {} |
| |
| std::vector<StringRef> SourceComparator::splitIntoLines(const StringRef s) { |
| std::vector<StringRef> result; |
| for (auto toSplit = s; !toSplit.empty();) { |
| StringRef line; |
| std::tie(line, toSplit) = toSplit.split('\n'); |
| result.push_back(line); |
| } |
| return result; |
| } |
| |
| //============================================================================== |
| // MARK: Comparing |
| //============================================================================== |
| |
| void SourceComparator::compare() { |
| // Trim ends of common elements |
| trimStart(), trimEnd(); |
| // Do the comparison |
| scanMatchedLines(buildDAGOfSubsequences(buildEquivalenceClasses())); |
| } |
| |
| void SourceComparator::trimStart() { |
| for (; regionsToCompare.bothNonEmpty() && |
| linesToCompare.matchAt(regionsToCompare.start); |
| ++regionsToCompare.start) |
| matches[regionsToCompare.start.lhs()] = regionsToCompare.start.rhs(); |
| } |
| void SourceComparator::trimEnd() { |
| for (; regionsToCompare.bothNonEmpty() && |
| linesToCompare.matchAt(--regionsToCompare.end);) |
| matches[regionsToCompare.end.lhs()] = regionsToCompare.end.rhs(); |
| } |
| |
| std::unordered_map<std::string, std::vector<size_t>> |
| SourceComparator::buildEquivalenceClasses() { |
| // Map each element of rhs to the sequence of indices it occupies. |
| std::unordered_map<std::string, std::vector<size_t>> rhsMap; |
| for (auto index = regionsToCompare.start.rhs(); |
| index < regionsToCompare.end.rhs(); ++index) |
| rhsMap[linesToCompare.rhs()[index].str()].push_back(index); |
| return rhsMap; |
| } |
| |
| std::pair<std::vector<SourceComparator::PRLink>, |
| SourceComparator::SortedSequence> |
| SourceComparator::buildDAGOfSubsequences( |
| std::unordered_map<std::string, std::vector<size_t>> rhsMap) { |
| SortedSequence thresh; |
| const size_t linksSize = regionsToCompare.size().min(); |
| std::vector<PRLink> links; |
| links.resize(linksSize); |
| |
| for (auto i = regionsToCompare.start.lhs(); i < regionsToCompare.end.lhs(); |
| ++i) { |
| // What lines in rhs does the ith line in lhs match? |
| auto iter = rhsMap.find(linesToCompare.lhs()[i].str()); |
| if (iter == rhsMap.end()) |
| continue; // no match in rhs |
| auto &res = iter->second; |
| size_t lastJ = ~0; |
| // For each match in rhs in reverse order |
| for (auto j : llvm::reverse(res)) { |
| assert(j < lastJ); |
| lastJ = j; |
| // replace the index of the match with the index of the earlier match |
| // or add it if last match |
| // (thresh gets populated for each lhs match with rhs indices) |
| if (auto optK = thresh.replaceNextLargerWith(j)) { |
| // There was a later match at thresh[k] |
| auto k = optK.getValue(); |
| // prev match (of what?) was at links[k-1]? |
| // chain to that match |
| Optional<size_t> newNext = k == 0 ? None : Optional<size_t>(k - 1); |
| links.emplace(links.begin() + k, LineIndices(i, j), newNext); |
| } |
| } |
| } |
| return {std::move(links), thresh}; |
| } |
| |
| void SourceComparator::scanMatchedLines( |
| std::pair<std::vector<PRLink>, SortedSequence> &&linksAndThres) { |
| const auto &links = linksAndThres.first; |
| const auto &thresh = linksAndThres.second; |
| |
| if (thresh.empty()) |
| return; |
| // For every match put rhs index in matches[col2 index] |
| for (const auto *p = &links[thresh.size() - 1]; p; |
| p = p->next ? &links[p->next.getValue()] : nullptr) |
| matches[p->lines.lhs()] = p->lines.rhs(); |
| } |
| |
| //============================================================================== |
| // MARK: summarizing |
| //============================================================================== |
| |
| std::vector<SourceComparator::LineSpan> |
| SourceComparator::getMatchingRegions() const { |
| std::vector<LineSpan> matches; |
| forEachMatch([&](const LineSpan r) { matches.push_back(r); }); |
| return matches; |
| } |
| |
| std::vector<SourceComparator::LineSpan> |
| SourceComparator::getMismatchingRegions() const { |
| std::vector<LineSpan> mismatches; |
| forEachMismatch([&](const LineSpan r) { mismatches.push_back(r); }); |
| return mismatches; |
| } |
| |
| void SourceComparator::forEachMatch( |
| function_ref<void(const LineSpan)> consumeMatch) const { |
| for (LineIndices nextMatch = getFirstMatch(), |
| nextMismatch = getNextMismatchAfter(nextMatch); |
| areEitherInRange(nextMatch); nextMatch = getNextMatchAfter(nextMismatch), |
| nextMismatch = getNextMismatchAfter(nextMatch)) { |
| consumeMatch(LineSpan{nextMatch, nextMismatch}); |
| } |
| } |
| |
| void SourceComparator::forEachMismatch( |
| function_ref<void(const LineSpan)> consumeMismatch) const { |
| for (LineIndices nextMismatch = getFirstMismatch(), |
| nextMatch = getNextMatchAfter(nextMismatch); |
| areEitherInRange(nextMismatch); |
| nextMismatch = getNextMismatchAfter(nextMatch), |
| nextMatch = getNextMatchAfter(nextMismatch)) { |
| consumeMismatch(LineSpan{nextMismatch, nextMatch}); |
| } |
| } |
| |
| bool SourceComparator::areEitherInRange(const LineIndices lines) const { |
| return lines.lhs() < linesToCompare.lhs().size() || |
| lines.rhs() < linesToCompare.rhs().size(); |
| } |
| |
| SourceComparator::LineIndices SourceComparator::getFirstMatch() const { |
| for (size_t i = 0; i < linesToCompare.lhs().size(); ++i) { |
| if (auto m = matches[i]) |
| return {i, m.getValue()}; |
| } |
| return linesToCompare.size(); |
| } |
| |
| SourceComparator::LineIndices SourceComparator::getFirstMismatch() const { |
| size_t i = 0; |
| for (; i < linesToCompare.lhs().size() && matches[i].getValueOr(~0) == i; |
| ++i) { |
| } |
| return {i, i}; |
| } |
| |
| SourceComparator::LineIndices |
| SourceComparator::getNextMismatchAfter(const LineIndices nextMatch) const { |
| LineIndices possibleNextMatch = nextMatch + 1; |
| for (; possibleNextMatch.lhs() < matches.size() && |
| possibleNextMatch.rhs() == |
| matches[possibleNextMatch.lhs()].getValueOr(~0); |
| ++possibleNextMatch) { |
| } |
| return possibleNextMatch; |
| } |
| |
| SourceComparator::LineIndices |
| SourceComparator::getNextMatchAfter(const LineIndices nextMismatch) const { |
| // matches[nextMismatch.lhs] could be present if the gap was in 2 |
| // so start right at nextMismatch |
| for (auto maybeMatch1 = nextMismatch.lhs(); maybeMatch1 < matches.size(); |
| ++maybeMatch1) { |
| if (auto rhsMatch = matches[maybeMatch1]) |
| return {maybeMatch1, rhsMatch.getValue()}; |
| } |
| return linesToCompare.size(); |
| } |
| |
| SourceComparator::LRSerializableRange |
| SourceComparator::convertAMismatch(const LineSpan mismatchLines) const { |
| CharIndices firstMismatchInLine = computeFirstMismatchInLine(mismatchLines); |
| CharIndices endMismatchInLine = |
| computeEndMismatchInLine(mismatchLines, firstMismatchInLine); |
| |
| const LineIndices endLinesForCoords = { |
| computeEndLinesToGoWithChars(mismatchLines, endMismatchInLine, Side::L), |
| computeEndLinesToGoWithChars(mismatchLines, endMismatchInLine, Side::R), |
| }; |
| |
| // 1-origin for diff format |
| // Since we convert from a semi-closed line range to a semi-closed |
| // line, char range, we don't add 1 to the end lines. |
| const auto lhsMismatchStart = SerializableSourceLocation{ |
| mismatchLines.start.lhs() + 1, firstMismatchInLine.lhs() + 1}; |
| const auto rhsMismatchStart = SerializableSourceLocation{ |
| mismatchLines.start.rhs() + 1, firstMismatchInLine.rhs() + 1}; |
| const auto lhsMismatchEnd = SerializableSourceLocation{ |
| endLinesForCoords.lhs() + 1, endMismatchInLine.lhs() + 1}; |
| const auto rhsMismatchEnd = SerializableSourceLocation{ |
| endLinesForCoords.rhs() + 1, endMismatchInLine.rhs() + 1}; |
| |
| if (lhsMismatchEnd < lhsMismatchStart) |
| llvm::errs() << "LEFT: "; |
| if (rhsMismatchEnd < rhsMismatchStart) |
| llvm::errs() << "RIGHT: "; |
| |
| const auto lhsMismatchRange = |
| SerializableSourceRange(lhsMismatchStart, lhsMismatchEnd); |
| const auto rhsMismatchRange = |
| SerializableSourceRange(rhsMismatchStart, rhsMismatchEnd); |
| |
| return {lhsMismatchRange, rhsMismatchRange}; |
| } |
| |
| SourceComparator::CharIndices SourceComparator::computeFirstMismatchInLine( |
| const LineSpan mismatchLines) const { |
| return mismatchLines.bothNonEmpty() |
| ? computeFirstMismatchInLinesOnBothSides(mismatchLines) |
| : computeFirstMismatchInLineOnOneSide( |
| mismatchLines, |
| mismatchLines.start.lhs() < mismatchLines.end.lhs() |
| ? Side::L |
| : Side::R); |
| } |
| |
| SourceComparator::CharIndices |
| SourceComparator::computeFirstMismatchInLinesOnBothSides( |
| const LineSpan mismatchLines) const { |
| CharIndices firstMismatchInLine = {0, 0}; // next char after last char |
| if (mismatchLines.start < linesToCompare.size()) { |
| const auto firstLines = linesToCompare[mismatchLines.start]; |
| const auto ends = firstLines.size(); |
| for (; |
| firstMismatchInLine < ends && firstLines.matchAt(firstMismatchInLine); |
| ++firstMismatchInLine) { |
| } |
| } |
| return firstMismatchInLine; |
| } |
| |
| SourceComparator::CharIndices |
| SourceComparator::computeFirstMismatchInLineOnOneSide( |
| const LineSpan mismatchLines, const Side nonemptySide) const { |
| return {0, 0}; |
| } |
| |
| SourceComparator::CharIndices SourceComparator::computeEndMismatchInLine( |
| const LineSpan mismatchLines, const CharIndices firstMismatchInLine) const { |
| return mismatchLines.bothNonEmpty() |
| ? computeEndMismatchInLineOnBothSides(mismatchLines, |
| firstMismatchInLine) |
| : computeEndMismatchInLineOnOneSide( |
| mismatchLines, firstMismatchInLine, |
| mismatchLines.start.lhs() < mismatchLines.end.lhs() |
| ? Side::L |
| : Side::R); |
| } |
| |
| SourceComparator::CharIndices |
| SourceComparator::computeEndMismatchInLineOnBothSides( |
| LineSpan mismatchLines, const CharIndices firstMismatchInLine) const { |
| CharIndices endMismatchInLine(0, 0); |
| const auto lastLines = linesToCompare[mismatchLines.end - 1]; |
| for (endMismatchInLine = lastLines.size(); |
| firstMismatchInLine < endMismatchInLine && |
| lastLines.matchAt(endMismatchInLine - 1); |
| --endMismatchInLine) { |
| } |
| return endMismatchInLine; |
| } |
| |
| SourceComparator::CharIndices |
| SourceComparator::computeEndMismatchInLineOnOneSide( |
| LineSpan mismatchLines, const CharIndices firstMismatchInLine, |
| const Side side) const { |
| const size_t lineSize = |
| linesToCompare.side(side)[mismatchLines.end[side] - 1].size(); |
| return side == Side::L ? CharIndices(lineSize, 0) : CharIndices(0, lineSize); |
| } |
| |
| size_t SourceComparator::computeEndLinesToGoWithChars( |
| const LineSpan mismatchLines, const CharIndices endMismatchInLine, |
| const Side side) const { |
| const bool noMismatch = mismatchLines.start[side] == mismatchLines.end[side]; |
| if (noMismatch) |
| return mismatchLines.start[side]; |
| const bool mismatchEndsWithinALine = endMismatchInLine[side] != 0; |
| // If mismatch ends within a line, then the line coordinate should be the |
| // line *before* the line after the mismatch. |
| return mismatchEndsWithinALine ? mismatchLines.end[side] - 1 |
| : mismatchLines.end[side]; |
| } |
| |
| SourceComparator::LRRanges SourceComparator::convertAllMismatches() const { |
| LRRanges mismatches; |
| |
| forEachMismatch([&](const SourceComparator::LineSpan mismatch) { |
| mismatches.push_back(convertAMismatch(mismatch)); |
| }); |
| return mismatches; |
| } |
| |
| //============================================================================== |
| // MARK: printing |
| //============================================================================== |
| |
| void SourceComparator::print(raw_ostream &out) const { |
| forEachMismatch( |
| [&](const LineSpan &mismatch) { reportMismatch(mismatch, out); }); |
| } |
| |
| void SourceComparator::reportMismatch(const LineSpan mismatch, |
| raw_ostream &out) const { |
| |
| auto printRange = [&](size_t rangeStart, size_t rangeEnd) { |
| // 1-origin for llvm compat |
| ++rangeStart, ++rangeEnd; |
| const auto rangeLast = rangeEnd - 1; |
| out << std::min(rangeStart, rangeLast); |
| if (rangeStart < rangeLast) |
| out << ", " << rangeLast; |
| }; |
| |
| if (mismatch.bothEmpty()) |
| return; |
| |
| const auto lhsStart = mismatch.start.lhs(); |
| const auto lhsEnd = mismatch.end.lhs(); |
| const auto rhsStart = mismatch.start.rhs(); |
| const auto rhsEnd = mismatch.end.rhs(); |
| |
| printRange(lhsStart, lhsEnd); |
| { |
| const auto added = "a", deleted = "d", changed = "c"; |
| out << (lhsStart >= lhsEnd ? added |
| : rhsStart >= rhsEnd ? deleted : changed); |
| } |
| printRange(rhsStart, rhsEnd); |
| out << "\n"; |
| |
| for (auto i = lhsStart; i < lhsEnd; ++i) |
| out << "< " << linesToCompare.lhs()[i] << "\n"; |
| if (lhsStart < lhsEnd && rhsStart < rhsEnd) |
| out << "---\n"; |
| for (auto i = rhsStart; i < rhsEnd; ++i) |
| out << "> " << linesToCompare.rhs()[i] << "\n"; |
| } |
| |
| //============================================================================== |
| // MARK: SortedSequence |
| //============================================================================== |
| |
| Optional<size_t> |
| SourceComparator::SortedSequence::replaceNextLargerWith(const Elem x) { |
| auto loc = locationOfFirstGreaterOrEqualElement(x); |
| if (loc >= contents.size()) { |
| contents.push_back(x); |
| return loc; |
| } |
| if (contents[loc] == x) |
| return None; |
| contents[loc] = x; |
| return loc; |
| } |
| |
| size_t SourceComparator::SortedSequence::locationOfFirstGreaterOrEqualElement( |
| const Elem x) const { |
| return std::lower_bound(contents.begin(), contents.end(), x) - |
| contents.begin(); |
| } |
| |
| //============================================================================== |
| // MARK: testing |
| //============================================================================== |
| |
| bool SourceComparator::test() { |
| auto doCompare = [&](const char *lhs, |
| const char *rhs) -> LRSerializableRange { |
| SourceComparator sc(lhs, rhs); |
| sc.compare(); |
| sc.dump(); |
| auto mismatches = sc.convertAllMismatches(); |
| assert(mismatches.lhs().size() == 1); |
| assert(mismatches.rhs().size() == 1); |
| return LRSerializableRange(mismatches.lhs()[0], mismatches.rhs()[0]); |
| }; |
| { |
| auto a = "}\nfunc nine() {}\nfunc ten() {}"; |
| auto b = "}\nfunc ten() {}"; |
| auto mismatches = doCompare(a, b); |
| assert(mismatches.lhs() == SerializableSourceRange({2, 1}, {2, 15})); |
| assert(mismatches.rhs() == SerializableSourceRange({2, 1}, {2, 1})); |
| } |
| { |
| auto a = "abcde"; |
| auto b = "abde"; |
| auto mismatches = doCompare(a, b); |
| assert(mismatches.lhs() == SerializableSourceRange({1, 3}, {1, 4})); |
| assert(mismatches.rhs() == SerializableSourceRange({1, 3}, {1, 3})); |
| } |
| { |
| auto a = "line0\n"; |
| auto b = "line0\nline1\n"; |
| auto mismatches = doCompare(a, b); |
| assert(mismatches.lhs() == SerializableSourceRange({2, 1}, {2, 1})); |
| assert(mismatches.rhs() == SerializableSourceRange({2, 1}, {2, 6})); |
| } |
| { |
| auto a = "var snort = 333\n"; |
| auto b = "var fred = 123456\n"; |
| auto mismatches = doCompare(a, b); |
| assert(mismatches.lhs() == SerializableSourceRange({1, 5}, {1, 16})); |
| assert(mismatches.rhs() == SerializableSourceRange({1, 5}, {1, 18})); |
| } |
| { |
| auto a = "line1\nline2a\nline3\nline4a\nline5\nline6\n"; |
| auto b = "line1\nline2b\nline3\nline4b\nline6\n"; |
| |
| SourceComparator sc(a, b); |
| sc.compare(); |
| |
| std::vector<Optional<size_t>> matchesVec{0, None, 2, None, None, 4}; |
| assert(sc.matches == matchesVec); |
| |
| auto matches = sc.getMatchingRegions(); |
| assert(matches.size() == 3); |
| assert(matches[0] == LineSpan({0, 0}, {1, 1})); |
| assert(matches[1] == LineSpan({2, 2}, {3, 3})); |
| assert(matches[2] == LineSpan({5, 4}, {6, 5})); |
| |
| auto mismatches = sc.getMismatchingRegions(); |
| assert(mismatches.size() == 2); |
| assert(mismatches[0] == LineSpan({1, 1}, {2, 2})); |
| assert(mismatches[1] == LineSpan({3, 3}, {5, 4})); |
| sc.dump(); |
| sc.convertAllMismatches(); |
| } |
| return true; |
| } |