blob: e7c86f486866f7d831b0803e8f0f89b98f0dfa38 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'dart:math';
import 'package:path/path.dart' as p;
import './comparison_result.dart';
/// Helper class for calculating the Levenshtein distance between two strings.
///
/// This is added in place of the third-party package edit_distance, which
/// lacked support for sound null-safety in it's stable release.
class Levenshtein {
int distance(String s, String t) {
if (s.isEmpty) return t.length;
if (t.isEmpty) return s.length;
var row0 = List<int>.generate(t.length + 1, (int i) => i);
var row1 = List<int>.filled(t.length + 1, 0);
for (int i = 0; i < s.length; i++) {
row1[0] = i + 1;
for (int j = 0; j < t.length; j++) {
var deletionCost = row0[j + 1] + 1;
var insertionCost = row1[j] + 1;
var substitutionCost = row0[j] + (s[i] == t[j] ? 0 : 1);
row1[j + 1] = min(min(deletionCost, insertionCost), substitutionCost);
}
row0 = List.from(row1);
}
return row0[t.length];
}
}
/// Comparison helper which allows other tools to easily assess whether items
/// are "alike" on variable criteria.
abstract class Comparer {
ComparisonResult equals(String? haystack, String? needle);
ComparisonResult contains(String? haystack, String? needle);
ComparisonResult startsWith(String? haystack, String? needle);
ComparisonResult endsWith(String? haystack, String? needle);
}
/// Comparer with zero creativity. Either the strings match or they don't.
class StrictComparer extends Comparer {
@override
ComparisonResult equals(String? haystack, String? needle) =>
ComparisonResult.strict(isMatch: haystack == needle);
@override
ComparisonResult contains(String? haystack, String? needle) =>
ComparisonResult.strict(
isMatch: needle != null && (haystack?.contains(needle) ?? false));
@override
ComparisonResult startsWith(String? haystack, String? needle) =>
ComparisonResult.strict(
isMatch: needle != null && (haystack?.startsWith(needle) ?? false));
@override
ComparisonResult endsWith(String? haystack, String? needle) =>
ComparisonResult.strict(
isMatch: needle != null && (haystack?.endsWith(needle) ?? false));
}
/// Comparer backed by Levenshtein distance to suggest possible typos after
/// the test suite matches zero tests.
///
/// To support the desire to match substrings (particularly of long paths),
/// this will look for slashes in the [needle] and consume as many slashes in
/// the [haystack]. For example the following example will calculate a
/// Levenshtein distance of 1 and then, with that below the threshold of 3,
/// return `true`.
///
/// ```dart
/// FuzzyComparer({threshold: 3}).endsWith(
/// '/some/long/path/to/our/binary',
/// 'out/binary',
/// )
/// >>> true
/// ```
///
/// This is because the [FuzzyComparer] will detect that there is one slash in
/// the [needle] thus, internally, run the following evaluation:
///
/// ```dart
/// levenshtein('our/binary', 'out/binary')
/// ```
///
/// Behavior is more involved with the [contains] method because there we cannot
/// assume the starting or ending substrings. Thus, [contains] counts the slashes
/// in the [needle] and compares against all possible chunks of [haystack].
/// Thus, our previous example, swapped for [contains] instead of [endsWith],
/// would run all of the following comparisons:
///
/// ```dart
/// return min([
/// levenshtein('some/long', 'out/binary'),
/// levenshtein('long/path', 'out/binary'),
/// levenshtein('path/to', 'out/binary'),
/// levenshtein('to/our', 'out/binary'),
/// levenshtein('our/binary', 'out/binary'),
/// ]);
/// ```
///
/// This behavior should hopefully "Get to Yes" in terms of helpful
/// "Did you mean?" suggestions without too many false-positives.
class FuzzyComparer extends Comparer {
/// Levenshtein distance cutoff for considering two values a match.
/// EXCLUSIVE.
final int threshold;
static final Levenshtein _lev = Levenshtein();
/// Value returned when either or both strings are null.
static const int maxDistance = 999;
static const String separator = '/';
/// Whether the levenshtein algorithm should consider characters different
/// if they are the same letter, but upper and lower case.
final bool caseSensitive;
FuzzyComparer({
required this.threshold,
this.caseSensitive = true,
});
int _levDistance(String? h, String? n) => (h == null || n == null)
? FuzzyComparer.maxDistance
: _lev.distance(
caseSensitive ? h : h.toLowerCase(),
caseSensitive ? n : n.toLowerCase(),
);
/// Due to the chunking nature of our matching, leading or trailing slashes
/// in the needle will cause false-negatives.
String? _prepareNeedle(String? needle) {
var _needle = needle;
if (_needle == null) return _needle;
if (_needle.startsWith(separator)) {
_needle = _needle.substring(1);
}
if (_needle.endsWith(separator)) {
_needle = _needle.substring(0, _needle.length - 1);
}
return _needle;
}
double computeConfidence(int levenshteinDistance) =>
levenshteinDistance >= threshold
? 0
: (threshold - levenshteinDistance) / threshold;
@override
ComparisonResult contains(String? haystack, String? needle) {
final _needle = _prepareNeedle(needle);
var chunks = chunkOnSlashes(haystack, _needle);
int minDistance = max(haystack?.length ?? 0, _needle?.length ?? 0);
for (var chunk in chunks) {
minDistance = min(minDistance, _levDistance(chunk, _needle));
}
return ComparisonResult.withConfidence(computeConfidence(minDistance));
}
@override
ComparisonResult endsWith(String? haystack, String? needle) {
final _needle = _prepareNeedle(needle);
var endingChunk = chunkOnSlashes(haystack, _needle).last;
return ComparisonResult.withConfidence(
computeConfidence(_levDistance(endingChunk, _needle)),
);
}
@override
ComparisonResult equals(String? haystack, String? needle) {
final _needle = _prepareNeedle(needle);
return ComparisonResult.withConfidence(
computeConfidence(_levDistance(haystack, _needle)),
);
}
@override
ComparisonResult startsWith(String? haystack, String? needle) {
final _needle = _prepareNeedle(needle);
var startingChunk = chunkOnSlashes(haystack, _needle, limit: 1).first;
return ComparisonResult.withConfidence(
computeConfidence(_levDistance(startingChunk, _needle)),
);
}
}
/// Divides the [haystack] into the maximum number of substrings that contain a given
/// number of a separator. That number of separators is determined by the number
/// of its occurances in the [needle].
///
/// Note that [needle] does not need to be a substring of [haystack] -- all that
/// matters is its count of [sep].
///
/// Usage:
/// ```dart
/// // "a/file" contains one slash
/// chunkOnSlashes('some/path/to/a/file', 'a/file', sep: '/')
/// // So we return all possible substrings containing one slash
/// >>> ['some/path', 'path/to', 'to/a', 'a/file']
/// ```
List<String> chunkOnSlashes(
/// String we intend to split into a list of strings.
String? haystack,
/// String we will scan for instances of [sep] to determine how to split
/// [haystack].
String? needle, {
/// Separator string that defaults to the current OS's filesystem separator.
String? sep,
/// Optional limit on the number of results to return. For example, if you
/// know you only care about the first result, pass 1 to skip calculating and
/// throwing away subsequent results.
int? limit,
}) {
sep ??= p.separator;
var chunks = <String>[];
if (haystack == null) {
return chunks;
}
if (needle == null) return [haystack];
int numSlashes = needle.split(sep).length - 1;
var splitHaystack = haystack.split(sep);
if (numSlashes >= splitHaystack.length) {
return [haystack];
}
var counter = 0;
while (counter <= splitHaystack.length - numSlashes - 1) {
var endIndex = counter + numSlashes + 1;
chunks.add(splitHaystack.sublist(counter, endIndex).join(sep));
counter += 1;
if (limit != null && chunks.length >= limit) {
break;
}
}
// Briefly cast to a set to remove duplicates
return chunks.toSet().toList();
}