blob: abe98392ee2c0ae122e36164cb44dd7daf4cc587 [file] [log] [blame]
use std::cmp::max;
// strsplit is like `s.split(split)`, except that if `split` is "", it
// trims the leading and trailing empty elements, since the `lcs`
// logic won't handle those properly.
fn strsplit<'a>(s: &'a str, split: &str) -> Vec<&'a str> {
let mut si = s.split(split);
if split == "" {
si.next();
}
let mut v: Vec<&str> = si.collect();
if split == "" {
v.pop();
}
v
}
// finds the longest common subsequences
// outputs the edit distance and a string containing
// all chars both inputs have in common
//
// This algorithm is based on
// https://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
#[allow(non_snake_case)]
#[cfg_attr(feature = "cargo-clippy", allow(many_single_char_names))]
pub fn lcs(orig: &str, edit: &str, split: &str) -> (i32, String) {
// make list by custom splits
let a = strsplit(orig, split);
let b = strsplit(edit, split);
let N = a.len();
let M = b.len();
let mut idx: Vec<usize> = Vec::with_capacity(N * M);
idx.resize(N * M, 0);
for i in 0..N {
for j in 0..M {
if b[j] == a[i] {
if i == 0 || j == 0 {
idx[i * M + j] = 1;
} else {
idx[i * M + j] = idx[(i - 1) * M + j - 1] + 1;
}
} else if i == 0 {
if j == 0 {
idx[i * M + j] = 0;
} else {
idx[i * M + j] = idx[i * M + j - 1];
}
} else if j == 0 {
idx[i * M + j] = idx[(i - 1) * M + j];
} else {
idx[i * M + j] = max(idx[i * M + j - 1], idx[(i - 1) * M + j]);
}
}
}
let mut i = (N as isize) - 1;
let mut j = (M as isize) - 1;
let mut lcs = Vec::new();
while i >= 0 && j >= 0 {
let ui = i as usize;
let uj = j as usize;
if a[ui] == b[uj] {
lcs.push(a[ui]);
i -= 1;
j -= 1;
} else if j == 0 && i == 0 {
break;
} else if i == 0 || idx[ui * M + uj - 1] > idx[(ui - 1) * M + uj] {
j -= 1;
} else {
i -= 1;
}
}
lcs.reverse();
((N + M - 2 * lcs.len()) as i32, lcs.join(split))
}
#[test]
fn test_lcs() {
assert_eq!(lcs("test", "tost", ""), (2, "tst".to_string()));
assert_eq!(lcs("test", "test", ""), (0, "test".to_string()));
assert_eq!(lcs("test", "test", " "), (0, "test".to_string()));
assert_eq!(
lcs(
"The quick brown fox jumps over the lazy dog",
"The quick brown dog leaps over the lazy cat",
"",
),
(16, "The quick brown o ps over the lazy ".to_string())
);
assert_eq!(
lcs(
"The quick brown fox jumps over the lazy dog",
"The quick brown dog leaps over the lazy cat",
" ",
),
(6, "The quick brown over the lazy".to_string())
);
assert_eq!(
lcs(
"The quick brown fox jumps over the lazy dog",
"The quick brown dog leaps over the lazy cat",
"\n",
),
(2, "".to_string())
);
assert_eq!(
lcs(
"The quick brown fox jumps over the lazy dog",
"The quick brown fox jumps over the lazy dog",
"\n",
),
(0, "The quick brown fox jumps over the lazy dog".to_string())
);
assert_eq!(
lcs("a b : c", "b a : b : c", " "),
(2, "a b : c".to_string())
);
assert_eq!(lcs("", "a b c", ""), (5, "".to_string()));
assert_eq!(lcs("", " a", " "), (1, "".to_string()));
}