blob: 550e5525c3fc35accf256e3a8419a910da4e7de1 [file] [log] [blame]
use crate::common::FastVec;
use unicode_normalization::UnicodeNormalization;
pub fn soundex(s: &str) -> String {
if s.is_empty() {
return String::from("");
}
let v = &s.to_uppercase().nfkd().collect::<FastVec<char>>();
let mut result = FastVec::new();
result.push(v[0]);
let replacement = |ch| match ch {
'B' | 'F' | 'P' | 'V' => '1',
'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => '2',
'D' | 'T' => '3',
'L' => '4',
'M' | 'N' => '5',
'R' => '6',
_ => '*',
};
// find would be replacement for first character
let mut last = replacement(v[0]);
// loop over remaining letters
for letter in v.iter().skip(1) {
let sub = replacement(*letter);
if sub != '*' {
if sub != last {
result.push(sub);
if result.len() == 4 {
break;
}
}
last = sub;
} else if *letter != 'H' && *letter != 'W' {
last = '*';
}
}
while result.len() < 4 {
result.push('0');
}
let mut str_key = String::new();
for k in result {
str_key.push(k);
}
str_key
}
#[cfg(test)]
mod test {
    // Only the function under test is needed from the parent module.
    use super::soundex;
    use crate::testutils::testutils;

    // Runs `soundex` through the shared string-function test helper using the
    // cases stored in `testdata/soundex.csv`.
    #[test]
    fn test_soundex() {
        testutils::test_str_func("testdata/soundex.csv", soundex);
    }
}