From 3aea7f18894bfc35c03044688229f6de84eb42f3 Mon Sep 17 00:00:00 2001
From: Till Hoeppner
Date: Fri, 3 Oct 2014 14:59:47 +0200
Subject: [PATCH] Fix lev_distance

---
 src/libcollections/str.rs | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index d198e948ac838..553b34a55c317 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -778,13 +778,11 @@ pub trait StrAllocating: Str {
     /// Returns the Levenshtein Distance between two strings.
     fn lev_distance(&self, t: &str) -> uint {
         let me = self.as_slice();
-        let slen = me.len();
-        let tlen = t.len();
+        if me.is_empty() { return t.char_len(); }
+        if t.is_empty() { return me.char_len(); }
 
-        if slen == 0 { return tlen; }
-        if tlen == 0 { return slen; }
-
-        let mut dcol = Vec::from_fn(tlen + 1, |x| x);
+        let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
+        let mut t_last = 0;
 
         for (i, sc) in me.chars().enumerate() {
 
@@ -799,15 +797,15 @@ pub trait StrAllocating: Str {
                     *dcol.get_mut(j + 1) = current;
                 } else {
                     *dcol.get_mut(j + 1) = cmp::min(current, next);
-                    *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
-                                                    dcol[j]) + 1;
+                    *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
                 }
 
                 current = next;
+                t_last = j;
             }
         }
 
-        return dcol[tlen];
+        dcol[t_last + 1]
     }
 
     /// Returns an iterator over the string in Unicode Normalization Form D
@@ -1878,6 +1876,27 @@ mod tests {
         assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
     }
 
+    #[test]
+    fn test_lev_distance() {
+        use std::char::{ from_u32, MAX };
+        // Test bytelength agnosticity
+        for c in range(0u32, MAX as u32)
+                 .filter_map(|i| from_u32(i))
+                 .map(|i| String::from_char(1, i)) {
+            assert_eq!(c[].lev_distance(c[]), 0);
+        }
+
+        let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+        let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
+        let c = "Mary häd ä little lämb\n\nLittle lämb\n";
+        assert_eq!(a.lev_distance(b), 1);
+        assert_eq!(b.lev_distance(a), 1);
+        assert_eq!(a.lev_distance(c), 2);
+        assert_eq!(c.lev_distance(a), 2);
+        assert_eq!(b.lev_distance(c), 1);
+        assert_eq!(c.lev_distance(b), 1);
+    }
+
     #[test]
     fn test_nfd_chars() {
         macro_rules! t {