Skip to content

Commit

Permalink
auto merge of #17738 : hoeppnertill/rust/master, r=alexcrichton
Browse files Browse the repository at this point in the history
There is an issue with lev_distance, where
```
fn main() {
    // Both operands are the same string, so the expected distance is 0.
    println!("{}", "\x80".lev_distance("\x80"))
}
```
prints `2`, although the distance between two identical strings should be `0`.

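This is due to the function using the byte length of the strings instead of their char length: the loop walks the strings char by char, but the early returns and the final lookup `dcol[tlen]` used byte lengths, which are larger for any string containing multi-byte characters.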
  • Loading branch information
bors committed Oct 4, 2014
2 parents e434aa1 + 3aea7f1 commit a2e7c4d
Showing 1 changed file with 28 additions and 9 deletions.
37 changes: 28 additions & 9 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,13 +778,11 @@ pub trait StrAllocating: Str {
/// Returns the Levenshtein Distance between two strings.
fn lev_distance(&self, t: &str) -> uint {
let me = self.as_slice();
let slen = me.len();
let tlen = t.len();
if me.is_empty() { return t.char_len(); }
if t.is_empty() { return me.char_len(); }

if slen == 0 { return tlen; }
if tlen == 0 { return slen; }

let mut dcol = Vec::from_fn(tlen + 1, |x| x);
let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
let mut t_last = 0;

for (i, sc) in me.chars().enumerate() {

Expand All @@ -799,15 +797,15 @@ pub trait StrAllocating: Str {
*dcol.get_mut(j + 1) = current;
} else {
*dcol.get_mut(j + 1) = cmp::min(current, next);
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
dcol[j]) + 1;
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
}

current = next;
t_last = j;
}
}

return dcol[tlen];
dcol[t_last + 1]
}

/// Returns an iterator over the string in Unicode Normalization Form D
Expand Down Expand Up @@ -1878,6 +1876,27 @@ mod tests {
assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}

// Regression test: `lev_distance` must count chars, not bytes.
#[test]
fn test_lev_distance() {
use std::char::{ from_u32, MAX };
// Test bytelength agnosticity
// Each `u32` in the range for which `from_u32` yields a char is turned into
// a one-char string and compared with itself; the distance must be 0.
// NOTE(review): `range` looks half-open, so `MAX` itself is presumably not
// exercised -- confirm.
for c in range(0u32, MAX as u32)
.filter_map(|i| from_u32(i))
.map(|i| String::from_char(1, i)) {
assert_eq!(c[].lev_distance(c[]), 0);
}

// `a` contains "Märy" where `b` contains "Mary"; `c` is `b` without the
// leading newline.
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
// Every pair is checked in both argument orders.
assert_eq!(a.lev_distance(b), 1);
assert_eq!(b.lev_distance(a), 1);
assert_eq!(a.lev_distance(c), 2);
assert_eq!(c.lev_distance(a), 2);
assert_eq!(b.lev_distance(c), 1);
assert_eq!(c.lev_distance(b), 1);
}

#[test]
fn test_nfd_chars() {
macro_rules! t {
Expand Down

0 comments on commit a2e7c4d

Please sign in to comment.