-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
duval_algorithm.rs
97 lines (88 loc) · 3.3 KB
/
duval_algorithm.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
//! Implementation of Duval's Algorithm to compute the standard factorization of a string
//! into Lyndon words. A Lyndon word is defined as a string that is strictly smaller
//! (lexicographically) than any of its nontrivial suffixes. This implementation operates
//! in linear time and space.
/// Performs Duval's algorithm to factorize a given string into its Lyndon words.
///
/// # Arguments
///
/// * `s` - A slice of characters representing the input string.
///
/// # Returns
///
/// A vector of strings, where each string is a Lyndon word, representing the factorization
/// of the input string.
///
/// # Time Complexity
///
/// The algorithm runs in O(n) time, where `n` is the length of the input string.
pub fn duval_algorithm(s: &str) -> Vec<String> {
factorize_duval(&s.chars().collect::<Vec<char>>())
}
/// Helper function that takes a string slice, converts it to a vector of characters,
/// and then applies the Duval factorization algorithm to find the Lyndon words.
///
/// # Arguments
///
/// * `s` - A string slice representing the input text.
///
/// # Returns
///
/// A vector of strings, each representing a Lyndon word in the factorization.
fn factorize_duval(s: &[char]) -> Vec<String> {
let mut start = 0;
let mut factors: Vec<String> = Vec::new();
while start < s.len() {
let mut end = start + 1;
let mut repeat = start;
while end < s.len() && s[repeat] <= s[end] {
if s[repeat] < s[end] {
repeat = start;
} else {
repeat += 1;
}
end += 1;
}
while start <= repeat {
factors.push(s[start..start + end - repeat].iter().collect::<String>());
start += end - repeat;
}
}
factors
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! test_duval_algorithm {
($($name:ident: $inputs:expr,)*) => {
$(
#[test]
fn $name() {
let (text, expected) = $inputs;
assert_eq!(duval_algorithm(text), expected);
}
)*
}
}
test_duval_algorithm! {
repeating_with_suffix: ("abcdabcdababc", ["abcd", "abcd", "ababc"]),
single_repeating_char: ("aaa", ["a", "a", "a"]),
single: ("ababb", ["ababb"]),
unicode: ("അഅഅ", ["അ", "അ", "അ"]),
empty_string: ("", Vec::<String>::new()),
single_char: ("x", ["x"]),
palindrome: ("racecar", ["r", "acecar"]),
long_repeating: ("aaaaaa", ["a", "a", "a", "a", "a", "a"]),
mixed_repeating: ("ababcbabc", ["ababcbabc"]),
non_repeating_sorted: ("abcdefg", ["abcdefg"]),
alternating_increasing: ("abababab", ["ab", "ab", "ab", "ab"]),
long_repeating_lyndon: ("abcabcabcabc", ["abc", "abc", "abc", "abc"]),
decreasing_order: ("zyxwvutsrqponm", ["z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n", "m"]),
alphanumeric_mixed: ("a1b2c3a1", ["a", "1b2c3a", "1"]),
special_characters: ("a@b#c$d", ["a", "@b", "#c$d"]),
unicode_complex: ("αβγδ", ["αβγδ"]),
long_string_performance: (&"a".repeat(1_000_000), vec!["a"; 1_000_000]),
palindrome_repeating_prefix: ("abccba", ["abccb", "a"]),
interrupted_lyndon: ("abcxabc", ["abcx", "abc"]),
}
}