From b67f7c71aad415e3f04a80315431fa8757ae772d Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Fri, 1 Jan 2021 19:05:06 +0100 Subject: [PATCH] Highlight spaces between highlighted words This is our way of saying "this part of the sentence was replaced", rather than "these individual words were replaced". Fixes #11. --- src/token_collector.rs | 93 ++++++++++++++++++++++++++ testdata/consecutive-words.diff | 35 ++++++++++ testdata/consecutive-words.riff-output | 35 ++++++++++ 3 files changed, 163 insertions(+) create mode 100644 testdata/consecutive-words.diff create mode 100644 testdata/consecutive-words.riff-output diff --git a/src/token_collector.rs b/src/token_collector.rs index e271d18..3225174 100644 --- a/src/token_collector.rs +++ b/src/token_collector.rs @@ -31,6 +31,20 @@ impl StyledToken { return first_char.is_whitespace(); } + + pub fn is_word(&self) -> bool { + let mut chars_iterator = self.token.chars(); + let first_char = chars_iterator.next().unwrap(); + let second_char = chars_iterator.next(); + if second_char.is_some() { + // We consist of multiple characters, that means we are a word + return true; + } + + // If we get here, it means our token consists of one character only. If + // that single character is alphanumeric, we are a word, otherwise not. + return first_char.is_alphanumeric(); + } } pub struct TokenCollector { @@ -54,6 +68,16 @@ impl Style { } } + pub fn inverted(&self) -> Style { + return match self { + Style::Old => Style::OldInverse, + Style::New => Style::NewInverse, + Style::OldInverse => Style::OldInverse, + Style::NewInverse => Style::NewInverse, + Style::Error => Style::Error, + }; + } + #[must_use] pub fn color<'a>(&self) -> &'a str { match self { @@ -104,6 +128,7 @@ impl TokenCollector { highlight_trailing_whitespace(row); highlight_nonleading_tab(row); } + highlight_space_between_words(row); // Set inverse from prefix let mut is_inverse = self.line_prefix.style.is_inverse(); @@ -224,6 +249,54 @@ fn highlight_nonleading_tab(row: &mut [StyledToken]) { } } +/// Highlight single space between two highlighted words +fn highlight_space_between_words(row: &mut [StyledToken]) { + enum FoundState { + Nothing, + HighlightedWord, + WordSpace, + }; + + let mut found_state = FoundState::Nothing; + let mut previous_token: Option<&mut StyledToken> = None; + for token in row.iter_mut() { + match found_state { + FoundState::Nothing => { + if token.style.is_inverse() && token.is_word() { + // Found "Monkey" + found_state = FoundState::HighlightedWord; + } + } + + FoundState::HighlightedWord => { + if token.is_whitespace() { + // Found "Monkey " (note trailing space) + found_state = FoundState::WordSpace; + } else if token.style.is_inverse() && token.is_word() { + found_state = FoundState::HighlightedWord; + } else { + found_state = FoundState::Nothing; + } + } + + FoundState::WordSpace => { + if token.style.is_inverse() && token.is_word() { + // Found "Monkey Dance" + if let Some(_previous_token) = previous_token { + _previous_token.style = _previous_token.style.inverted(); + } + + found_state = FoundState::HighlightedWord; + } else { + found_state = FoundState::Nothing; + } + } + } + + previous_token = Some(token); + } +} + #[cfg(test)] mod tests { use super::*; @@ -360,4 +433,24 @@ mod tests { assert_eq!(actual, format!("{}-x\t{}", OLD, NORMAL)); } + + #[test] + fn test_highlight_space_between_words() { + let mut row = [ + StyledToken::new("Monkey".to_string(), Style::NewInverse), + StyledToken::new(" ".to_string(), Style::New), + StyledToken::new("Dance".to_string(), Style::NewInverse), + ]; + + highlight_space_between_words(&mut row); + + assert_eq!( + row, + [ + StyledToken::new("Monkey".to_string(), Style::NewInverse), + StyledToken::new(" ".to_string(), Style::NewInverse), + StyledToken::new("Dance".to_string(), Style::NewInverse), + ] + ); + } } diff --git a/testdata/consecutive-words.diff b/testdata/consecutive-words.diff new file mode 100644 index 0000000..090cd74 --- /dev/null +++ b/testdata/consecutive-words.diff @@ -0,0 +1,35 @@ +commit bbc4309d726819512f9b5fb72b187eeb63d34680 +Author: Johan Walles +Date: Thu Dec 31 15:22:09 2020 +0100 + + Skip highlighting based on newline counts + + If old text and new text have very different line counts, just do the + simplistic highlighting. + +diff --git a/src/refiner.rs b/src/refiner.rs +index d1ebdc1..40ae0df 100644 +--- a/src/refiner.rs ++++ b/src/refiner.rs +@@ -15,8 +15,8 @@ use diffus::{ + /// it. + const MAX_HIGHLIGHT_PERCENTAGE: usize = 30; + +-const LARGE_BYTE_COUNT_CHANGE_PERCENT: usize = 100; +-const SMALL_BYTE_COUNT_CHANGE: usize = 10; ++const LARGE_COUNT_CHANGE_PERCENT: usize = 100; ++const SMALL_COUNT_CHANGE: usize = 10; + + /// Format old and new lines in OLD and NEW colors. + /// +@@ -55,11 +55,14 @@ pub fn format(old_text: &str, new_text: &str) -> Vec { + return simple_format(old_text, new_text); + } + +- // This check makes us faster, please use the benchmark.py script before and +- // after if you change this. ++ // These checks make us faster, please use the benchmark.py script before ++ // and after if you change this. + if is_large_byte_count_change(old_text, new_text) { + return simple_format(old_text, new_text); + } diff --git a/testdata/consecutive-words.riff-output b/testdata/consecutive-words.riff-output new file mode 100644 index 0000000..c288281 --- /dev/null +++ b/testdata/consecutive-words.riff-output @@ -0,0 +1,35 @@ +commit bbc4309d726819512f9b5fb72b187eeb63d34680 +Author: Johan Walles +Date: Thu Dec 31 15:22:09 2020 +0100 + + Skip highlighting based on newline counts + + If old text and new text have very different line counts, just do the + simplistic highlighting. + +diff --git a/src/refiner.rs b/src/refiner.rs +index d1ebdc1..40ae0df 100644 +--- a/src/refiner.rs ++++ b/src/refiner.rs +@@ -15,8 +15,8 @@ use diffus::{ + /// it. + const MAX_HIGHLIGHT_PERCENTAGE: usize = 30; + +-const LARGE_BYTE_COUNT_CHANGE_PERCENT: usize = 100; +-const SMALL_BYTE_COUNT_CHANGE: usize = 10; ++const LARGE_COUNT_CHANGE_PERCENT: usize = 100; ++const SMALL_COUNT_CHANGE: usize = 10; + + /// Format old and new lines in OLD and NEW colors. + /// +@@ -55,11 +55,14 @@ pub fn format(old_text: &str, new_text: &str) -> Vec { + return simple_format(old_text, new_text); + } + +- // This check makes us faster, please use the benchmark.py script before and +- // after if you change this. ++ // These checks make us faster, please use the benchmark.py script before ++ // and after if you change this. + if is_large_byte_count_change(old_text, new_text) { + return simple_format(old_text, new_text); + }