From b67f7c71aad415e3f04a80315431fa8757ae772d Mon Sep 17 00:00:00 2001
From: Johan Walles <johan.walles@gmail.com>
Date: Fri, 1 Jan 2021 19:05:06 +0100
Subject: [PATCH] Highlight spaces between highlighted words

This is our way of saying "this part of the sentence was replaced",
rather than "these individual words were replaced".

Fixes #11.
---
 src/token_collector.rs                 | 93 ++++++++++++++++++++++++++
 testdata/consecutive-words.diff        | 35 ++++++++++
 testdata/consecutive-words.riff-output | 35 ++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 testdata/consecutive-words.diff
 create mode 100644 testdata/consecutive-words.riff-output

diff --git a/src/token_collector.rs b/src/token_collector.rs
index e271d18..3225174 100644
--- a/src/token_collector.rs
+++ b/src/token_collector.rs
@@ -31,6 +31,20 @@ impl StyledToken {
 
         return first_char.is_whitespace();
     }
+
+    /// Returns true if this token is a word: a token of two or more
+    /// characters, or a single alphanumeric character.
+    ///
+    /// An empty token is not a word; this returns false for it rather than
+    /// panicking.
+    pub fn is_word(&self) -> bool {
+        let mut chars = self.token.chars();
+        match (chars.next(), chars.next()) {
+            (None, _) => false,
+            (Some(only_char), None) => only_char.is_alphanumeric(),
+            (Some(_), Some(_)) => true,
+        }
+    }
 }
 
 pub struct TokenCollector {
@@ -54,6 +68,16 @@ impl Style {
         }
     }
 
+    /// Returns the `*Inverse` counterpart of this style. Styles that are
+    /// already inverse, and `Style::Error`, map to themselves.
+    pub fn inverted(&self) -> Style {
+        match self {
+            Style::Old | Style::OldInverse => Style::OldInverse,
+            Style::New | Style::NewInverse => Style::NewInverse,
+            Style::Error => Style::Error,
+        }
+    }
+
     #[must_use]
     pub fn color<'a>(&self) -> &'a str {
         match self {
@@ -104,6 +128,7 @@ impl TokenCollector {
             highlight_trailing_whitespace(row);
             highlight_nonleading_tab(row);
         }
+        highlight_space_between_words(row);
 
         // Set inverse from prefix
         let mut is_inverse = self.line_prefix.style.is_inverse();
@@ -224,6 +249,54 @@ fn highlight_nonleading_tab(row: &mut [StyledToken]) {
     }
 }
 
+/// Highlight whitespace between two highlighted words.
+///
+/// A whitespace token directly between two inverse-styled word tokens gets the
+/// inverted variant of its own style. This is our way of saying "this part of
+/// the line was replaced", rather than "these individual words were replaced".
+fn highlight_space_between_words(row: &mut [StyledToken]) {
+    // What the tokens seen so far end with
+    enum FoundState {
+        // Nothing of interest
+        Nothing,
+        // A highlighted word: "Monkey"
+        HighlightedWord,
+        // A highlighted word followed by whitespace: "Monkey "
+        WordSpace,
+    }
+
+    let mut found_state = FoundState::Nothing;
+    let mut previous_token: Option<&mut StyledToken> = None;
+    for token in row.iter_mut() {
+        found_state = match found_state {
+            // Found "Monkey " (note trailing space)
+            FoundState::HighlightedWord if token.is_whitespace() => FoundState::WordSpace,
+            FoundState::Nothing | FoundState::HighlightedWord => {
+                if token.style.is_inverse() && token.is_word() {
+                    // Found "Monkey"
+                    FoundState::HighlightedWord
+                } else {
+                    FoundState::Nothing
+                }
+            }
+
+            FoundState::WordSpace => {
+                if token.style.is_inverse() && token.is_word() {
+                    // Found "Monkey Dance", highlight the space in between
+                    if let Some(space) = previous_token {
+                        space.style = space.style.inverted();
+                    }
+                    FoundState::HighlightedWord
+                } else {
+                    FoundState::Nothing
+                }
+            }
+        };
+
+        previous_token = Some(token);
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -360,4 +433,24 @@ mod tests {
 
         assert_eq!(actual, format!("{}-x\t{}", OLD, NORMAL));
     }
+
+    #[test]
+    fn test_highlight_space_between_words() {
+        // A plain space between two highlighted words...
+        let mut row = [
+            StyledToken::new("Monkey".to_string(), Style::NewInverse),
+            StyledToken::new(" ".to_string(), Style::New),
+            StyledToken::new("Dance".to_string(), Style::NewInverse),
+        ];
+
+        highlight_space_between_words(&mut row);
+
+        // ... should come out highlighted as well
+        let expected = [
+            StyledToken::new("Monkey".to_string(), Style::NewInverse),
+            StyledToken::new(" ".to_string(), Style::NewInverse),
+            StyledToken::new("Dance".to_string(), Style::NewInverse),
+        ];
+        assert_eq!(row, expected);
+    }
 }
diff --git a/testdata/consecutive-words.diff b/testdata/consecutive-words.diff
new file mode 100644
index 0000000..090cd74
--- /dev/null
+++ b/testdata/consecutive-words.diff
@@ -0,0 +1,35 @@
+commit bbc4309d726819512f9b5fb72b187eeb63d34680
+Author: Johan Walles <johan.walles@gmail.com>
+Date:   Thu Dec 31 15:22:09 2020 +0100
+
+    Skip highlighting based on newline counts
+
+    If old text and new text have very different line counts, just do the
+    simplistic highlighting.
+
+diff --git a/src/refiner.rs b/src/refiner.rs
+index d1ebdc1..40ae0df 100644
+--- a/src/refiner.rs
++++ b/src/refiner.rs
+@@ -15,8 +15,8 @@ use diffus::{
+ /// it.
+ const MAX_HIGHLIGHT_PERCENTAGE: usize = 30;
+
+-const LARGE_BYTE_COUNT_CHANGE_PERCENT: usize = 100;
+-const SMALL_BYTE_COUNT_CHANGE: usize = 10;
++const LARGE_COUNT_CHANGE_PERCENT: usize = 100;
++const SMALL_COUNT_CHANGE: usize = 10;
+
+ /// Format old and new lines in OLD and NEW colors.
+ ///
+@@ -55,11 +55,14 @@ pub fn format(old_text: &str, new_text: &str) -> Vec<String> {
+         return simple_format(old_text, new_text);
+     }
+
+-    // This check makes us faster, please use the benchmark.py script before and
+-    // after if you change this.
++    // These checks make us faster, please use the benchmark.py script before
++    // and after if you change this.
+     if is_large_byte_count_change(old_text, new_text) {
+         return simple_format(old_text, new_text);
+     }
diff --git a/testdata/consecutive-words.riff-output b/testdata/consecutive-words.riff-output
new file mode 100644
index 0000000..c288281
--- /dev/null
+++ b/testdata/consecutive-words.riff-output
@@ -0,0 +1,35 @@
+commit bbc4309d726819512f9b5fb72b187eeb63d34680
+Author: Johan Walles <johan.walles@gmail.com>
+Date:   Thu Dec 31 15:22:09 2020 +0100
+
+    Skip highlighting based on newline counts
+
+    If old text and new text have very different line counts, just do the
+    simplistic highlighting.
+
+[1mdiff --git a/src/refiner.rs b/src/refiner.rs[0m
+[1mindex d1ebdc1..40ae0df 100644[0m
+[1m--- a/src/refiner.rs[0m
+[1m+++ b/src/refiner.rs[0m
+[36m@@ -15,8 +15,8 @@ use diffus::{[0m
+ /// it.
+ const MAX_HIGHLIGHT_PERCENTAGE: usize = 30;
+
+[31m-const LARGE[7m_BYTE[27m_COUNT_CHANGE_PERCENT: usize = 100;[0m
+[31m-const SMALL[7m_BYTE[27m_COUNT_CHANGE: usize = 10;[0m
+[32m+const LARGE_COUNT_CHANGE_PERCENT: usize = 100;[0m
+[32m+const SMALL_COUNT_CHANGE: usize = 10;[0m
+
+ /// Format old and new lines in OLD and NEW colors.
+ ///
+[36m@@ -55,11 +55,14 @@ pub fn format(old_text: &str, new_text: &str) -> Vec<String> {[0m
+         return simple_format(old_text, new_text);
+     }
+
+[31m-    // [7mThis check makes[27m us faster, please use the benchmark.py script before[7m and[0m
+[31m-    // after if you change this.[0m
+[32m+    // [7mThese checks make[27m us faster, please use the benchmark.py script before[0m
+[32m+    //[7m and[27m after if you change this.[0m
+     if is_large_byte_count_change(old_text, new_text) {
+         return simple_format(old_text, new_text);
+     }