From ccbbb32f658f585ba9f867c58d93bcf8042029b2 Mon Sep 17 00:00:00 2001 From: sharkdp Date: Thu, 1 Nov 2018 13:02:29 +0100 Subject: [PATCH 1/4] Feature: Highlight non-printable characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new `-A`/`--show-all` option (in analogy to GNU/Linux `cat`'s option) that highlights non-printable characters like space, tab or newline. This works in two steps: - **Preprocessing**: replace space by `•`, replace tab by `├──┤`, replace newline by `␤`, etc. - **Highlighting**: Use a newly written Sublime syntax to highlight these special symbols. Note: This feature is not technically a drop-in replacement for GNU `cat`'s `--show-all`, but it serves the same purpose. --- .../syntaxes/show-nonprintable.sublime-syntax | 25 +++++++++++ src/app.rs | 12 +++++- src/clap_app.rs | 12 ++++++ src/controller.rs | 19 ++++++++- src/preprocessor.rs | 41 ++++++++++++++++++- src/printer.rs | 4 +- 6 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 assets/syntaxes/show-nonprintable.sublime-syntax diff --git a/assets/syntaxes/show-nonprintable.sublime-syntax b/assets/syntaxes/show-nonprintable.sublime-syntax new file mode 100644 index 0000000000..6e90e19e40 --- /dev/null +++ b/assets/syntaxes/show-nonprintable.sublime-syntax @@ -0,0 +1,25 @@ +%YAML 1.2 +--- +# http://www.sublimetext.com/docs/3/syntax.html +name: Highlight non-printables +file_extensions: + - show-nonprintable +scope: whitespace +contexts: + main: + - match: "•" + scope: support.function.show-nonprintable.space + - match: "├─*┤" + scope: constant.character.escape.show-nonprintable.tab + - match: "␤" + scope: keyword.operator.show-nonprintable.newline + - match: "␍" + scope: string.show-nonprintable.carriage-return + - match: "␀" + scope: entity.other.attribute-name.show-nonprintable.null + - match: "␇" + scope: entity.other.attribute-name.show-nonprintable.bell + - match: "␛" + scope: 
entity.other.attribute-name.show-nonprintable.escape + - match: "␈" + scope: entity.other.attribute-name.show-nonprintable.backspace diff --git a/src/app.rs b/src/app.rs index 18b163e2b2..0e07614e20 100644 --- a/src/app.rs +++ b/src/app.rs @@ -37,6 +37,9 @@ pub struct Config<'a> { /// The explicitly configured language, if any pub language: Option<&'a str>, + /// Whether or not to show/replace non-printable characters like space, tab and newline. + pub show_nonprintable: bool, + /// The character width of the terminal pub term_width: usize, @@ -169,7 +172,14 @@ impl App { Ok(Config { true_color: is_truecolor_terminal(), - language: self.matches.value_of("language"), + language: self.matches.value_of("language").or_else(|| { + if self.matches.is_present("show-all") { + Some("show-nonprintable") + } else { + None + } + }), + show_nonprintable: self.matches.is_present("show-all"), output_wrap: if !self.interactive_output { // We don't have the tty width when piping to another program. // There's no point in wrapping when this is the case. diff --git a/src/clap_app.rs b/src/clap_app.rs index b9b383934a..fff618e18b 100644 --- a/src/clap_app.rs +++ b/src/clap_app.rs @@ -158,6 +158,18 @@ pub fn build_app(interactive_output: bool) -> ClapApp<'static, 'static> { '--style=numbers'", ), ) + .arg( + Arg::with_name("show-all") + .long("show-all") + .alias("show-nonprintable") + .short("A") + .conflicts_with("language") + .help("Show non-printable characters (space, tab, newline, ..).") + .long_help( + "Show non-printable characters like space, tab or newline. 
\ + Use '--tabs' to control the width of the tab-placeholders.", + ), + ) .arg( Arg::with_name("line-range") .long("line-range") diff --git a/src/controller.rs b/src/controller.rs index 371ec48405..ae606e312a 100644 --- a/src/controller.rs +++ b/src/controller.rs @@ -1,4 +1,5 @@ use std::io::{self, Write}; +use std::mem::swap; use app::Config; use assets::HighlightingAssets; @@ -6,6 +7,7 @@ use errors::*; use inputfile::{InputFile, InputFileReader}; use line_range::{LineRanges, RangeCheckResult}; use output::OutputType; +use preprocessor::replace_nonprintable; use printer::{InteractivePrinter, Printer, SimplePrinter}; pub struct Controller<'a> { @@ -64,7 +66,14 @@ impl<'b> Controller<'b> { input_file: InputFile<'a>, ) -> Result<()> { printer.print_header(writer, input_file)?; - self.print_file_ranges(printer, writer, reader, &self.config.line_ranges)?; + self.print_file_ranges( + printer, + writer, + reader, + &self.config.line_ranges, + self.config.show_nonprintable, + self.config.tab_width, + )?; printer.print_footer(writer)?; Ok(()) @@ -76,12 +85,20 @@ impl<'b> Controller<'b> { writer: &mut Write, mut reader: InputFileReader, line_ranges: &LineRanges, + show_nonprintable: bool, + tab_width: usize, ) -> Result<()> { let mut line_buffer = Vec::new(); + let mut line_buffer_processed = Vec::new(); let mut line_number: usize = 1; while reader.read_line(&mut line_buffer)? { + if show_nonprintable { + replace_nonprintable(&mut line_buffer, &mut line_buffer_processed, tab_width); + swap(&mut line_buffer, &mut line_buffer_processed); + } + match line_ranges.check(line_number) { RangeCheckResult::OutsideRange => { // Call the printer in case we need to call the syntax highlighter diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 0a99e07d05..a69f453eae 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -1,7 +1,7 @@ use console::AnsiCodeIterator; /// Expand tabs like an ANSI-enabled expand(1). 
-pub fn expand(line: &str, width: usize, cursor: &mut usize) -> String { +pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String { let mut buffer = String::with_capacity(line.len() * 2); for chunk in AnsiCodeIterator::new(line) { @@ -32,3 +32,42 @@ pub fn expand(line: &str, width: usize, cursor: &mut usize) -> String { buffer } + +pub fn replace_nonprintable(input: &mut Vec, output: &mut Vec, tab_width: usize) { + output.clear(); + + let tab_width = if tab_width == 0 { + 4 + } else if tab_width == 1 { + 2 + } else { + tab_width + }; + + for chr in input { + match *chr { + // space + b' ' => output.extend_from_slice("•".as_bytes()), + // tab + b'\t' => { + output.extend_from_slice("├".as_bytes()); + output.extend_from_slice("─".repeat(tab_width - 2).as_bytes()); + output.extend_from_slice("┤".as_bytes()); + } + // new line + b'\n' => output.extend_from_slice("␤".as_bytes()), + // carriage return + b'\r' => output.extend_from_slice("␍".as_bytes()), + // null + 0x00 => output.extend_from_slice("␀".as_bytes()), + // bell + 0x07 => output.extend_from_slice("␇".as_bytes()), + // backspace + 0x08 => output.extend_from_slice("␈".as_bytes()), + // escape + 0x1B => output.extend_from_slice("␛".as_bytes()), + // anything else + _ => output.push(*chr), + } + } +} diff --git a/src/printer.rs b/src/printer.rs index fbe141209d..593853cf54 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -22,7 +22,7 @@ use diff::get_git_diff; use diff::LineChanges; use errors::*; use inputfile::{InputFile, InputFileReader}; -use preprocessor::expand; +use preprocessor::expand_tabs; use style::OutputWrap; use terminal::{as_terminal_escaped, to_ansi_color}; @@ -177,7 +177,7 @@ impl<'a> InteractivePrinter<'a> { fn preprocess(&self, text: &str, cursor: &mut usize) -> String { if self.config.tab_width > 0 { - expand(text, self.config.tab_width, cursor) + expand_tabs(text, self.config.tab_width, cursor) } else { text.to_string() } From 01136a1b4594fcebb1fef9396ef84285f468701f Mon 
Sep 17 00:00:00 2001 From: sharkdp Date: Thu, 1 Nov 2018 19:40:26 +0100 Subject: [PATCH 2/4] =?UTF-8?q?Use=20`=E2=86=B9`=20character=20if=20tab-wi?= =?UTF-8?q?dth=20=3D=3D=201.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../syntaxes/show-nonprintable.sublime-syntax | 2 ++ src/preprocessor.rs | 18 ++++++++---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/assets/syntaxes/show-nonprintable.sublime-syntax b/assets/syntaxes/show-nonprintable.sublime-syntax index 6e90e19e40..c0cb0ba123 100644 --- a/assets/syntaxes/show-nonprintable.sublime-syntax +++ b/assets/syntaxes/show-nonprintable.sublime-syntax @@ -11,6 +11,8 @@ contexts: scope: support.function.show-nonprintable.space - match: "├─*┤" scope: constant.character.escape.show-nonprintable.tab + - match: "↹" + scope: constant.character.escape.show-nonprintable.tab - match: "␤" scope: keyword.operator.show-nonprintable.newline - match: "␍" diff --git a/src/preprocessor.rs b/src/preprocessor.rs index a69f453eae..aae9537ce0 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -36,13 +36,7 @@ pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String { pub fn replace_nonprintable(input: &mut Vec, output: &mut Vec, tab_width: usize) { output.clear(); - let tab_width = if tab_width == 0 { - 4 - } else if tab_width == 1 { - 2 - } else { - tab_width - }; + let tab_width = if tab_width == 0 { 4 } else { tab_width }; for chr in input { match *chr { @@ -50,9 +44,13 @@ pub fn replace_nonprintable(input: &mut Vec, output: &mut Vec, tab_width b' ' => output.extend_from_slice("•".as_bytes()), // tab b'\t' => { - output.extend_from_slice("├".as_bytes()); - output.extend_from_slice("─".repeat(tab_width - 2).as_bytes()); - output.extend_from_slice("┤".as_bytes()); + if tab_width == 1 { + output.extend_from_slice("↹".as_bytes()); + } else { + output.extend_from_slice("├".as_bytes()); + output.extend_from_slice("─".repeat(tab_width - 
2).as_bytes()); + output.extend_from_slice("┤".as_bytes()); + } } // new line b'\n' => output.extend_from_slice("␤".as_bytes()), From 4066d5f8f1aa2ca768e0a63b29f9451dbdc67517 Mon Sep 17 00:00:00 2001 From: sharkdp Date: Thu, 1 Nov 2018 19:54:04 +0100 Subject: [PATCH 3/4] Display line-feed as `␊` instead of `␤` --- assets/syntaxes/show-nonprintable.sublime-syntax | 4 ++-- src/preprocessor.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/syntaxes/show-nonprintable.sublime-syntax b/assets/syntaxes/show-nonprintable.sublime-syntax index c0cb0ba123..d9647155f2 100644 --- a/assets/syntaxes/show-nonprintable.sublime-syntax +++ b/assets/syntaxes/show-nonprintable.sublime-syntax @@ -13,8 +13,8 @@ contexts: scope: constant.character.escape.show-nonprintable.tab - match: "↹" scope: constant.character.escape.show-nonprintable.tab - - match: "␤" - scope: keyword.operator.show-nonprintable.newline + - match: "␊" + scope: keyword.operator.show-nonprintable.line-feed - match: "␍" scope: string.show-nonprintable.carriage-return - match: "␀" diff --git a/src/preprocessor.rs b/src/preprocessor.rs index aae9537ce0..ec43febeb3 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -52,10 +52,10 @@ pub fn replace_nonprintable(input: &mut Vec, output: &mut Vec, tab_width output.extend_from_slice("┤".as_bytes()); } } - // new line - b'\n' => output.extend_from_slice("␤".as_bytes()), + // line feed + 0x0A => output.extend_from_slice("␊".as_bytes()), // carriage return - b'\r' => output.extend_from_slice("␍".as_bytes()), + 0x0D => output.extend_from_slice("␍".as_bytes()), // null 0x00 => output.extend_from_slice("␀".as_bytes()), // bell From 793d7a86e704f2b46f3458df7eda6eac79938b0e Mon Sep 17 00:00:00 2001 From: sharkdp Date: Thu, 1 Nov 2018 20:29:48 +0100 Subject: [PATCH 4/4] Fix `--show-all` for UTF-16 encoding --- src/controller.rs | 20 +------------------- src/preprocessor.rs | 36 +++++++++++++++++++----------------- src/printer.rs | 9 +++++++-- 3 files
changed, 27 insertions(+), 38 deletions(-) diff --git a/src/controller.rs b/src/controller.rs index ae606e312a..0249035bf9 100644 --- a/src/controller.rs +++ b/src/controller.rs @@ -1,5 +1,4 @@ use std::io::{self, Write}; -use std::mem::swap; use app::Config; use assets::HighlightingAssets; @@ -7,7 +6,6 @@ use errors::*; use inputfile::{InputFile, InputFileReader}; use line_range::{LineRanges, RangeCheckResult}; use output::OutputType; -use preprocessor::replace_nonprintable; use printer::{InteractivePrinter, Printer, SimplePrinter}; pub struct Controller<'a> { @@ -66,14 +64,7 @@ impl<'b> Controller<'b> { input_file: InputFile<'a>, ) -> Result<()> { printer.print_header(writer, input_file)?; - self.print_file_ranges( - printer, - writer, - reader, - &self.config.line_ranges, - self.config.show_nonprintable, - self.config.tab_width, - )?; + self.print_file_ranges(printer, writer, reader, &self.config.line_ranges)?; printer.print_footer(writer)?; Ok(()) @@ -85,20 +76,11 @@ impl<'b> Controller<'b> { writer: &mut Write, mut reader: InputFileReader, line_ranges: &LineRanges, - show_nonprintable: bool, - tab_width: usize, ) -> Result<()> { let mut line_buffer = Vec::new(); - let mut line_buffer_processed = Vec::new(); - let mut line_number: usize = 1; while reader.read_line(&mut line_buffer)? 
{ - if show_nonprintable { - replace_nonprintable(&mut line_buffer, &mut line_buffer_processed, tab_width); - swap(&mut line_buffer, &mut line_buffer_processed); - } - match line_ranges.check(line_number) { RangeCheckResult::OutsideRange => { // Call the printer in case we need to call the syntax highlighter diff --git a/src/preprocessor.rs b/src/preprocessor.rs index ec43febeb3..f3e2f4d1be 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -33,39 +33,41 @@ pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String { buffer } -pub fn replace_nonprintable(input: &mut Vec, output: &mut Vec, tab_width: usize) { - output.clear(); +pub fn replace_nonprintable(input: &str, tab_width: usize) -> String { + let mut output = String::new(); let tab_width = if tab_width == 0 { 4 } else { tab_width }; - for chr in input { - match *chr { + for chr in input.chars() { + match chr { // space - b' ' => output.extend_from_slice("•".as_bytes()), + ' ' => output.push('•'), // tab - b'\t' => { + '\t' => { if tab_width == 1 { - output.extend_from_slice("↹".as_bytes()); + output.push('↹'); } else { - output.extend_from_slice("├".as_bytes()); - output.extend_from_slice("─".repeat(tab_width - 2).as_bytes()); - output.extend_from_slice("┤".as_bytes()); + output.push('├'); + output.push_str(&"─".repeat(tab_width - 2)); + output.push('┤'); } } // line feed - 0x0A => output.extend_from_slice("␊".as_bytes()), + '\x0A' => output.push('␊'), // carriage return - 0x0D => output.extend_from_slice("␍".as_bytes()), + '\x0D' => output.push('␍'), // null - 0x00 => output.extend_from_slice("␀".as_bytes()), + '\x00' => output.push('␀'), // bell - 0x07 => output.extend_from_slice("␇".as_bytes()), + '\x07' => output.push('␇'), // backspace - 0x08 => output.extend_from_slice("␈".as_bytes()), + '\x08' => output.push('␈'), // escape - 0x1B => output.extend_from_slice("␛".as_bytes()), + '\x1B' => output.push('␛'), // anything else - _ => output.push(*chr), + _ => output.push(chr), } } 
+ + output } diff --git a/src/printer.rs b/src/printer.rs index 593853cf54..5346d4e859 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -22,7 +22,7 @@ use diff::get_git_diff; use diff::LineChanges; use errors::*; use inputfile::{InputFile, InputFileReader}; -use preprocessor::expand_tabs; +use preprocessor::{expand_tabs, replace_nonprintable}; use style::OutputWrap; use terminal::{as_terminal_escaped, to_ansi_color}; @@ -251,7 +251,7 @@ impl<'a> Printer for InteractivePrinter<'a> { line_number: usize, line_buffer: &[u8], ) -> Result<()> { - let line = match self.content_type { + let mut line = match self.content_type { ContentType::BINARY => { return Ok(()); } @@ -263,6 +263,11 @@ impl<'a> Printer for InteractivePrinter<'a> { .unwrap_or("Invalid UTF-16BE".into()), _ => String::from_utf8_lossy(&line_buffer).to_string(), }; + + if self.config.show_nonprintable { + line = replace_nonprintable(&mut line, self.config.tab_width); + } + let regions = { let highlighter = match self.highlighter { Some(ref mut highlighter) => highlighter,