Skip to content

Commit

Permalink
Add :character-info command (#4000)
Browse files Browse the repository at this point in the history
  • Loading branch information
wetheredge authored Feb 3, 2023
1 parent d8f482e commit f7bd7b5
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 0 deletions.
1 change: 1 addition & 0 deletions book/src/generated/typable-cmd.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
| `:change-current-directory`, `:cd` | Change the current working directory. |
| `:show-directory`, `:pwd` | Show the current working directory. |
| `:encoding` | Set encoding. Based on `https://encoding.spec.whatwg.org`. |
| `:character-info`, `:char` | Get info about the character under the primary cursor. |
| `:reload` | Discard changes and reload from the source file. |
| `:reload-all` | Discard changes and reload all documents from the source files. |
| `:update` | Write changes only if the file has been modified. |
Expand Down
133 changes: 133 additions & 0 deletions helix-term/src/commands/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::job::Job;

use super::*;

use helix_core::encoding;
use helix_view::editor::{Action, CloseError, ConfigEvent};
use ui::completers::{self, Completer};

Expand Down Expand Up @@ -1033,6 +1034,131 @@ fn set_encoding(
}
}

/// Shows info about the character under the primary cursor.
///
/// The grapheme cluster starting at the primary cursor is reported on the
/// status line as `"<printable>"[ (U+xxxx ...)][ Dec n] Hex xx ...`, where:
///
/// - `<printable>` is the grapheme with NUL, tab, line feed and carriage
///   return replaced by their backslash escapes;
/// - the Unicode code points are listed only when the document encoding is
///   UTF-8;
/// - the decimal value is given only when the grapheme is a single byte long
///   and the document encoding is ASCII-compatible;
/// - the hex bytes are produced by an encoder for the document's encoding,
///   with the bytes of successive `char`s separated by ` +`.
///
/// Nothing happens unless `event` is [`PromptEvent::Validate`], or when the
/// next grapheme boundary equals the cursor position (there is no grapheme to
/// describe).
///
/// # Errors
///
/// Returns an error if the encoder reports that a character cannot be mapped
/// to the document's encoding, if it runs out of output space, or if the
/// required output buffer length cannot be computed.
fn get_character_info(
    cx: &mut compositor::Context,
    _args: &[Cow<str>],
    event: PromptEvent,
) -> anyhow::Result<()> {
    if event != PromptEvent::Validate {
        return Ok(());
    }

    let (view, doc) = current_ref!(cx.editor);
    let text = doc.text().slice(..);

    let grapheme_start = doc.selection(view.id).primary().cursor(text);
    let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start);

    if grapheme_start == grapheme_end {
        return Ok(());
    }

    let grapheme = text.slice(grapheme_start..grapheme_end).to_string();
    let encoding = doc.encoding();

    // Escape the control characters that would otherwise garble the status line.
    let printable = grapheme.chars().fold(String::new(), |mut s, c| {
        match c {
            '\0' => s.push_str("\\0"),
            '\t' => s.push_str("\\t"),
            '\n' => s.push_str("\\n"),
            '\r' => s.push_str("\\r"),
            _ => s.push(c),
        }

        s
    });

    // List the Unicode code points if in UTF-8. A `char` already is a Unicode
    // scalar value, so it converts to its code point directly; there is no
    // need to decode its UTF-8 bytes by hand.
    let unicode = if encoding == encoding::UTF_8 {
        let codepoints = grapheme
            .chars()
            .map(|ch| format!("U+{:0>4x}", u32::from(ch)))
            .collect::<Vec<_>>()
            .join(" ");

        format!(" ({codepoints})")
    } else {
        String::new()
    };

    // Give the decimal value for ascii characters
    let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 {
        format!(" Dec {}", grapheme.as_bytes()[0])
    } else {
        String::new()
    };

    let hex = {
        let mut encoder = encoding.new_encoder();
        let max_encoded_len = encoder
            .max_buffer_length_from_utf8_without_replacement(grapheme.len())
            .ok_or_else(|| {
                anyhow::anyhow!(
                    "cannot compute the buffer size needed to encode the character as {}",
                    encoding.name()
                )
            })?;
        let mut bytes = Vec::with_capacity(max_encoded_len);
        // Index of the first byte in `bytes` that has not been printed yet.
        let mut current_byte = 0;
        let mut hex = String::new();
        // Scratch space for the UTF-8 form of one `char` (at most 4 bytes).
        let mut utf8 = [0u8; 4];

        for (i, ch) in grapheme.chars().enumerate() {
            if i != 0 {
                hex.push_str(" +");
            }

            let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement(
                ch.encode_utf8(&mut utf8),
                &mut bytes,
                true,
            );

            match result {
                encoding::EncoderResult::InputEmpty => {}
                // Report this instead of ignoring it: the hex dump would
                // otherwise be incomplete without any indication.
                encoding::EncoderResult::OutputFull => {
                    bail!(
                        "ran out of buffer space while encoding {ch:?} as {}",
                        encoding.name()
                    );
                }
                encoding::EncoderResult::Unmappable(char) => {
                    bail!("{char:?} cannot be mapped to {}", encoding.name());
                }
            }

            // Only print the bytes appended for this `char`.
            for byte in &bytes[current_byte..] {
                hex.push_str(&format!(" {byte:0>2x}"));
            }

            current_byte = bytes.len();
        }

        hex
    };

    cx.editor
        .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}"));

    Ok(())
}

/// Reload the [`Document`] from its source file.
fn reload(
cx: &mut compositor::Context,
Expand Down Expand Up @@ -2131,6 +2257,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[
fun: set_encoding,
completer: None,
},
TypableCommand {
name: "character-info",
aliases: &["char"],
doc: "Get info about the character under the primary cursor.",
fun: get_character_info,
completer: None,
},
TypableCommand {
name: "reload",
aliases: &[],
Expand Down
58 changes: 58 additions & 0 deletions helix-term/tests/test/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> {

Ok(())
}

/// Checks the status line produced by `:char` for several kinds of grapheme.
///
/// Each case runs its key sequence in a freshly built application and then
/// compares the editor status message with the expected text.
#[tokio::test(flavor = "multi_thread")]
async fn test_character_info() -> anyhow::Result<()> {
    // (key sequence, expected status message)
    let cases: [(&str, &str); 4] = [
        // UTF-8, single byte
        ("ih<esc>h:char<ret>", "\"h\" (U+0068) Dec 104 Hex 68"),
        // UTF-8, multi-byte: "e" followed by a combining diaeresis, written
        // with an escape so the decomposed form is explicit in the source.
        (
            "ie\u{0308}<esc>h:char<ret>",
            "\"e\u{0308}\" (U+0065 U+0308) Hex 65 + cc 88",
        ),
        // Multiple characters displayed as one, escaped characters
        (
            ":line<minus>ending crlf<ret>:char<ret>",
            "\"\\r\\n\" (U+000d U+000a) Hex 0d + 0a",
        ),
        // Non-UTF-8
        (
            ":encoding ascii<ret>ih<esc>h:char<ret>",
            "\"h\" Dec 104 Hex 68",
        ),
    ];

    for (keys, expected) in cases {
        test_key_sequence(
            &mut helpers::AppBuilder::new().build()?,
            Some(keys),
            Some(&|app| {
                assert_eq!(expected, app.editor.get_status().unwrap().0);
            }),
            false,
        )
        .await?;
    }

    Ok(())
}

0 comments on commit f7bd7b5

Please sign in to comment.