diff --git a/crates/stef-lsp/src/compile.rs b/crates/stef-lsp/src/compile.rs index 0ac47bc..0a45562 100644 --- a/crates/stef-lsp/src/compile.rs +++ b/crates/stef-lsp/src/compile.rs @@ -11,6 +11,8 @@ use stef_parser::{ }; use tower_lsp::lsp_types::{self as lsp, Diagnostic}; +use crate::utf16; + pub fn compile(schema: &str) -> std::result::Result, Diagnostic> { stef_parser::Schema::parse(schema, None).map_err(|e| match &e.cause { ParseSchemaCause::Parser(_) => { @@ -187,13 +189,10 @@ fn get_range(schema: &str, location: Range) -> lsp::Range { let start_char = schema[..location.start] .lines() .last() - .map_or(0, |line| line.chars().count()); + .map_or(0, utf16::len); let end_line = schema[..location.end].lines().count().saturating_sub(1); - let end_char = schema[..location.end] - .lines() - .last() - .map_or(0, |line| line.chars().count()); + let end_char = schema[..location.end].lines().last().map_or(0, utf16::len); lsp::Range::new( lsp::Position::new(start_line as u32, start_char as u32), diff --git a/crates/stef-lsp/src/main.rs b/crates/stef-lsp/src/main.rs index d76c750..45e123e 100644 --- a/crates/stef-lsp/src/main.rs +++ b/crates/stef-lsp/src/main.rs @@ -1,7 +1,5 @@ #![allow(missing_docs)] -mod compile; - use std::collections::HashMap; use ouroboros::self_referencing; @@ -20,6 +18,9 @@ use tower_lsp::{ use tracing::debug; use tracing_subscriber::EnvFilter; +mod compile; +mod utf16; + #[derive(Debug)] struct Backend { client: Client, diff --git a/crates/stef-lsp/src/utf16.rs b/crates/stef-lsp/src/utf16.rs new file mode 100644 index 0000000..9661f2c --- /dev/null +++ b/crates/stef-lsp/src/utf16.rs @@ -0,0 +1,10 @@ +//! Utilities for handling UTF-16 encoding of strings. +//! +//! As the LSP by default uses UTF-16 and Rust strings are encoded as UTF-8, special handling is +//! required to calculate the correct information. Mostly these are adjustments to character +//! offsets. + +/// Get the UTF-16 byte count for the code line. 
///
/// The value is measured in UTF-16 *code units* (16-bit words), which is the unit the LSP uses
/// by default for character offsets. It is neither the UTF-8 byte length of `line` nor its
/// number of Unicode scalar values:
///
/// - every character inside the Basic Multilingual Plane contributes `1`,
/// - every character outside of it (encoded as a surrogate pair) contributes `2`.
///
/// An empty `line` yields `0`.
#[must_use]
pub fn len(line: &str) -> usize {
    // `encode_utf16` lazily yields one `u16` per code unit, so counting the items gives the
    // UTF-16 length without allocating an intermediate buffer.
    line.encode_utf16().count()
}