Skip to content

Commit

Permalink
feat(lsp): properly handle UTF-16 char offsets
Browse files Browse the repository at this point in the history
The LSP requires all character offsets to be counted in UTF-16 code units by
default. But Rust strings are encoded as UTF-8, so the right offset has to be
calculated accordingly.
  • Loading branch information
dnaka91 committed Dec 4, 2023
1 parent 4be4404 commit f74d625
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
9 changes: 4 additions & 5 deletions crates/stef-lsp/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use stef_parser::{
};
use tower_lsp::lsp_types::{self as lsp, Diagnostic};

use crate::utf16;

pub fn compile(schema: &str) -> std::result::Result<Schema<'_>, Diagnostic> {
stef_parser::Schema::parse(schema, None).map_err(|e| match &e.cause {
ParseSchemaCause::Parser(_) => {
Expand Down Expand Up @@ -187,13 +189,10 @@ fn get_range(schema: &str, location: Range<usize>) -> lsp::Range {
let start_char = schema[..location.start]
.lines()
.last()
.map_or(0, |line| line.chars().count());
.map_or(0, utf16::len);

let end_line = schema[..location.end].lines().count().saturating_sub(1);
let end_char = schema[..location.end]
.lines()
.last()
.map_or(0, |line| line.chars().count());
let end_char = schema[..location.end].lines().last().map_or(0, utf16::len);

lsp::Range::new(
lsp::Position::new(start_line as u32, start_char as u32),
Expand Down
5 changes: 3 additions & 2 deletions crates/stef-lsp/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#![allow(missing_docs)]

mod compile;

use std::collections::HashMap;

use ouroboros::self_referencing;
Expand All @@ -20,6 +18,9 @@ use tower_lsp::{
use tracing::debug;
use tracing_subscriber::EnvFilter;

mod compile;
mod utf16;

#[derive(Debug)]
struct Backend {
client: Client,
Expand Down
10 changes: 10 additions & 0 deletions crates/stef-lsp/src/utf16.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//! Utilities for handling UTF-16 encoding of strings.
//!
//! As the LSP by default measures character offsets in UTF-16 code units and Rust strings are
//! encoded as UTF-8, special handling is required to calculate correct positions. Mostly these
//! are adjustments to character offsets.

/// Count how many UTF-16 code units are needed to encode `line`.
///
/// Each `char` contributes one code unit, or two when it has to be encoded as a surrogate pair,
/// so the result can differ from both the UTF-8 byte length and the number of `char`s.
pub fn len(line: &str) -> usize {
    // Summing the per-char UTF-16 width yields the same total as counting the encoded units.
    line.chars().map(char::len_utf16).sum()
}

0 comments on commit f74d625

Please sign in to comment.