Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

fix(rome_lsp): improve the handling of UTF-8 and overflow errors in LineIndex #3745

Merged
merged 1 commit into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions crates/rome_lsp/src/handlers/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ pub(crate) fn code_actions(
return None;
}

let action = utils::code_fix_to_lsp(&url, &doc.line_index, &diagnostics, action);
has_fixes |= action.diagnostics.is_some();
let action =
utils::code_fix_to_lsp(&url, &doc.line_index, &diagnostics, action).ok()?;

has_fixes |= action.diagnostics.is_some();
Some(CodeActionOrCommand::CodeAction(action))
})
.chain(fix_all)
Expand Down Expand Up @@ -169,10 +170,7 @@ fn fix_all(
vec![lsp::TextEdit {
range: lsp::Range {
start: lsp::Position::new(0, 0),
end: lsp::Position::new(
line_index.newlines.len().try_into().unwrap_or(u32::MAX),
0,
),
end: lsp::Position::new(line_index.len(), 0),
},
new_text: fixed.code,
}],
Expand Down
34 changes: 11 additions & 23 deletions crates/rome_lsp/src/handlers/formatting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub(crate) fn format(
Err(err) => return Err(Error::from(err)),
};

let num_lines: u32 = doc.line_index.newlines.len().try_into()?;
let num_lines: u32 = doc.line_index.len();

let range = Range {
start: Position::default(),
Expand Down Expand Up @@ -89,23 +89,17 @@ pub(crate) fn format_range(
// Recalculate the actual range that was reformatted from the formatter result
let formatted_range = match formatted.range() {
Some(range) => {
let start_loc = doc.line_index.line_col(range.start());
let end_loc = doc.line_index.line_col(range.end());
let start_loc = utils::position(&doc.line_index, range.start())?;
let end_loc = utils::position(&doc.line_index, range.end())?;
Range {
start: Position {
line: start_loc.line,
character: start_loc.col,
},
end: Position {
line: end_loc.line,
character: end_loc.col,
},
start: start_loc,
end: end_loc,
}
}
None => Range {
start: Position::default(),
end: Position {
line: doc.line_index.newlines.len().try_into()?,
line: doc.line_index.len(),
character: 0,
},
},
Expand Down Expand Up @@ -147,23 +141,17 @@ pub(crate) fn format_on_type(
// Recalculate the actual range that was reformatted from the formatter result
let formatted_range = match formatted.range() {
Some(range) => {
let start_loc = doc.line_index.line_col(range.start());
let end_loc = doc.line_index.line_col(range.end());
let start_loc = utils::position(&doc.line_index, range.start())?;
let end_loc = utils::position(&doc.line_index, range.end())?;
Range {
start: Position {
line: start_loc.line,
character: start_loc.col,
},
end: Position {
line: end_loc.line,
character: end_loc.col,
},
start: start_loc,
end: end_loc,
}
}
None => Range {
start: Position::default(),
end: Position {
line: doc.line_index.newlines.len().try_into()?,
line: doc.line_index.len(),
character: 0,
},
},
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_lsp/src/handlers/rename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub(crate) fn rename(session: &Session, params: RenameParams) -> Result<Option<W
})?;

let mut changes = HashMap::new();
changes.insert(url, utils::text_edit(&doc.line_index, result.indels));
changes.insert(url, utils::text_edit(&doc.line_index, result.indels)?);

let workspace_edit = WorkspaceEdit {
changes: Some(changes),
Expand Down
15 changes: 11 additions & 4 deletions crates/rome_lsp/src/handlers/text_document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::ops::Range;
use anyhow::Result;
use rome_service::workspace::{ChangeFileParams, CloseFileParams, Language, OpenFileParams};
use tower_lsp::lsp_types;
use tracing::error;
use tracing::{error, field};

use crate::{documents::Document, session::Session, utils};

Expand Down Expand Up @@ -38,7 +38,7 @@ pub(crate) async fn did_open(
}

/// Handler for `textDocument/didChange` LSP notification
#[tracing::instrument(level = "trace", skip(session), err)]
#[tracing::instrument(level = "trace", skip_all, fields(url = field::display(&params.text_document.uri), version = params.text_document.version), err)]
pub(crate) async fn did_change(
session: &Session,
params: lsp_types::DidChangeTextDocumentParams,
Expand All @@ -50,18 +50,25 @@ pub(crate) async fn did_change(
let doc = session.document(&url)?;

let mut content = doc.content;
tracing::trace!("old document: {content:?}");

for change in params.content_changes {
match change.range {
Some(range) => {
let range = utils::text_range(&doc.line_index, range)?;
content.replace_range(Range::<usize>::from(range), &change.text);
let text_range = utils::text_range(&doc.line_index, range)?;
let range = Range::<usize>::from(text_range);
tracing::trace!("replace range {range:?} with {:?}", change.text);
content.replace_range(range, &change.text);
}
None => {
tracing::trace!("replace content {:?}", change.text);
content = change.text;
}
}
}

tracing::trace!("new document: {content:?}");

let doc = Document::new(version, &content);

session.workspace.change_file(ChangeFileParams {
Expand Down
187 changes: 169 additions & 18 deletions crates/rome_lsp/src/line_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
//!
//! Copied from rust-analyzer

use rome_rowan::TextSize;
use std::cmp::Ordering;

use rome_rowan::{TextRange, TextSize};

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct LineIndex {
text: String,
/// Offset the the beginning of each line, zero-based
pub(crate) newlines: Vec<TextSize>,
newlines: Vec<TextSize>,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
Expand All @@ -22,45 +25,193 @@ pub(crate) struct LineCol {
impl LineIndex {
pub(crate) fn new(text: &str) -> LineIndex {
let mut newlines = vec![0.into()];
let mut curr_row = 0.into();

for c in text.chars() {
let c_len = TextSize::of(c);
curr_row += c_len;

for (offset, c) in text.char_indices() {
if c == '\n' {
newlines.push(curr_row);
let char_offset = TextSize::try_from(offset).expect("TextSize overflow");
newlines.push(char_offset + TextSize::of(c));
}
}

LineIndex { newlines }
LineIndex {
text: text.into(),
newlines,
}
}

pub(crate) fn line_col(&self, offset: TextSize) -> LineCol {
let line = self.newlines.partition_point(|&it| it <= offset) - 1;
let line_start_offset = self.newlines[line];
let col = offset - line_start_offset;
LineCol {
line: line as u32,
col: col.into(),
pub(crate) fn line_col(&self, offset: TextSize) -> Option<LineCol> {
// Fast path for offset == 0
if offset == TextSize::from(0) {
return Some(LineCol { line: 0, col: 0 });
}

// Find the index of the line `offset` belongs to
let line_index = self
.newlines
.partition_point(|&it| it <= offset)
.checked_sub(1)?;

// Calculate the text range corresponding to `line_index`
let line_start_offset = self.newlines.get(line_index)?;
let line_end_offset = self
.newlines
.get(line_index + 1)
.cloned()
.unwrap_or_else(|| TextSize::of(&self.text));

let line_range = TextRange::new(*line_start_offset, line_end_offset);
let line_text = &self.text[line_range];

// Calculate the byte offset of the character within the line and find
// a column matching this offset
let char_offset = usize::from(offset - *line_start_offset);
let char_index = match char_offset.cmp(&line_text.len()) {
Ordering::Less => {
line_text
.char_indices()
.enumerate()
.find_map(|(index, (offset, _))| {
if offset == char_offset {
Some(index)
} else {
None
}
})?
}
// If the character offset is equal to the length of the line, the
// character index is the total number of columns in the line
Ordering::Equal => line_text.chars().count(),
// The character offset is greater than the length of the line,
// abort since the provided offset is invalid
Ordering::Greater => return None,
};

Some(LineCol {
line: u32::try_from(line_index).ok()?,
col: u32::try_from(char_index).ok()?,
})
}

pub(crate) fn offset(&self, line_col: LineCol) -> Option<TextSize> {
// Convert the line and column indices to usize (this should never fail
// on 32- and 64-bits platforms)
let line_index = usize::try_from(line_col.line).ok()?;
let col_index = usize::try_from(line_col.col).ok()?;

// Load the byte offset for the start of line `line_index`
let line_offset = self.newlines.get(line_index)?;
Some(*line_offset + TextSize::from(line_col.col))
let col_offset = if col_index > 0 {
// Calculate the text range corresponding to `line_index`
let line_start = usize::from(*line_offset);
let line_end = self
.newlines
.get(line_index + 1)
.map(|offset| usize::from(*offset))
.unwrap_or_else(|| self.text.len());

let line_text = self.text.get(line_start..line_end)?;
let num_chars = line_text.chars().count();

// If the column index is equal to the number of characters in the
// line, return the byte offset for the end of the line
let col_offset = if col_index == num_chars {
line_text.len()
} else {
// Accumulate byte offsets for each character in the line and
// return the value corresponding to `col_index`
let (col_offset, _) = line_text.char_indices().nth(col_index)?;
col_offset
};

TextSize::try_from(col_offset).ok()?
} else {
// Fast path for col == 0
TextSize::from(0)
};

Some(*line_offset + col_offset)
}

/// Return the text slice used to build the index
pub(crate) fn text(&self) -> &str {
&self.text
}

/// Return the number of lines in the index, clamped to [u32::MAX]
pub(crate) fn len(&self) -> u32 {
self.newlines.len().try_into().unwrap_or(u32::MAX)
}
}

#[cfg(test)]
mod tests {
use rome_rowan::TextSize;

use super::{LineCol, LineIndex};

macro_rules! check_conversion {
($line_index:ident : $line_col:expr => $text_size:expr ) => {
let offset = $line_index.offset($line_col);
assert_eq!(offset, Some($text_size));

let line_col = $line_index.line_col($text_size);
assert_eq!(line_col, Some($line_col));
};
}

#[test]
fn empty_string() {
let line_index = LineIndex::new("");
check_conversion!(line_index: LineCol { line: 0, col: 0 } => TextSize::from(0));
}

#[test]
fn empty_line() {
let line_index = LineIndex::new("\n\n");
check_conversion!(line_index: LineCol { line: 1, col: 0 } => TextSize::from(1));
}

#[test]
fn line_end() {
let line_index = LineIndex::new("abc\ndef\nghi");
check_conversion!(line_index: LineCol { line: 1, col: 3 } => TextSize::from(7));
}

#[test]
fn out_of_bounds() {
fn out_of_bounds_line() {
let line_index = LineIndex::new("abcde\nfghij\n");

let offset = line_index.offset(LineCol { line: 5, col: 0 });
assert!(offset.is_none());
}

#[test]
fn out_of_bounds_col() {
let line_index = LineIndex::new("abcde\nfghij\n");

let offset = line_index.offset(LineCol { line: 1, col: 7 });
assert!(offset.is_none());
}

#[test]
fn out_of_bounds_offset() {
let line_index = LineIndex::new("abcde\nfghij\n");

let offset = line_index.line_col(TextSize::from(13));
assert!(offset.is_none());
}

#[test]
fn unicode() {
let line_index = LineIndex::new("'Jan 1, 2018 – Jan 1, 2019'");

check_conversion!(line_index: LineCol { line: 0, col: 0 } => TextSize::from(0));
check_conversion!(line_index: LineCol { line: 0, col: 1 } => TextSize::from(1));
check_conversion!(line_index: LineCol { line: 0, col: 12 } => TextSize::from(12));
check_conversion!(line_index: LineCol { line: 0, col: 13 } => TextSize::from(15));
check_conversion!(line_index: LineCol { line: 0, col: 14 } => TextSize::from(18));
check_conversion!(line_index: LineCol { line: 0, col: 15 } => TextSize::from(21));
check_conversion!(line_index: LineCol { line: 0, col: 26 } => TextSize::from(32));
check_conversion!(line_index: LineCol { line: 0, col: 27 } => TextSize::from(33));
}
}
Loading