From 61930424a92adb0309cd992b2b86ed809a1725f5 Mon Sep 17 00:00:00 2001 From: l3ops Date: Wed, 16 Nov 2022 09:38:08 +0100 Subject: [PATCH] fix(rome_lsp): improve the handling of UTF-8 and overflow errors in `LineIndex` (#3745) --- crates/rome_lsp/src/handlers/analysis.rs | 10 +- crates/rome_lsp/src/handlers/formatting.rs | 34 ++-- crates/rome_lsp/src/handlers/rename.rs | 2 +- crates/rome_lsp/src/handlers/text_document.rs | 15 +- crates/rome_lsp/src/line_index.rs | 187 ++++++++++++++++-- crates/rome_lsp/src/utils.rs | 56 ++++-- 6 files changed, 231 insertions(+), 73 deletions(-) diff --git a/crates/rome_lsp/src/handlers/analysis.rs b/crates/rome_lsp/src/handlers/analysis.rs index 0c8ea56b764..55582c667a6 100644 --- a/crates/rome_lsp/src/handlers/analysis.rs +++ b/crates/rome_lsp/src/handlers/analysis.rs @@ -96,9 +96,10 @@ pub(crate) fn code_actions( return None; } - let action = utils::code_fix_to_lsp(&url, &doc.line_index, &diagnostics, action); - has_fixes |= action.diagnostics.is_some(); + let action = + utils::code_fix_to_lsp(&url, &doc.line_index, &diagnostics, action).ok()?; + has_fixes |= action.diagnostics.is_some(); Some(CodeActionOrCommand::CodeAction(action)) }) .chain(fix_all) @@ -169,10 +170,7 @@ fn fix_all( vec![lsp::TextEdit { range: lsp::Range { start: lsp::Position::new(0, 0), - end: lsp::Position::new( - line_index.newlines.len().try_into().unwrap_or(u32::MAX), - 0, - ), + end: lsp::Position::new(line_index.len(), 0), }, new_text: fixed.code, }], diff --git a/crates/rome_lsp/src/handlers/formatting.rs b/crates/rome_lsp/src/handlers/formatting.rs index 369075fde8c..78ea7447258 100644 --- a/crates/rome_lsp/src/handlers/formatting.rs +++ b/crates/rome_lsp/src/handlers/formatting.rs @@ -32,7 +32,7 @@ pub(crate) fn format( Err(err) => return Err(Error::from(err)), }; - let num_lines: u32 = doc.line_index.newlines.len().try_into()?; + let num_lines: u32 = doc.line_index.len(); let range = Range { start: Position::default(), @@ -89,23 +89,17 @@ pub(crate) fn format_range( // Recalculate the actual range that was reformatted from the formatter result let formatted_range = match formatted.range() { Some(range) => { - let start_loc = doc.line_index.line_col(range.start()); - let end_loc = doc.line_index.line_col(range.end()); + let start_loc = utils::position(&doc.line_index, range.start())?; + let end_loc = utils::position(&doc.line_index, range.end())?; Range { - start: Position { - line: start_loc.line, - character: start_loc.col, - }, - end: Position { - line: end_loc.line, - character: end_loc.col, - }, + start: start_loc, + end: end_loc, } } None => Range { start: Position::default(), end: Position { - line: doc.line_index.newlines.len().try_into()?, + line: doc.line_index.len(), character: 0, }, }, @@ -147,23 +141,17 @@ pub(crate) fn format_on_type( // Recalculate the actual range that was reformatted from the formatter result let formatted_range = match formatted.range() { Some(range) => { - let start_loc = doc.line_index.line_col(range.start()); - let end_loc = doc.line_index.line_col(range.end()); + let start_loc = utils::position(&doc.line_index, range.start())?; + let end_loc = utils::position(&doc.line_index, range.end())?; Range { - start: Position { - line: start_loc.line, - character: start_loc.col, - }, - end: Position { - line: end_loc.line, - character: end_loc.col, - }, + start: start_loc, + end: end_loc, } } None => Range { start: Position::default(), end: Position { - line: doc.line_index.newlines.len().try_into()?, + line: doc.line_index.len(), character: 0, }, }, diff --git a/crates/rome_lsp/src/handlers/rename.rs b/crates/rome_lsp/src/handlers/rename.rs index 12cf9b6a47b..e0fd49afcbc 100644 --- a/crates/rome_lsp/src/handlers/rename.rs +++ b/crates/rome_lsp/src/handlers/rename.rs @@ -30,7 +30,7 @@ pub(crate) fn rename(session: &Session, params: RenameParams) -> Result { - let range = utils::text_range(&doc.line_index, range)?; - content.replace_range(Range::::from(range), &change.text); + let text_range = utils::text_range(&doc.line_index, range)?; + let range = Range::::from(text_range); + tracing::trace!("replace range {range:?} with {:?}", change.text); + content.replace_range(range, &change.text); } None => { + tracing::trace!("replace content {:?}", change.text); content = change.text; } } } + tracing::trace!("new document: {content:?}"); + let doc = Document::new(version, &content); session.workspace.change_file(ChangeFileParams { diff --git a/crates/rome_lsp/src/line_index.rs b/crates/rome_lsp/src/line_index.rs index 8ee929a93d5..0ae61bf6f62 100644 --- a/crates/rome_lsp/src/line_index.rs +++ b/crates/rome_lsp/src/line_index.rs @@ -3,12 +3,15 @@ //! //! Copied from rust-analyzer -use rome_rowan::TextSize; +use std::cmp::Ordering; + +use rome_rowan::{TextRange, TextSize}; #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct LineIndex { + text: String, /// Offset the the beginning of each line, zero-based - pub(crate) newlines: Vec, + newlines: Vec, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -22,45 +25,193 @@ pub(crate) struct LineCol { impl LineIndex { pub(crate) fn new(text: &str) -> LineIndex { let mut newlines = vec![0.into()]; - let mut curr_row = 0.into(); - - for c in text.chars() { - let c_len = TextSize::of(c); - curr_row += c_len; + for (offset, c) in text.char_indices() { if c == '\n' { - newlines.push(curr_row); + let char_offset = TextSize::try_from(offset).expect("TextSize overflow"); + newlines.push(char_offset + TextSize::of(c)); } } - LineIndex { newlines } + LineIndex { + text: text.into(), + newlines, + } } - pub(crate) fn line_col(&self, offset: TextSize) -> LineCol { - let line = self.newlines.partition_point(|&it| it <= offset) - 1; - let line_start_offset = self.newlines[line]; - let col = offset - line_start_offset; - LineCol { - line: line as u32, - col: col.into(), + pub(crate) fn line_col(&self, offset: TextSize) -> Option { + // Fast path for offset == 0 + if offset == TextSize::from(0) { + return Some(LineCol { line: 0, col: 0 }); } + + // Find the index of the line `offset` belongs to + let line_index = self + .newlines + .partition_point(|&it| it <= offset) + .checked_sub(1)?; + + // Calculate the text range corresponding to `line_index` + let line_start_offset = self.newlines.get(line_index)?; + let line_end_offset = self + .newlines + .get(line_index + 1) + .cloned() + .unwrap_or_else(|| TextSize::of(&self.text)); + + let line_range = TextRange::new(*line_start_offset, line_end_offset); + let line_text = &self.text[line_range]; + + // Calculate the byte offset of the character within the line and find + // a column matching this offset + let char_offset = usize::from(offset - *line_start_offset); + let char_index = match char_offset.cmp(&line_text.len()) { + Ordering::Less => { + line_text + .char_indices() + .enumerate() + .find_map(|(index, (offset, _))| { + if offset == char_offset { + Some(index) + } else { + None + } + })? + } + // If the character offset is equal to the length of the line, the + // character index is the total number of columns in the line + Ordering::Equal => line_text.chars().count(), + // The character offset is greater than the length of the line, + // abort since the provided offset is invalid + Ordering::Greater => return None, + }; + + Some(LineCol { + line: u32::try_from(line_index).ok()?, + col: u32::try_from(char_index).ok()?, + }) } pub(crate) fn offset(&self, line_col: LineCol) -> Option { + // Convert the line and column indices to usize (this should never fail + // on 32- and 64-bits platforms) let line_index = usize::try_from(line_col.line).ok()?; + let col_index = usize::try_from(line_col.col).ok()?; + + // Load the byte offset for the start of line `line_index` let line_offset = self.newlines.get(line_index)?; - Some(*line_offset + TextSize::from(line_col.col)) + let col_offset = if col_index > 0 { + // Calculate the text range corresponding to `line_index` + let line_start = usize::from(*line_offset); + let line_end = self + .newlines + .get(line_index + 1) + .map(|offset| usize::from(*offset)) + .unwrap_or_else(|| self.text.len()); + + let line_text = self.text.get(line_start..line_end)?; + let num_chars = line_text.chars().count(); + + // If the column index is equal to the number of characters in the + // line, return the byte offset for the end of the line + let col_offset = if col_index == num_chars { + line_text.len() + } else { + // Accumulate byte offsets for each character in the line and + // return the value corresponding to `col_index` + let (col_offset, _) = line_text.char_indices().nth(col_index)?; + col_offset + }; + + TextSize::try_from(col_offset).ok()? + } else { + // Fast path for col == 0 + TextSize::from(0) + }; + + Some(*line_offset + col_offset) + } + + /// Return the text slice used to build the index + pub(crate) fn text(&self) -> &str { + &self.text + } + + /// Return the number of lines in the index, clamped to [u32::MAX] + pub(crate) fn len(&self) -> u32 { + self.newlines.len().try_into().unwrap_or(u32::MAX) } } #[cfg(test)] mod tests { + use rome_rowan::TextSize; + use super::{LineCol, LineIndex}; + macro_rules! check_conversion { + ($line_index:ident : $line_col:expr => $text_size:expr ) => { + let offset = $line_index.offset($line_col); + assert_eq!(offset, Some($text_size)); + + let line_col = $line_index.line_col($text_size); + assert_eq!(line_col, Some($line_col)); + }; + } + + #[test] + fn empty_string() { + let line_index = LineIndex::new(""); + check_conversion!(line_index: LineCol { line: 0, col: 0 } => TextSize::from(0)); + } + + #[test] + fn empty_line() { + let line_index = LineIndex::new("\n\n"); + check_conversion!(line_index: LineCol { line: 1, col: 0 } => TextSize::from(1)); + } + + #[test] + fn line_end() { + let line_index = LineIndex::new("abc\ndef\nghi"); + check_conversion!(line_index: LineCol { line: 1, col: 3 } => TextSize::from(7)); + } + #[test] - fn out_of_bounds() { + fn out_of_bounds_line() { let line_index = LineIndex::new("abcde\nfghij\n"); + let offset = line_index.offset(LineCol { line: 5, col: 0 }); assert!(offset.is_none()); } + + #[test] + fn out_of_bounds_col() { + let line_index = LineIndex::new("abcde\nfghij\n"); + + let offset = line_index.offset(LineCol { line: 1, col: 7 }); + assert!(offset.is_none()); + } + + #[test] + fn out_of_bounds_offset() { + let line_index = LineIndex::new("abcde\nfghij\n"); + + let offset = line_index.line_col(TextSize::from(13)); + assert!(offset.is_none()); + } + + #[test] + fn unicode() { + let line_index = LineIndex::new("'Jan 1, 2018 – Jan 1, 2019'"); + + check_conversion!(line_index: LineCol { line: 0, col: 0 } => TextSize::from(0)); + check_conversion!(line_index: LineCol { line: 0, col: 1 } => TextSize::from(1)); + check_conversion!(line_index: LineCol { line: 0, col: 12 } => TextSize::from(12)); + check_conversion!(line_index: LineCol { line: 0, col: 13 } => TextSize::from(15)); + check_conversion!(line_index: LineCol { line: 0, col: 14 } => TextSize::from(18)); + check_conversion!(line_index: LineCol { line: 0, col: 15 } => TextSize::from(21)); + check_conversion!(line_index: LineCol { line: 0, col: 26 } => TextSize::from(32)); + check_conversion!(line_index: LineCol { line: 0, col: 27 } => TextSize::from(33)); + } } diff --git a/crates/rome_lsp/src/utils.rs b/crates/rome_lsp/src/utils.rs index 72e2b632bb1..da771f0624f 100644 --- a/crates/rome_lsp/src/utils.rs +++ b/crates/rome_lsp/src/utils.rs @@ -21,15 +21,21 @@ use tower_lsp::jsonrpc::Error as LspError; use tower_lsp::lsp_types::{self as lsp}; use tracing::error; -pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp::Position { - let line_col = line_index.line_col(offset); - lsp::Position::new(line_col.line, line_col.col) +pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> Result { + let line_col = line_index.line_col(offset).with_context(|| { + format!( + "could not convert offset {offset:?} into a line-column index into string {:?}", + line_index.text() + ) + })?; + + Ok(lsp::Position::new(line_col.line, line_col.col)) } -pub(crate) fn range(line_index: &LineIndex, range: TextRange) -> lsp::Range { - let start = position(line_index, range.start()); - let end = position(line_index, range.end()); - lsp::Range::new(start, end) +pub(crate) fn range(line_index: &LineIndex, range: TextRange) -> Result { + let start = position(line_index, range.start())?; + let end = position(line_index, range.end())?; + Ok(lsp::Range::new(start, end)) } pub(crate) fn offset(line_index: &LineIndex, position: lsp::Position) -> Result { @@ -49,7 +55,7 @@ pub(crate) fn text_range(line_index: &LineIndex, range: lsp::Range) -> Result Vec { +pub(crate) fn text_edit(line_index: &LineIndex, diff: TextEdit) -> Result> { let mut result: Vec = Vec::new(); let mut offset = TextSize::from(0); @@ -59,7 +65,7 @@ pub(crate) fn text_edit(line_index: &LineIndex, diff: TextEdit) -> Vec { - let start = position(line_index, offset); + let start = position(line_index, offset)?; // Merge with a previous delete operation if possible let last_edit = result.last_mut().filter(|text_edit| { @@ -76,9 +82,9 @@ pub(crate) fn text_edit(line_index: &LineIndex, diff: TextEdit) -> Vec { - let start = position(line_index, offset); + let start = position(line_index, offset)?; offset += range.len(); - let end = position(line_index, offset); + let end = position(line_index, offset)?; result.push(lsp::TextEdit { range: lsp::Range::new(start, end), @@ -87,7 +93,10 @@ pub(crate) fn text_edit(line_index: &LineIndex, diff: TextEdit) -> Vec { - let mut line_col = line_index.line_col(offset); + let mut line_col = line_index + .line_col(offset) + .expect("diff length is overflowing the line count in the original file"); + line_col.line += line_count.get() + 1; line_col.col = 0; @@ -102,7 +111,7 @@ pub(crate) fn text_edit(line_index: &LineIndex, diff: TextEdit) -> Vec lsp::CodeAction { +) -> Result { // Mark diagnostics emitted by the same rule as resolved by this action let diagnostics: Vec<_> = if matches!(action.category, ActionCategory::QuickFix) { diagnostics @@ -148,7 +157,7 @@ pub(crate) fn code_fix_to_lsp( let suggestion = action.suggestion; let mut changes = HashMap::new(); - let edits = text_edit(line_index, suggestion.suggestion); + let edits = text_edit(line_index, suggestion.suggestion)?; changes.insert(url.clone(), edits); @@ -158,7 +167,7 @@ pub(crate) fn code_fix_to_lsp( change_annotations: None, }; - lsp::CodeAction { + Ok(lsp::CodeAction { title: print_markup(&suggestion.msg), kind: Some(lsp::CodeActionKind::from(kind)), diagnostics: if !diagnostics.is_empty() { @@ -175,7 +184,7 @@ pub(crate) fn code_fix_to_lsp( }, disabled: None, data: None, - } + }) } /// Convert an [rome_diagnostics::Diagnostic] to a [lsp::Diagnostic], using the span @@ -188,7 +197,7 @@ pub(crate) fn diagnostic_to_lsp( line_index: &LineIndex, ) -> Option { let location = diagnostic.location()?; - let span = range(line_index, location.span?); + let span = range(line_index, location.span?).ok()?; let severity = match diagnostic.severity() { Severity::Fatal | Severity::Error => lsp::DiagnosticSeverity::ERROR, @@ -255,12 +264,17 @@ impl Visit for RelatedInformationVisitor<'_> { None => return Ok(()), }; + let range = match range(self.line_index, span) { + Ok(range) => range, + Err(_) => return Ok(()), + }; + let related_information = self.related_information.get_or_insert_with(Vec::new); related_information.push(lsp::DiagnosticRelatedInformation { location: lsp::Location { uri: self.url.clone(), - range: range(self.line_index, span), + range, }, message: String::new(), }); @@ -337,7 +351,7 @@ line 7 new"; let line_index = LineIndex::new(OLD); let diff = TextEdit::from_unicode_words(OLD, NEW); - let text_edit = super::text_edit(&line_index, diff); + let text_edit = super::text_edit(&line_index, diff).unwrap(); assert_eq!( text_edit.as_slice(), @@ -380,7 +394,7 @@ line 7 new"; let line_index = LineIndex::new(OLD); let diff = TextEdit::from_unicode_words(OLD, NEW); - let text_edit = super::text_edit(&line_index, diff); + let text_edit = super::text_edit(&line_index, diff).unwrap(); assert_eq!( text_edit.as_slice(),