Skip to content

Commit

Permalink
Update Indexer to use new f-string tokens (#7325)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the `Indexer` to use the new f-string tokens to compute
the `f_string_ranges` for f-strings. It adds a new abstraction which
exposes two methods to support extracting the range for the surrounding
innermost and outermost f-string. It uses the builder pattern to build
the f-string ranges which is similar to how the comment ranges are
built.

## Test Plan

Add new test cases for f-strings for:
* Tab indentation rule
* Line continuation detection in the indexer
* To get the innermost / outermost f-string range
* All detected f-string ranges

fixes: #7290
  • Loading branch information
dhruvmanila authored Sep 19, 2023
1 parent c496d9c commit 1b2ef3e
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 36 deletions.
8 changes: 8 additions & 0 deletions crates/ruff/resources/test/fixtures/pycodestyle/W19.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,11 @@ def test_keys(self):
multiline string with tab in it, different lines
'''
" single line string with tab in it"

f"test{
tab_indented_should_be_flagged
} <- this tab is fine"

f"""test{
tab_indented_should_be_flagged
} <- this tab is fine"""
2 changes: 1 addition & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ impl<'a> Checker<'a> {

// Find the quote character used to start the containing f-string.
let expr = self.semantic.current_expression()?;
let string_range = self.indexer.f_string_range(expr.start())?;
let string_range = self.indexer.fstring_ranges().innermost(expr.start())?;
let trailing_quote = trailing_quote(self.locator.slice(string_range))?;

// Invert the quote character, if it's a single quote.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,20 @@ W19.py:146:1: W191 Indentation contains tabs
148 | #: W191 - okay
|

W19.py:157:1: W191 Indentation contains tabs
|
156 | f"test{
157 | tab_indented_should_be_flagged
| ^^^^ W191
158 | } <- this tab is fine"
|

W19.py:161:1: W191 Indentation contains tabs
|
160 | f"""test{
161 | tab_indented_should_be_flagged
| ^^^^ W191
162 | } <- this tab is fine"""
|


84 changes: 84 additions & 0 deletions crates/ruff_python_index/src/fstring_ranges.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use std::collections::BTreeMap;

use ruff_python_parser::Tok;
use ruff_text_size::{TextRange, TextSize};

/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`].
/// There can be multiple overlapping ranges for nested f-strings.
#[derive(Debug)]
pub struct FStringRanges {
raw: BTreeMap<TextSize, TextRange>,
}

impl FStringRanges {
/// Return the [`TextRange`] of the innermost f-string at the given offset.
pub fn innermost(&self, offset: TextSize) -> Option<TextRange> {
self.raw
.range(..=offset)
.rev()
.find(|(_, range)| range.contains(offset))
.map(|(_, range)| *range)
}

/// Return the [`TextRange`] of the outermost f-string at the given offset.
pub fn outermost(&self, offset: TextSize) -> Option<TextRange> {
// Explanation of the algorithm:
//
// ```python
// # v
// f"normal" f"another" f"first {f"second {f"third"} second"} first"
// # ^^(1)^^^
// # ^^^^^^^^^^^^(2)^^^^^^^^^^^^
// # ^^^^^^^^^^^^^^^^^^^^^(3)^^^^^^^^^^^^^^^^^^^^
// # ^^^(4)^^^^
// # ^^^(5)^^^
// ```
//
// The offset is marked with a `v` and the ranges are numbered in the order
// they are yielded by the iterator in the reverse order. The algorithm
// works as follows:
// 1. Skip all ranges that don't contain the offset (1).
// 2. Take all ranges that contain the offset (2, 3).
// 3. Stop taking ranges when the offset is no longer contained.
// 4. Take the last range that contained the offset (3, the outermost).
self.raw
.range(..=offset)
.rev()
.skip_while(|(_, range)| !range.contains(offset))
.take_while(|(_, range)| range.contains(offset))
.last()
.map(|(_, range)| *range)
}

#[cfg(test)]
pub(crate) fn ranges(&self) -> impl Iterator<Item = TextRange> + '_ {
self.raw.values().copied()
}
}

#[derive(Default)]
pub(crate) struct FStringRangesBuilder {
start_locations: Vec<TextSize>,
raw: BTreeMap<TextSize, TextRange>,
}

impl FStringRangesBuilder {
pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) {
match token {
Tok::FStringStart => {
self.start_locations.push(range.start());
}
Tok::FStringEnd => {
if let Some(start) = self.start_locations.pop() {
self.raw.insert(start, TextRange::new(start, range.end()));
}
}
_ => {}
}
}

pub(crate) fn finish(self) -> FStringRanges {
debug_assert!(self.start_locations.is_empty());
FStringRanges { raw: self.raw }
}
}
Loading

0 comments on commit 1b2ef3e

Please sign in to comment.