Skip to content

Commit

Permalink
Run textobject queries across injections
Browse files Browse the repository at this point in the history
With this change textobjects work even within injection layers, so you
can use `]f` to jump to a function definition in a JavaScript `<script>`
tag within HTML for example.

This requires `Syntax::query_iter` - a utility function for running a
query from `HighlightConfiguration` across injection layers - which
comes from the rainbow brackets branch (merged into my driver).
We need to relocate the textobject query from the `LanguageConfiguration`
to the `HighlightConfiguration` in order to access it
per-injection-layer, like we do for the rainbow brackets query. With
that, the only necessary change is to port the contents of
`TextObjectQuery::capture_nodes_any` to a new function that uses
`query_iter` and update callers.

The callers end up being a bit cleaner: they now only need to take `Syntax`,
rather than both the `LanguageConfiguration` and the root layer's root
`tree_sitter::Node`.
  • Loading branch information
the-mikedavis committed Jan 11, 2024
1 parent 8bf5e07 commit 79202fc
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 136 deletions.
22 changes: 7 additions & 15 deletions helix-core/src/movement.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{cmp::Reverse, iter};

use ropey::iter::Chars;
use tree_sitter::{Node, QueryCursor};
use tree_sitter::Node;

use crate::{
char_idx_at_visual_offset,
Expand All @@ -13,7 +13,6 @@ use crate::{
},
line_ending::rope_is_line_ending,
position::char_idx_at_visual_block_offset,
syntax::LanguageConfiguration,
text_annotations::TextAnnotations,
textobject::TextObject,
visual_offset_from_block, Range, RopeSlice, Selection, Syntax,
Expand Down Expand Up @@ -500,29 +499,22 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo
/// Finds the range of the next or previous `object_name` textobject, relative to `range`, in `syntax`.
/// Returns the range in the forwards direction.
pub fn goto_treesitter_object(
syntax: &Syntax,
slice: RopeSlice,
range: Range,
object_name: &str,
dir: Direction,
slice_tree: Node,
lang_config: &LanguageConfiguration,
count: usize,
) -> Range {
let get_range = move |range: Range| -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));

let cap_name = |t: TextObject| format!("{}.{}", object_name, t);
let mut cursor = QueryCursor::new();
let nodes = lang_config.textobject_query()?.capture_nodes_any(
&[
&cap_name(TextObject::Movement),
&cap_name(TextObject::Around),
&cap_name(TextObject::Inside),
],
slice_tree,
slice,
&mut cursor,
)?;
let movement = cap_name(TextObject::Movement);
let around = cap_name(TextObject::Around);
let inside = cap_name(TextObject::Inside);
let capture_names = &[movement.as_str(), around.as_str(), inside.as_str()];
let nodes = syntax.textobject_nodes(capture_names, slice, None);

let node = match dir {
Direction::Forward => nodes
Expand Down
132 changes: 42 additions & 90 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,6 @@ pub struct LanguageConfiguration {

#[serde(skip)]
pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>,

Expand Down Expand Up @@ -503,11 +501,6 @@ impl FromStr for AutoPairConfig {
}
}

/// A compiled tree-sitter query for a language's textobjects.
#[derive(Debug)]
pub struct TextObjectQuery {
/// The underlying compiled query that capture lookups are run against.
pub query: Query,
}

#[derive(Debug)]
pub enum CapturedNode<'a> {
Single(Node<'a>),
Expand Down Expand Up @@ -555,68 +548,6 @@ impl<'a> CapturedNode<'a> {
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256;

impl TextObjectQuery {
    /// Runs the query against `node` and yields the sub nodes captured under
    /// `capture_name` ("function.inside", "class.around", etc).
    ///
    /// A single capture may cover several nodes when the pattern uses
    /// quantifiers (+, *, etc); support for that is partial and could use
    /// improvement.
    ///
    /// ```query
    /// (comment)+ @capture
    ///
    /// ; OR
    /// (
    ///   (comment)*
    ///   .
    ///   (function)
    /// ) @capture
    /// ```
    ///
    /// Returns `None` when the query does not define `capture_name`.
    pub fn capture_nodes<'a>(
        &'a self,
        capture_name: &str,
        node: Node<'a>,
        slice: RopeSlice<'a>,
        cursor: &'a mut QueryCursor,
    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
        // Thin wrapper: a single name is just the one-element case of the
        // "any of these names" lookup.
        self.capture_nodes_any(&[capture_name], node, slice, cursor)
    }

    /// Picks the first name in `capture_names` that the query defines and
    /// yields the sub nodes captured under it.
    ///
    /// Returns `None` when none of the names is defined by the query.
    pub fn capture_nodes_any<'a>(
        &'a self,
        capture_names: &[&str],
        node: Node<'a>,
        slice: RopeSlice<'a>,
        cursor: &'a mut QueryCursor,
    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
        let wanted = capture_names
            .iter()
            .find_map(|name| self.query.capture_index_for_name(name))?;

        cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);

        let captured = cursor
            .captures(&self.query, node, RopeProvider(slice))
            .filter_map(move |(query_match, _)| {
                // Keep only the nodes of this match bound to the wanted capture.
                let mut hits: Vec<_> = query_match
                    .captures
                    .iter()
                    .filter(|cap| cap.index == wanted)
                    .map(|cap| cap.node)
                    .collect();

                match hits.len() {
                    0 => None,
                    1 => hits.pop().map(CapturedNode::Single),
                    _ => Some(CapturedNode::Grouped(hits)),
                }
            });

        Some(captured)
    }
}

pub fn read_query(language: &str, filename: &str) -> String {
static INHERITS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
Expand All @@ -640,6 +571,8 @@ impl LanguageConfiguration {
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";

let textobjects_query = read_query(&self.language_id, "textobjects.scm");

let injections_query = read_query(&self.language_id, "injections.scm");
let locals_query = read_query(&self.language_id, "locals.scm");

Expand All @@ -658,6 +591,7 @@ impl LanguageConfiguration {
let config = HighlightConfiguration::new(
language,
&highlights_query,
&textobjects_query,
&injections_query,
&locals_query,
)
Expand Down Expand Up @@ -691,15 +625,6 @@ impl LanguageConfiguration {
.as_ref()
}

/// Returns the textobject query for this language, if one could be loaded.
///
/// The query is loaded from `textobjects.scm` on first access and the result
/// (including a failed load) is kept in the `textobject_query` cell.
pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
    let loaded = self.textobject_query.get_or_init(|| {
        let query = self.load_query("textobjects.scm")?;
        Some(TextObjectQuery { query })
    });
    loaded.as_ref()
}

/// Returns this language configuration's `scope` field as a string slice.
pub fn scope(&self) -> &str {
&self.scope
}
Expand Down Expand Up @@ -1397,6 +1322,33 @@ impl Syntax {
}
}

/// Runs each layer's textobjects query via `query_iter` and yields the nodes
/// captured under the first of `capture_names` that the layer's query defines.
///
/// Matches from a layer whose query defines none of the names are skipped,
/// as are matches that bind no node to the chosen capture. A match binding
/// several nodes to the capture is yielded as `CapturedNode::Grouped`;
/// otherwise the single node is yielded as `CapturedNode::Single`.
///
/// `query_range` is forwarded unchanged to `query_iter`.
pub fn textobject_nodes<'a>(
    &'a self,
    capture_names: &'a [&str],
    source: RopeSlice<'a>,
    query_range: Option<std::ops::Range<usize>>,
) -> impl Iterator<Item = CapturedNode<'a>> {
    let matches = self.query_iter(|config| &config.textobjects_query, source, query_range);

    matches.filter_map(move |(layer, match_, _)| {
        // TODO: cache this per-language with a hashmap?
        let wanted = capture_names
            .iter()
            .find_map(|name| layer.config.textobjects_query.capture_index_for_name(name))?;

        let mut hits: Vec<_> = match_
            .captures
            .iter()
            .filter(|cap| cap.index == wanted)
            .map(|cap| cap.node)
            .collect();

        match hits.len() {
            0 => None,
            1 => hits.pop().map(CapturedNode::Single),
            _ => Some(CapturedNode::Grouped(hits)),
        }
    })
}

// Commenting
// comment_strings_for_pos
// is_commented
Expand Down Expand Up @@ -1636,7 +1588,8 @@ pub enum HighlightEvent {
#[derive(Debug)]
pub struct HighlightConfiguration {
pub language: Grammar,
pub query: Query,
query: Query,
textobjects_query: Query,
injections_query: Query,
combined_injections_patterns: Vec<usize>,
highlights_pattern_index: usize,
Expand Down Expand Up @@ -1734,6 +1687,7 @@ impl HighlightConfiguration {
pub fn new(
language: Grammar,
highlights_query: &str,
textobjects_query: &str,
injection_query: &str,
locals_query: &str,
) -> Result<Self, QueryError> {
Expand All @@ -1753,6 +1707,7 @@ impl HighlightConfiguration {
highlights_pattern_index += 1;
}
}
let textobjects_query = Query::new(language, textobjects_query)?;

let injections_query = Query::new(language, injection_query)?;
let combined_injections_patterns = (0..injections_query.pattern_count())
Expand Down Expand Up @@ -1810,6 +1765,7 @@ impl HighlightConfiguration {
Ok(Self {
language,
query,
textobjects_query,
injections_query,
combined_injections_patterns,
highlights_pattern_index,
Expand Down Expand Up @@ -2690,18 +2646,13 @@ mod test {
});
let language = get_language("rust").unwrap();

let query = Query::new(language, query_str).unwrap();
let textobject = TextObjectQuery { query };
let mut cursor = QueryCursor::new();

let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", query_str, "", "").unwrap();
let syntax = Syntax::new(source.slice(..), Arc::new(config), Arc::new(loader)).unwrap();

let root = syntax.tree().root_node();
let mut test = |capture, range| {
let matches: Vec<_> = textobject
.capture_nodes(capture, root, source.slice(..), &mut cursor)
.unwrap()
let test = |capture, range| {
let capture_names = &[capture];
let matches: Vec<_> = syntax
.textobject_nodes(capture_names, source.slice(..), None)
.collect();

assert_eq!(
Expand Down Expand Up @@ -2756,6 +2707,7 @@ mod test {
language,
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
.unwrap(),
"", // textobjects.scm
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
.unwrap(),
"", // locals.scm
Expand Down Expand Up @@ -2858,7 +2810,7 @@ mod test {
});
let language = get_language(language_name).unwrap();

let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", "", "", "").unwrap();
let syntax = Syntax::new(source.slice(..), Arc::new(config), Arc::new(loader)).unwrap();

let root = syntax
Expand Down
12 changes: 4 additions & 8 deletions helix-core/src/textobject.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use std::fmt::Display;

use ropey::RopeSlice;
use tree_sitter::{Node, QueryCursor};

use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction;
use crate::surround;
use crate::syntax::LanguageConfiguration;
use crate::Range;

fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
Expand Down Expand Up @@ -254,22 +252,20 @@ fn textobject_pair_surround_impl(
/// `object_name` is a query capture base name like "function", "class", etc.
/// `syntax` is the parsed syntax for the given text slice; its textobject query is run to find matches.
pub fn textobject_treesitter(
syntax: &crate::Syntax,
slice: RopeSlice,
range: Range,
textobject: TextObject,
object_name: &str,
slice_tree: Node,
lang_config: &LanguageConfiguration,
_count: usize,
) -> Range {
let get_range = move || -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));

let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner
let mut cursor = QueryCursor::new();
let node = lang_config
.textobject_query()?
.capture_nodes(&capture_name, slice_tree, slice, &mut cursor)?
let capture_names = &[capture_name.as_str()];
let node = syntax
.textobject_nodes(capture_names, slice, None)
.filter(|node| node.byte_range().contains(&byte_pos))
.min_by_key(|node| node.byte_range().len())?;

Expand Down
29 changes: 6 additions & 23 deletions helix-term/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5116,20 +5116,12 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct
let count = cx.count();
let motion = move |editor: &mut Editor| {
let (view, doc) = current!(editor);
if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) {
if let Some(syntax) = doc.syntax() {
let text = doc.text().slice(..);
let root = syntax.tree().root_node();

let selection = doc.selection(view.id).clone().transform(|range| {
let new_range = movement::goto_treesitter_object(
text,
range,
object,
direction,
root,
lang_config,
count,
);
let new_range =
movement::goto_treesitter_object(syntax, text, range, object, direction, count);

if editor.mode == Mode::Select {
let head = if new_range.head < range.anchor {
Expand Down Expand Up @@ -5211,19 +5203,10 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
let text = doc.text().slice(..);

let textobject_treesitter = |obj_name: &str, range: Range| -> Range {
let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) {
Some(t) => t,
None => return range,
let Some(syntax) = doc.syntax() else {
return range;
};
textobject::textobject_treesitter(
text,
range,
objtype,
obj_name,
syntax.tree().root_node(),
lang_config,
count,
)
textobject::textobject_treesitter(syntax, text, range, objtype, obj_name, count)
};

if ch == 'g' && doc.diff_handle().is_none() {
Expand Down

0 comments on commit 79202fc

Please sign in to comment.