Skip to content

Commit

Permalink
Run textobject queries across injections
Browse files Browse the repository at this point in the history
With this change textobjects work even within injection layers, so you
can use `]f` to jump to a function definition in a JavaScript `<script>`
tag within HTML for example.

This requires `Syntax::query_iter` - a utility function for running a
query from `HighlightConfiguration` across injection layers - which
comes from the rainbow brackets branch (merged into my driver).
We need to relocate the textobject query from the `LanguageConfiguration`
to the `HighlightConfiguration` in order to access it
per-injection-layer, like we do for the rainbow brackets query. With
that, the only necessary change is to port the contents of
`TextObjectQuery::capture_nodes_any` to a new function that uses
`query_iter` and update callers.

The callers end up being a bit cleaner: they now only need to take
`Syntax`, rather than both the `LanguageConfiguration` and the root
layer's root `tree_sitter::Node`.
  • Loading branch information
the-mikedavis committed Feb 26, 2024
1 parent 12e7d12 commit d3575dc
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 169 deletions.
22 changes: 7 additions & 15 deletions helix-core/src/movement.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{cmp::Reverse, iter};

use ropey::iter::Chars;
use tree_sitter::{Node, QueryCursor};
use tree_sitter::Node;

use crate::{
char_idx_at_visual_offset,
Expand All @@ -13,7 +13,6 @@ use crate::{
},
line_ending::rope_is_line_ending,
position::char_idx_at_visual_block_offset,
syntax::LanguageConfiguration,
text_annotations::TextAnnotations,
textobject::TextObject,
visual_offset_from_block, Range, RopeSlice, Selection, Syntax,
Expand Down Expand Up @@ -500,29 +499,22 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo
/// Finds the range of the next or previous textobject named `object_name`, searching the textobject captures of `syntax`.
/// Returns the range in the forwards direction.
pub fn goto_treesitter_object(
syntax: &Syntax,
slice: RopeSlice,
range: Range,
object_name: &str,
dir: Direction,
slice_tree: Node,
lang_config: &LanguageConfiguration,
count: usize,
) -> Range {
let get_range = move |range: Range| -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));

let cap_name = |t: TextObject| format!("{}.{}", object_name, t);
let mut cursor = QueryCursor::new();
let nodes = lang_config.textobject_query()?.capture_nodes_any(
&[
&cap_name(TextObject::Movement),
&cap_name(TextObject::Around),
&cap_name(TextObject::Inside),
],
slice_tree,
slice,
&mut cursor,
)?;
let movement = cap_name(TextObject::Movement);
let around = cap_name(TextObject::Around);
let inside = cap_name(TextObject::Inside);
let capture_names = &[movement.as_str(), around.as_str(), inside.as_str()];
let nodes = syntax.textobject_nodes(capture_names, slice, None);

let node = match dir {
Direction::Forward => nodes
Expand Down
202 changes: 80 additions & 122 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,6 @@ pub struct LanguageConfiguration {

#[serde(skip)]
pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>,

Expand Down Expand Up @@ -534,11 +532,6 @@ impl FromStr for AutoPairConfig {
}
}

#[derive(Debug)]
pub struct TextObjectQuery {
pub query: Query,
}

#[derive(Debug)]
pub enum CapturedNode<'a> {
Single(Node<'a>),
Expand Down Expand Up @@ -586,118 +579,57 @@ impl<'a> CapturedNode<'a> {
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256;

impl TextObjectQuery {
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
/// ; OR
/// (
/// (comment)*
/// .
/// (function)
/// ) @capture
/// ```
///
/// This is a convenience wrapper that forwards to `capture_nodes_any` with
/// a single-element list of names, so it returns `None` when the query does
/// not define `capture_name`.
pub fn capture_nodes<'a>(
&'a self,
capture_name: &str,
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
self.capture_nodes_any(&[capture_name], node, slice, cursor)
}

/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
///
/// Returns `None` when the query defines none of `capture_names`.
/// Otherwise the returned iterator yields one item per query match that
/// contains the selected capture: `CapturedNode::Grouped` when the match
/// holds more than one node for it, `CapturedNode::Single` when it holds
/// exactly one.
pub fn capture_nodes_any<'a>(
&'a self,
capture_names: &[&str],
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
// Names are tried in order; the `?` bails out if the query knows none of them.
let capture_idx = capture_names
.iter()
.find_map(|cap| self.query.capture_index_for_name(cap))?;

// NOTE(review): presumably bounds the work the cursor does per query — see
// the comment on `TREE_SITTER_MATCH_LIMIT`.
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);

let nodes = cursor
.captures(&self.query, node, RopeProvider(slice))
.filter_map(move |(mat, _)| {
// Keep only the nodes of this match captured under the selected index.
let nodes: Vec<_> = mat
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();

// Several nodes under one capture are reported together; a match
// without the selected capture yields nothing.
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
});

Some(nodes)
}
}

pub fn read_query(language: &str, filename: &str) -> String {
pub fn read_query(language: &str, filename: &str) -> Option<String> {
static INHERITS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());

let query = load_runtime_file(language, filename).unwrap_or_default();
let query = load_runtime_file(language, filename).ok()?;

// replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
INHERITS_REGEX
let contents = INHERITS_REGEX
.replace_all(&query, |captures: &regex::Captures| {
captures[1]
.split(',')
.map(|language| format!("\n{}\n", read_query(language, filename)))
.filter_map(|language| Some(format!("\n{}\n", read_query(language, filename)?)))
.collect::<String>()
})
.to_string()
.to_string();

Some(contents)
}

impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
let highlights_query = read_query(&self.language_id, "highlights.scm");
let highlights_query = read_query(&self.language_id, "highlights.scm")?;
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";

let textobjects_query = read_query(&self.language_id, "textobjects.scm");

let injections_query = read_query(&self.language_id, "injections.scm");
let locals_query = read_query(&self.language_id, "locals.scm");

if highlights_query.is_empty() {
None
} else {
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
.map_err(|err| {
log::error!(
"Failed to load tree-sitter parser for language {:?}: {}",
self.language_id,
err
)
})
.ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
.map_err(|err| {
log::error!(
"Failed to load tree-sitter parser for language {:?}: {}",
self.language_id,
err
)
})
.ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
textobjects_query.as_deref(),
&injections_query.unwrap_or_default(),
&locals_query.unwrap_or_default(),
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
.ok()?;

config.configure(scopes);
Some(Arc::new(config))
}
config.configure(scopes);
Some(Arc::new(config))
}

pub fn reconfigure(&self, scopes: &[String]) {
Expand All @@ -722,24 +654,12 @@ impl LanguageConfiguration {
.as_ref()
}

pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
self.textobject_query
.get_or_init(|| {
self.load_query("textobjects.scm")
.map(|query| TextObjectQuery { query })
})
.as_ref()
}

pub fn scope(&self) -> &str {
&self.scope
}

fn load_query(&self, kind: &str) -> Option<Query> {
let query_text = read_query(&self.language_id, kind);
if query_text.is_empty() {
return None;
}
let query_text = read_query(&self.language_id, kind)?;
let lang = self.highlight_config.get()?.as_ref()?.language;
Query::new(lang, &query_text)
.map_err(|e| {
Expand Down Expand Up @@ -1457,6 +1377,42 @@ impl Syntax {
}
}

/// Runs the textobjects query of each layer through `query_iter` and yields
/// the nodes captured under the requested capture name.
///
/// For every match, the first entry of `capture_names` that resolves to a
/// capture index in the matching layer's textobjects query is selected, so
/// names are tried in the order given. A match is skipped when the layer's
/// query defines none of the names, or when the match holds no node for the
/// selected capture.
///
/// A match with more than one node for the selected capture is yielded as
/// `CapturedNode::Grouped`; a match with exactly one is yielded as
/// `CapturedNode::Single`.
///
/// `source` and `query_range` are forwarded unchanged to `query_iter`.
/// NOTE(review): `None` presumably means "no range restriction" — confirm
/// against `query_iter`.
///
/// # Panics
///
/// Panics if `query_iter` yields a match for a layer whose configuration has
/// no textobjects query. NOTE(review): this looks unreachable because the
/// same query is what is handed to `query_iter` — confirm.
pub fn textobject_nodes<'a>(
&'a self,
capture_names: &'a [&str],
source: RopeSlice<'a>,
query_range: Option<std::ops::Range<usize>>,
) -> impl Iterator<Item = CapturedNode<'a>> {
self.query_iter(
|config| config.textobjects_query.as_ref(),
source,
query_range,
)
.filter_map(move |(layer, match_, _)| {
// Capture indices belong to one query, so the name has to be resolved
// against the query of the layer that produced this match.
// TODO: cache this per-language with a hashmap?
let capture_idx = capture_names.iter().find_map(|name| {
layer
.config
.textobjects_query
.as_ref()
.expect("layer must have textobjects query in order to match")
.capture_index_for_name(name)
})?;

// Keep only the nodes of this match captured under the selected index.
let nodes: Vec<_> = match_
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();

// Several nodes under one capture are reported together; an empty list
// falls through to `None` and the match is dropped.
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
})
}

pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
let mut container_id = self.root;

Expand Down Expand Up @@ -1748,7 +1704,8 @@ pub enum HighlightEvent {
#[derive(Debug)]
pub struct HighlightConfiguration {
pub language: Grammar,
pub query: Query,
query: Query,
textobjects_query: Option<Query>,
injections_query: Query,
combined_injections_patterns: Vec<usize>,
highlights_pattern_index: usize,
Expand Down Expand Up @@ -1846,6 +1803,7 @@ impl HighlightConfiguration {
pub fn new(
language: Grammar,
highlights_query: &str,
textobjects_query: Option<&str>,
injection_query: &str,
locals_query: &str,
) -> Result<Self, QueryError> {
Expand All @@ -1865,6 +1823,9 @@ impl HighlightConfiguration {
highlights_pattern_index += 1;
}
}
let textobjects_query = textobjects_query
.map(|source| Query::new(language, source))
.transpose()?;

let injections_query = Query::new(language, injection_query)?;
let combined_injections_patterns = (0..injections_query.pattern_count())
Expand Down Expand Up @@ -1922,6 +1883,7 @@ impl HighlightConfiguration {
Ok(Self {
language,
query,
textobjects_query,
injections_query,
combined_injections_patterns,
highlights_pattern_index,
Expand Down Expand Up @@ -2809,23 +2771,18 @@ mod test {
.unwrap();
let language = get_language("rust").unwrap();

let query = Query::new(language, query_str).unwrap();
let textobject = TextObjectQuery { query };
let mut cursor = QueryCursor::new();

let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", Some(query_str), "", "").unwrap();
let syntax = Syntax::new(
source.slice(..),
Arc::new(config),
Arc::new(ArcSwap::from_pointee(loader)),
)
.unwrap();

let root = syntax.tree().root_node();
let mut test = |capture, range| {
let matches: Vec<_> = textobject
.capture_nodes(capture, root, source.slice(..), &mut cursor)
.unwrap()
let test = |capture, range| {
let capture_names = &[capture];
let matches: Vec<_> = syntax
.textobject_nodes(capture_names, source.slice(..), None)
.collect();

assert_eq!(
Expand Down Expand Up @@ -2881,6 +2838,7 @@ mod test {
language,
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
.unwrap(),
None, // textobjects.scm
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
.unwrap(),
"", // locals.scm
Expand Down Expand Up @@ -2989,7 +2947,7 @@ mod test {
.unwrap();
let language = get_language(language_name).unwrap();

let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", None, "", "").unwrap();
let syntax = Syntax::new(
source.slice(..),
Arc::new(config),
Expand Down
Loading

0 comments on commit d3575dc

Please sign in to comment.