diff --git a/book/src/generated/lang-support.md b/book/src/generated/lang-support.md index 432efb78560b..23ec6a8033f0 100644 --- a/book/src/generated/lang-support.md +++ b/book/src/generated/lang-support.md @@ -79,7 +79,7 @@ | prisma | ✓ | | | `prisma-language-server` | | prolog | | | | `swipl` | | protobuf | ✓ | | ✓ | | -| python | ✓ | ✓ | | `pylsp` | +| python | ✓ | ✓ | ✓ | `pylsp` | | r | ✓ | | | `R` | | racket | | | | `racket` | | regex | ✓ | | | | diff --git a/book/src/guides/indent.md b/book/src/guides/indent.md index f4d916b211e8..0e259289781e 100644 --- a/book/src/guides/indent.md +++ b/book/src/guides/indent.md @@ -46,6 +46,20 @@ capture on the same line, the indent level isn't changed at all. - `@outdent` (default scope `all`): Decrease the indent level by 1. The same rules as for `@indent` apply. +- `@extend`: +Extend the range of this node to the end of the line and to lines that +are indented more than the line that this node starts on. This is useful +for languages like Python, where for the purpose of indentation some nodes +(like functions or classes) should also contain indented lines that follow them. + +- `@extend.prevent-once`: +Prevents the first extension of an ancestor of this node. For example, in Python +a return expression always ends the block that it is in. Note that this only stops the +extension of the next `@extend` capture. If multiple ancestors are captured, +only the extension of the innermost one is prevented. All other ancestors are unaffected +(regardless of whether the innermost ancestor would actually have been extended). + + ## Predicates In some cases, an S-expression cannot express exactly what pattern should be matched. 
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 137b8822d049..f661046cbbd4 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -192,13 +192,15 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { /// Computes for node and all ancestors whether they are the first node on their line. /// The first entry in the return value represents the root node, the last one the node itself -fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec { +fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { let mut first_in_line = Vec::new(); loop { if let Some(prev) = node.prev_sibling() { // If we insert a new line, the first node at/after the cursor is considered to be the first in its line let first = prev.end_position().row != node.start_position().row - || (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos); + || new_line_byte_pos.map_or(false, |byte_pos| { + node.start_byte() >= byte_pos && prev.start_byte() < byte_pos + }); first_in_line.push(Some(first)); } else { // Nodes that have no previous siblings are first in their line if and only if their parent is @@ -298,8 +300,21 @@ enum IndentScope { Tail, } -/// Execute the indent query. -/// Returns for each node (identified by its id) a list of indent captures for that node. +/// A capture from the indent query which does not define an indent but extends +/// the range of a node. This is used before the indent is calculated. +enum ExtendCapture { + Extend, + PreventOnce, +} + +/// The result of running a tree-sitter indent query. This stores for +/// each node (identified by its ID) the relevant captures (already filtered +/// by predicates). +struct IndentQueryResult { + indent_captures: HashMap>, + extend_captures: HashMap>, +} + fn query_indents( query: &Query, syntax: &Syntax, @@ -309,8 +324,9 @@ fn query_indents( // Position of the (optional) newly inserted line break. 
// Given as (line, byte_pos) new_line_break: Option<(usize, usize)>, -) -> HashMap> { +) -> IndentQueryResult { let mut indent_captures: HashMap> = HashMap::new(); + let mut extend_captures: HashMap> = HashMap::new(); cursor.set_byte_range(range); // Iterate over all captures from the query for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { @@ -374,10 +390,24 @@ fn query_indents( continue; } for capture in m.captures { - let capture_type = query.capture_names()[capture.index as usize].as_str(); - let capture_type = match capture_type { + let capture_name = query.capture_names()[capture.index as usize].as_str(); + let capture_type = match capture_name { "indent" => IndentCaptureType::Indent, "outdent" => IndentCaptureType::Outdent, + "extend" => { + extend_captures + .entry(capture.node.id()) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::Extend); + continue; + } + "extend.prevent-once" => { + extend_captures + .entry(capture.node.id()) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::PreventOnce); + continue; + } _ => { // Ignore any unknown captures (these may be needed for predicates such as #match?) continue; @@ -420,7 +450,72 @@ fn query_indents( .push(indent_capture); } } - indent_captures + IndentQueryResult { + indent_captures, + extend_captures, + } +} + +/// Handle extend queries. deepest_preceding is the deepest descendant of node that directly precedes the cursor position. +/// Any ancestor of deepest_preceding which is also a descendant of node may be "extended". In that case, node will be updated, +/// so that the indent computation starts with the correct syntax node. 
+fn extend_nodes<'a>( + node: &mut Node<'a>, + deepest_preceding: Option>, + extend_captures: &HashMap>, + text: RopeSlice, + line: usize, + tab_width: usize, +) { + if let Some(mut deepest_preceding) = deepest_preceding { + let mut stop_extend = false; + while deepest_preceding != *node { + let mut extend_node = false; + // This will be set to true if this node is captured, regardless of whether + // it actually will be extended (e.g. because the cursor isn't indented + // more than the node). + let mut node_captured = false; + if let Some(captures) = extend_captures.get(&deepest_preceding.id()) { + for capture in captures { + match capture { + ExtendCapture::PreventOnce => { + stop_extend = true; + } + ExtendCapture::Extend => { + node_captured = true; + // We extend the node if + // - the cursor is on the same line as the end of the node OR + // - the line that the cursor is on is more indented than the + // first line of the node + if deepest_preceding.end_position().row == line { + extend_node = true; + } else { + let cursor_indent = + indent_level_for_line(text.line(line), tab_width); + let node_indent = indent_level_for_line( + text.line(deepest_preceding.start_position().row), + tab_width, + ); + if cursor_indent > node_indent { + extend_node = true; + } + } + } + } + } + } + // If we encountered some `PreventOnce` capture before, we don't + // extend the node even if we otherwise would + if node_captured && stop_extend { + stop_extend = false; + } else if extend_node && !stop_extend { + *node = deepest_preceding; + break; + } + // This parent always exists since node is an ancestor of deepest_preceding + deepest_preceding = deepest_preceding.parent().unwrap(); + } + } } /// Use the syntax tree to determine the indentation for a given position. 
@@ -459,40 +554,73 @@ fn query_indents( /// }, /// ); /// ``` +#[allow(clippy::too_many_arguments)] pub fn treesitter_indent_for_pos( query: &Query, syntax: &Syntax, indent_style: &IndentStyle, + tab_width: usize, text: RopeSlice, line: usize, pos: usize, new_line: bool, ) -> Option { let byte_pos = text.char_to_byte(pos); + // The innermost tree-sitter node which is considered for the indent + // computation. It may change if some preceding node is extended let mut node = syntax .tree() .root_node() .descendant_for_byte_range(byte_pos, byte_pos)?; - let mut first_in_line = get_first_in_line(node, byte_pos, new_line); - let new_line_break = if new_line { - Some((line, byte_pos)) - } else { - None + let (query_result, deepest_preceding) = { + // The query range should intersect with all nodes directly preceding + // the position of the indent query in case one of them is extended. + let mut deepest_preceding = None; // The deepest node preceding the indent query position + let mut tree_cursor = node.walk(); + for child in node.children(&mut tree_cursor) { + if child.byte_range().end <= byte_pos { + deepest_preceding = Some(child); + } + } + deepest_preceding = deepest_preceding.map(|mut prec| { + // Get the deepest directly preceding node + while prec.child_count() > 0 { + prec = prec.child(prec.child_count() - 1).unwrap(); + } + prec + }); + let query_range = deepest_preceding + .map(|prec| prec.byte_range().end - 1..byte_pos + 1) + .unwrap_or(byte_pos..byte_pos + 1); + + crate::syntax::PARSER.with(|ts_parser| { + let mut ts_parser = ts_parser.borrow_mut(); + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + let query_result = query_indents( + query, + syntax, + &mut cursor, + text, + query_range, + new_line.then(|| (line, byte_pos)), + ); + ts_parser.cursors.push(cursor); + (query_result, deepest_preceding) + }) }; - let query_result = crate::syntax::PARSER.with(|ts_parser| { - let mut ts_parser = ts_parser.borrow_mut(); - let mut 
cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); - let query_result = query_indents( - query, - syntax, - &mut cursor, - text, - byte_pos..byte_pos + 1, - new_line_break, - ); - ts_parser.cursors.push(cursor); - query_result - }); + let indent_captures = query_result.indent_captures; + let extend_captures = query_result.extend_captures; + + // Check for extend captures, potentially changing the node that the indent calculation starts with + extend_nodes( + &mut node, + deepest_preceding, + &extend_captures, + text, + line, + tab_width, + ); + let mut first_in_line = get_first_in_line(node, new_line.then(|| byte_pos)); let mut result = Indentation::default(); // We always keep track of all the indent changes on one line, in order to only indent once @@ -504,7 +632,7 @@ pub fn treesitter_indent_for_pos( // one entry for each ancestor of the node (which is what we iterate over) let is_first = *first_in_line.last().unwrap(); // Apply all indent definitions for this node - if let Some(definitions) = query_result.get(&node.id()) { + if let Some(definitions) = indent_captures.get(&node.id()) { for definition in definitions { match definition.scope { IndentScope::All => { @@ -550,7 +678,13 @@ pub fn treesitter_indent_for_pos( node = parent; first_in_line.pop(); } else { - result.add_line(&indent_for_line_below); + // Only add the indentation for the line below if that line + // is not after the line that the indentation is calculated for. 
+ if (node.start_position().row < line) + || (new_line && node.start_position().row == line && node.start_byte() < byte_pos) + { + result.add_line(&indent_for_line_below); + } result.add_line(&indent_for_line); break; } @@ -579,6 +713,7 @@ pub fn indent_for_newline( query, syntax, indent_style, + tab_width, text, line_before, line_before_end_pos, diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index ff04d05f5bbe..e1114f4a9e25 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -50,6 +50,7 @@ fn test_treesitter_indent(file_name: &str, lang_scope: &str) { indent_query, &syntax, &IndentStyle::Spaces(4), + 4, text, i, text.line_to_char(i) + pos, diff --git a/runtime/queries/python/indents.scm_ b/runtime/queries/python/indents.scm similarity index 65% rename from runtime/queries/python/indents.scm_ rename to runtime/queries/python/indents.scm index 810ff52f5e8f..b7b499c07ae9 100644 --- a/runtime/queries/python/indents.scm_ +++ b/runtime/queries/python/indents.scm @@ -27,12 +27,32 @@ (class_definition) ] @indent +[ + (if_statement) + (for_statement) + (while_statement) + (with_statement) + (try_statement) + + (function_definition) + (class_definition) +] @extend + +[ + (return_statement) + (break_statement) + (continue_statement) + (raise_statement) + (pass_statement) +] @extend.prevent-once + [ ")" "]" "}" - (return_statement) - (pass_statement) - (raise_statement) ] @outdent +(elif_clause + "elif" @outdent) +(else_clause + "else" @outdent)