Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the injection marker type #12561

Merged
merged 2 commits into from
Jan 23, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 40 additions & 22 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ use helix_loader::grammar::{get_language, load_runtime_file};

pub use tree_cursor::TreeCursor;

fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<rope::Regex>, D::Error>
where
D: serde::Deserializer<'de>,
{
Option::<String>::deserialize(deserializer)?
.map(|buf| Regex::new(&buf).map_err(serde::de::Error::custom))
.map(|buf| rope::Regex::new(&buf).map_err(serde::de::Error::custom))
.transpose()
}

Expand Down Expand Up @@ -135,7 +135,7 @@ pub struct LanguageConfiguration {

// content_regex
#[serde(default, skip_serializing, deserialize_with = "deserialize_regex")]
pub injection_regex: Option<Regex>,
pub injection_regex: Option<rope::Regex>,
// first_line_regex
//
#[serde(skip)]
Expand Down Expand Up @@ -1003,12 +1003,12 @@ impl Loader {

/// Unlike language_config_for_language_id, which only returns Some for an exact id, this
/// function will perform a regex match on the given string to find the closest language match.
pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> {
pub fn language_config_for_name(&self, slice: RopeSlice) -> Option<Arc<LanguageConfiguration>> {
let mut best_match_length = 0;
let mut best_match_position = None;
for (i, configuration) in self.language_configs.iter().enumerate() {
if let Some(injection_regex) = &configuration.injection_regex {
if let Some(mat) = injection_regex.find(name) {
if let Some(mat) = injection_regex.find(slice.regex_input()) {
let length = mat.end() - mat.start();
if length > best_match_length {
best_match_position = Some(i);
Expand All @@ -1026,12 +1026,18 @@ impl Loader {
capture: &InjectionLanguageMarker,
) -> Option<Arc<LanguageConfiguration>> {
match capture {
InjectionLanguageMarker::Name(string) => self.language_config_for_name(string),
InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file),
InjectionLanguageMarker::Shebang(shebang) => self
.language_config_ids_by_shebang
.get(shebang)
.and_then(|&id| self.language_configs.get(id).cloned()),
InjectionLanguageMarker::LanguageId(id) => self.language_config_for_language_id(id),
InjectionLanguageMarker::Name(name) => self.language_config_for_name(*name),
InjectionLanguageMarker::Filename(file) => {
let path_str: Cow<str> = (*file).into();
self.language_config_for_file_name(Path::new(path_str.as_ref()))
}
InjectionLanguageMarker::Shebang(shebang) => {
let shebang_str: Cow<str> = (*shebang).into();
self.language_config_ids_by_shebang
.get(shebang_str.as_ref())
.and_then(|&id| self.language_configs.get(id).cloned())
}
}
}

Expand Down Expand Up @@ -2030,12 +2036,13 @@ impl HighlightConfiguration {
for capture in query_match.captures {
let index = Some(capture.index);
if index == self.injection_language_capture_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
injection_capture = Some(InjectionLanguageMarker::Name(name));
injection_capture = Some(InjectionLanguageMarker::Name(
source.byte_slice(capture.node.byte_range()),
));
} else if index == self.injection_filename_capture_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
injection_capture = Some(InjectionLanguageMarker::Filename(
source.byte_slice(capture.node.byte_range()),
));
} else if index == self.injection_shebang_capture_index {
let node_slice = source.byte_slice(capture.node.byte_range());

Expand All @@ -2054,7 +2061,7 @@ impl HighlightConfiguration {
.captures_iter(lines.regex_input())
.map(|cap| {
let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
InjectionLanguageMarker::Shebang(cap.into())
InjectionLanguageMarker::Shebang(cap)
})
.next()
} else if index == self.injection_content_capture_index {
Expand Down Expand Up @@ -2085,8 +2092,8 @@ impl HighlightConfiguration {
"injection.language" if injection_capture.is_none() => {
injection_capture = prop
.value
.as_ref()
.map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
.as_deref()
.map(InjectionLanguageMarker::LanguageId);
}

// By default, injections do not include the *children* of an
Expand Down Expand Up @@ -2521,9 +2528,20 @@ impl Iterator for HighlightIter<'_> {

#[derive(Debug, Clone)]
pub enum InjectionLanguageMarker<'a> {
Name(Cow<'a, str>),
Filename(Cow<'a, Path>),
Shebang(String),
/// The language is specified by `LanguageConfiguration`'s `language_id` field.
///
/// This marker is used when a pattern sets the `injection.language` property, for example
/// `(#set! injection.language "rust")`.
LanguageId(&'a str),
/// The language is specified in the document and captured by `@injection.language`.
///
/// This is used for markdown code fences for example. While the `LanguageId` variant can be
/// looked up by finding the language config that sets an `language_id`, this variant contains
/// text from the document being highlighted, so the text is checked against each language's
/// `injection_regex`.
Name(RopeSlice<'a>),
Filename(RopeSlice<'a>),
Shebang(RopeSlice<'a>),
}

const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
Expand Down
Loading