From f0da9b8717c99c203e423512ff86d050b64096b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20F=C3=B6rster?= Date: Mon, 30 Dec 2019 21:27:27 +0100 Subject: [PATCH] Refactor LaTeX parser --- crates/texlab_syntax/src/latex/env.rs | 23 ++++++++--------------- crates/texlab_syntax/src/latex/lexer.rs | 14 +++++++------- crates/texlab_syntax/src/latex/mod.rs | 19 +++++++++---------- crates/texlab_syntax/src/latex/parser.rs | 2 +- crates/texlab_syntax/src/lib.rs | 12 +++++++----- crates/texlab_workspace/src/document.rs | 20 +++++++++----------- crates/texlab_workspace/src/workspace.rs | 4 ++-- 7 files changed, 43 insertions(+), 51 deletions(-) diff --git a/crates/texlab_syntax/src/latex/env.rs b/crates/texlab_syntax/src/latex/env.rs index fb4419421..7170f3a60 100644 --- a/crates/texlab_syntax/src/latex/env.rs +++ b/crates/texlab_syntax/src/latex/env.rs @@ -15,24 +15,17 @@ impl LatexEnvironmentDelimiter { } pub fn is_math(&self) -> bool { - if let Some(name) = self.name() { - LANGUAGE_DATA - .math_environments - .iter() - .any(|env| env == name.text()) - } else { - false - } + self.is_special(LANGUAGE_DATA.math_environments.iter()) } pub fn is_enum(&self) -> bool { - if let Some(name) = self.name() { - LANGUAGE_DATA - .enum_environments - .iter() - .any(|env| env == name.text()) - } else { - false + self.is_special(LANGUAGE_DATA.enum_environments.iter()) + } + + fn is_special<'a, I: Iterator>(&self, mut values: I) -> bool { + match self.name() { + Some(name) => values.any(|env| env == name.text()), + None => false, } } } diff --git a/crates/texlab_syntax/src/latex/lexer.rs b/crates/texlab_syntax/src/latex/lexer.rs index 0cfb19d7d..517546bd5 100644 --- a/crates/texlab_syntax/src/latex/lexer.rs +++ b/crates/texlab_syntax/src/latex/lexer.rs @@ -7,7 +7,7 @@ pub struct LatexLexer<'a> { impl<'a> LatexLexer<'a> { pub fn new(text: &'a str) -> Self { - LatexLexer { + Self { stream: CharStream::new(text), } } @@ -125,7 +125,7 @@ mod tests { } #[test] - fn test_word() { + fn word() { let mut lexer = LatexLexer::new("foo bar baz"); verify(&mut lexer, 0, 0, "foo", LatexTokenKind::Word); verify(&mut lexer, 0, 4, "bar", LatexTokenKind::Word); @@ -134,7 +134,7 @@ mod tests { } #[test] - fn test_command() { + fn command() { let mut lexer = LatexLexer::new("\\foo\\bar@baz\n\\foo*"); verify(&mut lexer, 0, 0, "\\foo", LatexTokenKind::Command); verify(&mut lexer, 0, 4, "\\bar@baz", LatexTokenKind::Command); @@ -143,7 +143,7 @@ mod tests { } #[test] - fn test_escape_sequence() { + fn escape_sequence() { let mut lexer = LatexLexer::new("\\%\\**"); verify(&mut lexer, 0, 0, "\\%", LatexTokenKind::Command); verify(&mut lexer, 0, 2, "\\*", LatexTokenKind::Command); @@ -152,7 +152,7 @@ mod tests { } #[test] - fn test_group_delimiter() { + fn group_delimiter() { let mut lexer = LatexLexer::new("{}[]"); verify(&mut lexer, 0, 0, "{", LatexTokenKind::BeginGroup); verify(&mut lexer, 0, 1, "}", LatexTokenKind::EndGroup); @@ -162,7 +162,7 @@ mod tests { } #[test] - fn test_math() { + fn math() { let mut lexer = LatexLexer::new("$$ $ $"); verify(&mut lexer, 0, 0, "$$", LatexTokenKind::Math); verify(&mut lexer, 0, 3, "$", LatexTokenKind::Math); @@ -171,7 +171,7 @@ mod tests { } #[test] - fn test_line_comment() { + fn line_comment() { let mut lexer = LatexLexer::new(" %foo \nfoo"); verify(&mut lexer, 1, 0, "foo", LatexTokenKind::Word); assert_eq!(None, lexer.next()); diff --git a/crates/texlab_syntax/src/latex/mod.rs b/crates/texlab_syntax/src/latex/mod.rs index b661bfa60..787909f73 100644 --- a/crates/texlab_syntax/src/latex/mod.rs +++ b/crates/texlab_syntax/src/latex/mod.rs @@ -21,7 +21,7 @@ use self::lexer::LatexLexer; use self::parser::LatexParser; use super::language::*; use super::text::SyntaxNode; -use super::SyntaxTreeContext; +use super::SyntaxTreeInput; use path_clean::PathClean; use std::path::PathBuf; use std::sync::Arc; @@ -132,11 +132,11 @@ impl LatexInclude { components } - fn parse(context: SyntaxTreeContext, commands: &[Arc]) -> Vec { + fn parse(input: SyntaxTreeInput, commands: &[Arc]) -> Vec { let mut includes = Vec::new(); for command in commands { for description in &LANGUAGE_DATA.include_commands { - if let Some(include) = Self::parse_single(context, &command, &description) { + if let Some(include) = Self::parse_single(input, &command, &description) { includes.push(include); } } @@ -145,7 +145,7 @@ impl LatexInclude { } fn parse_single( - context: SyntaxTreeContext, + input: SyntaxTreeInput, command: &Arc, description: &LatexIncludeCommand, ) -> Option { @@ -159,7 +159,7 @@ impl LatexInclude { let mut all_targets = Vec::new(); for relative_path in command.extract_comma_separated_words(description.index) { - let mut path = context.uri.to_file_path().ok()?; + let mut path = input.uri.to_file_path().ok()?; path.pop(); path.push(relative_path.text()); path = PathBuf::from(path.to_string_lossy().into_owned().replace('\\', "/")); @@ -175,8 +175,7 @@ impl LatexInclude { } } - if let Some(uri) = - Self::resolve_distro_file(&context.resolver, description, relative_path) + if let Some(uri) = Self::resolve_distro_file(input.resolver, description, relative_path) { targets.push(uri); } @@ -281,12 +280,12 @@ pub struct LatexSyntaxTree { } impl LatexSyntaxTree { - pub fn parse(context: SyntaxTreeContext, text: &str) -> Self { - let lexer = LatexLexer::new(text); + pub fn parse(input: SyntaxTreeInput) -> Self { + let lexer = LatexLexer::new(input.text); let mut parser = LatexParser::new(lexer); let root = Arc::new(parser.root()); let commands = LatexCommandAnalyzer::parse(Arc::clone(&root)); - let includes = LatexInclude::parse(context, &commands); + let includes = LatexInclude::parse(input, &commands); let components = includes.iter().flat_map(LatexInclude::components).collect(); let env = LatexEnvironmentInfo::parse(&commands); let structure = LatexStructureInfo::parse(&commands); diff --git a/crates/texlab_syntax/src/latex/parser.rs b/crates/texlab_syntax/src/latex/parser.rs index 40693071c..e3a40399e 100644 --- a/crates/texlab_syntax/src/latex/parser.rs +++ b/crates/texlab_syntax/src/latex/parser.rs @@ -15,7 +15,7 @@ pub struct LatexParser> { impl> LatexParser { pub fn new(tokens: I) -> Self { - LatexParser { + Self { tokens: tokens.peekable(), } } diff --git a/crates/texlab_syntax/src/lib.rs b/crates/texlab_syntax/src/lib.rs index 7e24b57dc..71def4a41 100644 --- a/crates/texlab_syntax/src/lib.rs +++ b/crates/texlab_syntax/src/lib.rs @@ -20,16 +20,18 @@ pub enum SyntaxTree { } #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub struct SyntaxTreeContext<'a> { +pub struct SyntaxTreeInput<'a> { pub resolver: &'a Resolver, pub uri: &'a Uri, + pub text: &'a str, + pub language: Language, } impl SyntaxTree { - pub fn parse(context: SyntaxTreeContext, text: &str, language: Language) -> Self { - match language { - Language::Latex => SyntaxTree::Latex(Box::new(LatexSyntaxTree::parse(context, text))), - Language::Bibtex => SyntaxTree::Bibtex(Box::new(text.into())), + pub fn parse(input: SyntaxTreeInput) -> Self { + match input.language { + Language::Latex => SyntaxTree::Latex(Box::new(LatexSyntaxTree::parse(input))), + Language::Bibtex => SyntaxTree::Bibtex(Box::new(input.text.into())), } } } diff --git a/crates/texlab_workspace/src/document.rs b/crates/texlab_workspace/src/document.rs index 5a829428b..a342da33d 100644 --- a/crates/texlab_workspace/src/document.rs +++ b/crates/texlab_workspace/src/document.rs @@ -1,7 +1,7 @@ use std::time::SystemTime; use texlab_distro::{Language, Resolver}; use texlab_protocol::*; -use texlab_syntax::{SyntaxTree, SyntaxTreeContext}; +use texlab_syntax::{SyntaxTree, SyntaxTreeInput}; #[derive(Debug, PartialEq, Eq, Clone)] pub struct Document { @@ -12,7 +12,14 @@ pub struct Document { } impl Document { - pub fn new(uri: Uri, text: String, tree: SyntaxTree) -> Self { + pub fn parse(uri: Uri, text: String, language: Language, resolver: &Resolver) -> Self { + let input = SyntaxTreeInput { + resolver, + uri: &uri, + text: &text, + language, + }; + let tree = SyntaxTree::parse(input); Self { uri, text, @@ -21,15 +28,6 @@ impl Document { } } - pub fn parse(resolver: &Resolver, uri: Uri, text: String, language: Language) -> Self { - let context = SyntaxTreeContext { - resolver, - uri: &uri, - }; - let tree = SyntaxTree::parse(context, &text, language); - Self::new(uri, text, tree) - } - pub fn is_file(&self) -> bool { self.uri.scheme() == "file" } diff --git a/crates/texlab_workspace/src/workspace.rs b/crates/texlab_workspace/src/workspace.rs index 25bd31000..f713e1970 100644 --- a/crates/texlab_workspace/src/workspace.rs +++ b/crates/texlab_workspace/src/workspace.rs @@ -244,7 +244,7 @@ impl WorkspaceManager { language: Language, ) -> Arc { let resolver = block_on(self.distribution.resolver()); - let document = Document::parse(&resolver, uri, text, language); + let document = Document::parse(uri, text, language, &resolver); let mut documents: Vec> = workspace .documents .iter() @@ -272,7 +272,7 @@ impl WorkspaceBuilder { let path = env::temp_dir().join(name); let language = Language::by_extension(path.extension().unwrap().to_str().unwrap()).unwrap(); let uri = Uri::from_file_path(path).unwrap(); - let document = Document::parse(&resolver, uri.clone(), text.to_owned(), language); + let document = Document::parse(uri.clone(), text.to_owned(), language, &resolver); self.workspace.documents.push(Arc::new(document)); uri }