diff --git a/Cargo.lock b/Cargo.lock index 4d30d365c..a36316cdc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,12 +24,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "atty" version = "0.2.14" @@ -47,15 +41,6 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - [[package]] name = "bitflags" version = "1.3.2" @@ -80,15 +65,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown", -] - [[package]] name = "clap" version = "4.0.22" @@ -191,10 +167,6 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "heck" @@ -247,14 +219,12 @@ name = "jaq" version = "1.5.1" dependencies = [ "atty", - "chumsky", "clap", "codesnake", "env_logger", "hifijson", "jaq-core", "jaq-interpret", - "jaq-parse", "jaq-std", "jaq-syn", "memmap2", @@ -272,7 +242,7 @@ dependencies = [ "base64", "hifijson", "jaq-interpret", - "jaq-parse", + "jaq-syn", "libm", "log", "regex", @@ -289,33 +259,22 @@ dependencies = [ "dyn-clone", 
"hifijson", "indexmap", - "jaq-parse", "jaq-syn", "once_cell", "serde_json", ] -[[package]] -name = "jaq-parse" -version = "1.0.2" -dependencies = [ - "chumsky", - "jaq-syn", -] - [[package]] name = "jaq-play" version = "0.1.0" dependencies = [ "aho-corasick", - "chumsky", "codesnake", "console_log", "getrandom", "hifijson", "jaq-core", "jaq-interpret", - "jaq-parse", "jaq-std", "jaq-syn", "js-sys", @@ -329,17 +288,15 @@ dependencies = [ name = "jaq-std" version = "1.5.1" dependencies = [ - "bincode", "jaq-core", "jaq-interpret", - "jaq-parse", "jaq-syn", "serde_json", ] [[package]] name = "jaq-syn" -version = "1.1.0" +version = "1.6.0" dependencies = [ "serde", ] diff --git a/Cargo.toml b/Cargo.toml index b57674b2b..bd439b975 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,6 @@ [workspace] members = [ "jaq-syn", - "jaq-parse", "jaq-interpret", "jaq-core", "jaq-std", diff --git a/jaq-core/Cargo.toml b/jaq-core/Cargo.toml index a4611bede..5978da5df 100644 --- a/jaq-core/Cargo.toml +++ b/jaq-core/Cargo.toml @@ -29,5 +29,5 @@ base64 = { version = "0.22", optional = true } urlencoding = { version = "2.1.3", optional = true } [dev-dependencies] -jaq-parse = { version = "1.0.0", path = "../jaq-parse" } +jaq-syn = { version = "1.6.0", path = "../jaq-syn" } serde_json = "1.0" diff --git a/jaq-core/tests/common/mod.rs b/jaq-core/tests/common/mod.rs index e6936a57a..087ee28d5 100644 --- a/jaq-core/tests/common/mod.rs +++ b/jaq-core/tests/common/mod.rs @@ -4,9 +4,10 @@ fn yields(x: jaq_interpret::Val, f: &str, ys: impl Iterator) { let mut ctx = jaq_interpret::ParseCtx::new(Vec::new()); - let (f, errs) = jaq_parse::parse(f, jaq_parse::main()); - assert!(errs.is_empty()); - ctx.yields(x, f.unwrap(), ys) + let f = jaq_syn::parse(f, |p| p.module(|p| p.term())) + .unwrap() + .conv(f); + ctx.yields(x, f, ys) } pub fn fail(x: Value, f: &str, err: jaq_interpret::Error) { diff --git a/jaq-interpret/tests/path.rs b/jaq-interpret/tests/path.rs index e87251c39..b8143b5e0 100644 --- 
a/jaq-interpret/tests/path.rs +++ b/jaq-interpret/tests/path.rs @@ -42,7 +42,6 @@ fn index_access() { fn iter_access() { gives(json!([0, 1, 2]), ".[]", [json!(0), json!(1), json!(2)]); gives(json!({"a": [1, 2]}), ".a[]", [json!(1), json!(2)]); - gives(json!({"a": [1, 2]}), ".a.[]", [json!(1), json!(2)]); gives(json!({"a": 1, "b": 2}), ".[]", [json!(1), json!(2)]); // TODO: correct this //gives(json!({"b": 2, "a": 1}), ".[]", [json!(2), json!(1)]); @@ -74,6 +73,9 @@ fn iter_assign() { ); } +yields!(index_keyword, r#"{"if": 0} | .if"#, 0); +yields!(obj_keyword, "{if: 0} | .if", 0); + yields!(key_update1, "{} | .a |= .+1", json!({"a": 1})); yields!(key_update2, "{} | .a? |= .+1", json!({"a": 1})); diff --git a/jaq-interpret/tests/tests.rs b/jaq-interpret/tests/tests.rs index 928702243..c8300f9b6 100644 --- a/jaq-interpret/tests/tests.rs +++ b/jaq-interpret/tests/tests.rs @@ -20,9 +20,9 @@ yields!(cartesian_arith, "[(1,2) * (3,4)]", [3, 4, 6, 8]); #[test] fn add() { give(json!(1), ". + 2", json!(3)); - give(json!(1.0), ". + 2.", json!(3.0)); + give(json!(1.0), ". + 2.0", json!(3.0)); give(json!(1), "2.0 + .", json!(3.0)); - give(json!(null), "1.e1 + 2.1e2", json!(220.0)); + give(json!(null), "1.0e1 + 2.1e2", json!(220.0)); give(json!("Hello "), ". + \"world\"", json!("Hello world")); give(json!([1, 2]), ". + [3, 4]", json!([1, 2, 3, 4])); @@ -48,7 +48,7 @@ yields!(sub_arr, "[1, 2, 3] - [2, 3, 4]", json!([1])); #[test] fn mul() { give(json!(1), ". * 2", json!(2)); - give(json!(1.0), ". * 2.", json!(2.0)); + give(json!(1.0), ". 
* 2.0", json!(2.0)); give(json!(1), "2.0 * .", json!(2.0)); give(json!("Hello"), "2 * .", json!("HelloHello")); @@ -117,6 +117,27 @@ fn precedence() { give(json!(null), "2 * 3 + 1", json!(7)); } +// these tests use the trick that `try t catch c` is valid syntax only for atomic terms `t` +// TODO for v2.0 +//yields!(atomic_def, "try def x: 1; x + x catch 0", 2); +yields!(atomic_neg, "try - 1 catch 0", -1); +yields!(atomic_if, "try if 0 then 1 end catch 2", 1); +yields!(atomic_try, "try try 0[0] catch 1 catch 2", 1); +yields!(atomic_fold, "try reduce [][] as $x (0; 0) catch 1", 0); +yields!(atomic_var, "0 as $x | try $x catch 1", 0); +yields!(atomic_call, "def x: 0; try x catch 1", 0); +yields!(atomic_str1, r#"try "" catch 1"#, ""); +yields!(atomic_str2, r#"def @f: .; try @f "" catch 1"#, ""); +yields!(atomic_rec, "try .. catch 0", json!(null)); +yields!(atomic_id, "try . catch 0", json!(null)); +yields!(atomic_key1, "{key: 0} | try .key catch 1", 0); +yields!(atomic_key2, r#"{key: 0} | try . "key" catch 1"#, 0); +yields!(atomic_key3, r#"def @f: .; {key: 0} | try .@f"key" catch 1"#, 0); +yields!(atomic_num, "try 0 catch 1", 0); +yields!(atomic_block, "try (1 + 1) catch 0", 2); +yields!(atomic_path, "try [1][0] catch 0", 1); +yields!(atomic_opt, "def x: 0; try x? 
catch 1", 0); + yields!(neg_arr_iter1, "[-[][]]", json!([])); yields!(neg_arr_iter2, "try (-[])[] catch 0", 0); @@ -164,6 +185,8 @@ yields!( "{a: 1, b: 2} | {a, c: 3}", json!({"a": 1, "c": 3}) ); +yields!(obj_var, r#""x" as $k | {$k}"#, json!({"k": "x"})); +yields!(obj_var_val, r#""x" as $k | {$k: 0}"#, json!({"x": 0})); yields!( obj_multi_keys, r#"[{("a", "b"): 1}]"#, diff --git a/jaq-parse/src/lib.rs b/jaq-parse/src/lib.rs index 9971cbc16..5650f54f8 100644 --- a/jaq-parse/src/lib.rs +++ b/jaq-parse/src/lib.rs @@ -12,14 +12,12 @@ mod prec_climb; mod string; mod token; -use jaq_syn as syn; - pub use def::{defs, main}; use token::{Delim, Token}; use alloc::{string::String, string::ToString, vec::Vec}; use chumsky::prelude::*; -use syn::Spanned; +use jaq_syn::Spanned; /// Lex/parse error. pub type Error = Simple; diff --git a/jaq-play/Cargo.toml b/jaq-play/Cargo.toml index 17da27d0f..ecd81b025 100644 --- a/jaq-play/Cargo.toml +++ b/jaq-play/Cargo.toml @@ -17,13 +17,11 @@ crate-type = ["cdylib", "rlib"] [dependencies] jaq-syn = { version = "1.1.0", path = "../jaq-syn" } -jaq-parse = { version = "1.0.0", path = "../jaq-parse" } jaq-interpret = { version = "1.2.0", path = "../jaq-interpret" } jaq-core = { version = "1.2.0", path = "../jaq-core" } jaq-std = { version = "1.2.0", path = "../jaq-std" } aho-corasick = "1.1.2" codesnake = { version = "0.1" } -chumsky = { version = "0.9.0", default-features = false } hifijson = "0.2" log = "0.4.17" unicode-width = "0.1.13" diff --git a/jaq-play/src/lib.rs b/jaq-play/src/lib.rs index 76626bc96..399917002 100644 --- a/jaq-play/src/lib.rs +++ b/jaq-play/src/lib.rs @@ -130,7 +130,7 @@ impl Settings { use web_sys::DedicatedWorkerGlobalScope as Scope; enum Error { - Chumsky(Vec), + Report(String, Vec), Hifijson(String), Jaq(jaq_interpret::Error), } @@ -163,11 +163,14 @@ pub fn run(filter: &str, input: &str, settings: &JsValue, scope: &Scope) { }; match process(filter, input, &settings, post_value) { Ok(()) => (), - 
Err(Error::Chumsky(errs)) => { - for e in errs { - scope - .post_message(&format!("⚠️ Parse error: {}", report(filter, &e)).into()) - .unwrap(); + Err(Error::Report(code, reports)) => { + let idx = codesnake::LineIndex::new(&code); + for e in reports { + let error = format!("⚠️ Parse error: {}", e.message); + scope.post_message(&error.into()).unwrap(); + let block = e.into_block(&idx); + let block = format!("{}\n{}{}", block.prologue(), block, block.epilogue()); + scope.post_message(&block.into()).unwrap(); } } Err(Error::Hifijson(e)) => { @@ -224,7 +227,7 @@ fn collect_if<'a, T: 'a + FromIterator, E: 'a>( } fn process(filter: &str, input: &str, settings: &Settings, f: impl Fn(Val)) -> Result<(), Error> { - let filter = parse(filter, Vec::new()).map_err(Error::Chumsky)?; + let filter = parse(filter, Vec::new()).map_err(|e| Error::Report(filter.to_owned(), e))?; let inputs = read_str(settings, input); @@ -243,34 +246,46 @@ fn process(filter: &str, input: &str, settings: &Settings, f: impl Fn(Val)) -> R Ok(()) } -type ChumskyError = chumsky::error::Simple; +fn parse_term(filter_str: &str) -> Result> { + let tokens = jaq_syn::Lexer::new(filter_str).lex().map_err(|errs| { + errs.into_iter() + .map(|e| report_lex(filter_str, e)) + .collect::>() + })?; + + let main = jaq_syn::Parser::new(&tokens).parse(|p| p.module(|p| p.term())); + let main = main.map_err(|errs| { + errs.into_iter() + .map(|e| report_parse(filter_str, e)) + .collect::>() + })?; + + Ok(main.conv(filter_str)) +} -fn parse(filter_str: &str, vars: Vec) -> Result> { - let mut defs = ParseCtx::new(vars); - defs.insert_natives(jaq_core::core()); - defs.insert_defs(jaq_std::std()); - assert!(defs.errs.is_empty()); - let (filter, errs) = jaq_parse::parse(filter_str, jaq_parse::main()); - if !errs.is_empty() { - return Err(errs); - } - let filter = defs.compile(filter.unwrap()); - if defs.errs.is_empty() { +fn parse(filter_str: &str, vars: Vec) -> Result> { + let mut ctx = ParseCtx::new(vars); + 
ctx.insert_natives(jaq_core::core()); + ctx.insert_defs(jaq_std::std()); + let filter = parse_term(filter_str)?; + let filter = ctx.compile(filter); + if ctx.errs.is_empty() { Ok(filter) } else { - Err(defs - .errs - .into_iter() - .map(|error| ChumskyError::custom(error.1, error.0.to_string())) - .collect()) + let reports = ctx.errs.into_iter().map(|error| Report { + message: error.0.to_string(), + labels: Vec::from([(error.1, [(error.0.to_string(), None)].into(), Color::Red)]), + }); + Err(reports.collect()) } } +type StringColors = Vec<(String, Option)>; + #[derive(Debug)] -struct Report<'a> { - code: &'a str, +struct Report { message: String, - labels: Vec<(core::ops::Range, String, Color)>, + labels: Vec<(core::ops::Range, StringColors, Color)>, } #[derive(Clone, Debug)] @@ -287,74 +302,66 @@ impl Color { } } -fn report<'a>(code: &'a str, e: &chumsky::error::Simple) -> Report<'a> { - use chumsky::error::SimpleReason; - - let eof = || "end of input".to_string(); - - let message = if let SimpleReason::Custom(msg) = e.reason() { - msg.clone() - } else { - let found = if e.found().is_some() { - "Unexpected token" - } else { - "Unexpected end of input" - }; - let when = if let Some(label) = e.label() { - format!(" while parsing {label}") - } else { - String::new() - }; - let expected = if e.expected().len() == 0 { - "something else".to_string() - } else { - let f = |e: &Option| e.as_ref().map_or_else(eof, |e| e.to_string()); - e.expected().map(f).collect::>().join(", ") - }; - format!("{found}{when}, expected {expected}",) +fn report_lex(code: &str, (expected, found): jaq_syn::lex::Error<&str>) -> Report { + use jaq_syn::lex::{span, Expect}; + // truncate found string to its first character + let found = &found[..found.char_indices().nth(1).map_or(found.len(), |(i, _)| i)]; + + let found_range = span(code, found); + let found = match found { + "" => [("unexpected end of input".to_string(), None)].into(), + c => [("unexpected character ", None), (c, 
Some(Color::Red))] + .map(|(s, c)| (s.into(), c)) + .into(), }; + let label = (found_range, found, Color::Red); - let label = if let SimpleReason::Custom(msg) = e.reason() { - msg.clone() - } else { - let token = |c: &String| format!("token {}", Color::Red.apply(c)); - format!("Unexpected {}", e.found().map_or_else(eof, token)) - }; - // convert character indices to byte offsets - let char_to_byte = |i| { - code.char_indices() - .map(|(i, _c)| i) - .chain([code.len(), code.len()]) - .nth(i) - .unwrap() + let labels = match expected { + Expect::Delim(open) => { + let text = [("unclosed delimiter ", None), (open, Some(Color::Yellow))] + .map(|(s, c)| (s.into(), c)); + Vec::from([(span(code, open), text.into(), Color::Yellow), label]) + } + _ => Vec::from([label]), }; - let conv = |span: &core::ops::Range<_>| char_to_byte(span.start)..char_to_byte(span.end); - let mut labels = Vec::from([(conv(&e.span()), label, Color::Red)]); - if let SimpleReason::Unclosed { span, delimiter } = e.reason() { - let text = format!("Unclosed delimiter {}", Color::Yellow.apply(delimiter)); - labels.insert(0, (conv(span), text, Color::Yellow)); - } Report { - code, - message, + message: format!("expected {}", expected.as_str()), labels, } } -impl Display for Report<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - use codesnake::{Block, CodeWidth, Label, LineIndex}; - let idx = LineIndex::new(self.code); - let labels = self.labels.clone().into_iter().map(|(range, text, color)| { - Label::new(range, text).with_style(move |s| color.apply(s).to_string()) +fn report_parse(code: &str, (expected, found): jaq_syn::parse::Error) -> Report { + let found_range = match found { + None => code.len()..code.len(), + Some(found) => found.span(code), + }; + let found = found.map_or("unexpected end of input", |_| "unexpected token"); + let found = [(found.to_string(), None)].into(); + + Report { + message: format!("expected {}", expected.as_str()), + labels: Vec::from([(found_range, found, 
Color::Red)]), + } +} + +type CodeBlock = codesnake::Block, String>; + +impl Report { + fn into_block(self, idx: &codesnake::LineIndex) -> CodeBlock { + use codesnake::{Block, CodeWidth, Label}; + let color_maybe = |(text, color): (_, Option)| match color { + None => text, + Some(color) => color.apply(text).to_string(), + }; + let labels = self.labels.into_iter().map(|(range, text, color)| { + let text = text.into_iter().map(color_maybe).collect::>(); + Label::new(range, text.join("")).with_style(move |s| color.apply(s).to_string()) }); - let block = Block::new(&idx, labels).unwrap().map_code(|c| { + Block::new(idx, labels).unwrap().map_code(|c| { let c = c.replace('\t', " "); let w = unicode_width::UnicodeWidthStr::width(&*c); CodeWidth::new(c, core::cmp::max(w, 1)) - }); - writeln!(f, "{}", self.message)?; - write!(f, "{}\n{}{}", block.prologue(), block, block.epilogue()) + }) } } diff --git a/jaq-std/Cargo.toml b/jaq-std/Cargo.toml index 849d7d914..a1edc7774 100644 --- a/jaq-std/Cargo.toml +++ b/jaq-std/Cargo.toml @@ -10,17 +10,8 @@ repository = "https://github.com/01mf02/jaq" keywords = ["json", "query", "jq"] rust-version = "1.64" -[features] -default = ["bincode"] - -[build-dependencies] -jaq-parse = { version = "1.0.0", path = "../jaq-parse" } -bincode = { version = "1.3.3", optional = true } - [dependencies] jaq-syn = { version = "1.0.0", path = "../jaq-syn" } -jaq-parse = { version = "1.0.0", path = "../jaq-parse" } -bincode = { version = "1.3.3", optional = true } [dev-dependencies] jaq-interpret = { version = "1.2.0", path = "../jaq-interpret" } diff --git a/jaq-std/build.rs b/jaq-std/build.rs deleted file mode 100644 index 54373c682..000000000 --- a/jaq-std/build.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Cache parsed standard library. 
- -#[cfg(feature = "bincode")] -fn main() { - let out_dir = std::env::var_os("OUT_DIR").unwrap(); - let dest_path = std::path::Path::new(&out_dir).join("std.bin"); - let buffer = std::fs::File::create(dest_path).unwrap(); - - let std = include_str!("src/std.jq"); - let (std, errs) = jaq_parse::parse(std, jaq_parse::defs()); - assert_eq!(errs, Vec::new()); - let std = std.unwrap(); - bincode::serialize_into(buffer, &std).unwrap(); -} - -#[cfg(not(feature = "bincode"))] -fn main() {} diff --git a/jaq-std/src/lib.rs b/jaq-std/src/lib.rs index 34a5f4829..862a6649b 100644 --- a/jaq-std/src/lib.rs +++ b/jaq-std/src/lib.rs @@ -4,12 +4,6 @@ //! The standard library provides a set of filters defined using core filters. //! For example, the standard library provides the `map(f)` filter, //! which is defined using the more elementary filter `[.[] | f]`. -//! -//! The time required to parse the standard library becomes evident -//! when the runtime of the jaq filter is small. -//! Therefore, when the "bincode" feature is enabled, -//! this crate precompiles the standard library, -//! in order to reduce startup time. #![no_std] #![warn(missing_docs)] @@ -18,15 +12,8 @@ use alloc::vec::Vec; /// Return the standard library. 
pub fn std() -> Vec { - #[cfg(feature = "bincode")] - { - // use preparsed standard library - let std = include_bytes!(concat!(env!("OUT_DIR"), "/std.bin")); - bincode::deserialize(std).unwrap() - } - #[cfg(not(feature = "bincode"))] - { - let std = include_str!("std.jq"); - jaq_parse::parse(std, jaq_parse::defs()).0.unwrap() - } + let std = include_str!("std.jq"); + jaq_syn::parse(std, |p| p.module(|p| p.defs())) + .unwrap() + .conv(std) } diff --git a/jaq-std/tests/common/mod.rs b/jaq-std/tests/common/mod.rs index d8c78f261..be6466881 100644 --- a/jaq-std/tests/common/mod.rs +++ b/jaq-std/tests/common/mod.rs @@ -5,9 +5,10 @@ fn yields(x: jaq_interpret::Val, f: &str, ys: impl Iterator"] edition = "2021" license = "MIT" diff --git a/jaq-syn/src/convert.rs b/jaq-syn/src/convert.rs new file mode 100644 index 000000000..02c3ba2a8 --- /dev/null +++ b/jaq-syn/src/convert.rs @@ -0,0 +1,223 @@ +use crate::filter::{AssignOp, BinaryOp, Filter, Fold, FoldType, KeyVal}; +use crate::prec_climb::{self, Associativity}; +use crate::{parse, Arg, Call, Def, Main, MathOp, OrdOp, Span, Spanned}; +use alloc::string::ToString; +use alloc::{boxed::Box, vec::Vec}; + +impl parse::Term<&str> { + fn span(&self, code: &str) -> Span { + match self { + Self::Num(s) | Self::Call(s, ..) 
| Self::Var(s) => crate::lex::span(code, s), + _ => 0..42, + } + } + + fn conv(&self, s: &str) -> Filter { + use crate::lex::StrPart; + use crate::path::{Opt, Part}; + use crate::string; + use Filter::*; + + let span = |tm: &Self| Box::new((tm.conv(s), tm.span(s))); + let from_part = |(part, opt): &(Part<_>, Opt)| { + let part = match part { + Part::Index(i) => Part::Index(*span(i)), + Part::Range(l, h) => { + Part::Range(l.as_ref().map(|l| *span(l)), h.as_ref().map(|h| *span(h))) + } + }; + (part, *opt) + }; + let index_path = |k| { + let path = Vec::from([(Part::Index(k), Opt::Essential)]); + Filter::Path(span(&Self::Id), path) + }; + let from_str = |part: &StrPart<&str, _>| match part { + StrPart::Str(s) => string::Part::Str(s.to_string()), + StrPart::Filter(tm) => string::Part::Fun(*span(tm)), + StrPart::Char(c) => string::Part::Str(c.to_string()), + }; + let from_obj = |(k, v): &(_, Option<_>)| { + let f = || (index_path(*span(k)), 0..42); + let (k, v) = if let (Self::Var(x), None) = (k, v) { + (*span(&Self::str(&x[1..])), *span(k)) + } else { + (*span(k), v.as_ref().map_or_else(f, |v| *span(v))) + }; + KeyVal::Filter(k, v) + }; + let from_op = |op| match op { + "," => BinaryOp::Comma, + "//" => BinaryOp::Alt, + "or" => BinaryOp::Or, + "and" => BinaryOp::And, + "+" => BinaryOp::Math(MathOp::Add), + "-" => BinaryOp::Math(MathOp::Sub), + "*" => BinaryOp::Math(MathOp::Mul), + "/" => BinaryOp::Math(MathOp::Div), + "%" => BinaryOp::Math(MathOp::Rem), + "=" => BinaryOp::Assign(AssignOp::Assign), + "|=" => BinaryOp::Assign(AssignOp::Update), + "+=" => BinaryOp::Assign(AssignOp::UpdateWith(MathOp::Add)), + "-=" => BinaryOp::Assign(AssignOp::UpdateWith(MathOp::Sub)), + "*=" => BinaryOp::Assign(AssignOp::UpdateWith(MathOp::Mul)), + "/=" => BinaryOp::Assign(AssignOp::UpdateWith(MathOp::Div)), + "%=" => BinaryOp::Assign(AssignOp::UpdateWith(MathOp::Rem)), + "<" => BinaryOp::Ord(OrdOp::Lt), + ">" => BinaryOp::Ord(OrdOp::Gt), + "<=" => BinaryOp::Ord(OrdOp::Le), + ">=" => 
BinaryOp::Ord(OrdOp::Ge), + "==" => BinaryOp::Ord(OrdOp::Eq), + "!=" => BinaryOp::Ord(OrdOp::Ne), + op => panic!("unknown operator: {op}"), + }; + match self { + Self::Id => Id, + Self::Recurse => Recurse, + Self::Num(n) => Num(n.to_string()), + Self::Str(fmt, parts) => Str(Box::new(crate::Str { + fmt: fmt.map(|fmt| span(&Self::Call(fmt, Vec::new()))), + parts: parts.iter().map(from_str).collect(), + })), + Self::Arr(a) => Array(a.as_deref().map(span)), + Self::Obj(o) => Object(o.iter().map(from_obj).collect()), + Self::Neg(tm) => Neg(span(tm)), + Self::Pipe(l, v, r) => Binary( + span(l), + BinaryOp::Pipe(v.map(|v| v[1..].to_string())), + span(r), + ), + Self::BinOp(head, tail) => { + let head = *span(head); + let tail = tail.iter().map(|(op, tm)| (from_op(op), *span(tm))); + prec_climb::climb(head, tail).0 + } + + Self::Label(_v, ..) | Self::Break(_v) => { + unimplemented!("label-break is not supported yet") + } + + Self::Fold(fold, xs, v, args) => { + let fold_type = match *fold { + "reduce" => FoldType::Reduce, + "foreach" => FoldType::Foreach, + "for" => FoldType::For, + _ => panic!(), + }; + let (init, update) = match &args[..] 
{ + [init, update] => (init, update), + _ => unimplemented!("folding filters currently only take two arguments"), + }; + let fold = self::Fold { + xs: span(xs), + x: v[1..].to_string(), + init: span(init), + f: span(update), + }; + Fold(fold_type, fold) + } + Self::TryCatch(try_, catch) => TryCatch(span(try_), catch.as_deref().map(span)), + Self::IfThenElse(if_thens, else_) => Ite( + if_thens + .iter() + .map(|(if_, then_)| (*span(if_), *span(then_))) + .collect(), + else_.as_deref().map(span), + ), + + Self::Def(_defs, _tm) => { + unimplemented!("definitions inside terms are not supported yet") + } + Self::Call(c, args) => Call(c.to_string(), args.iter().map(|a| *span(a)).collect()), + Self::Var(v) => Var(v[1..].to_string()), + + Self::Path(tm, path) => Path(span(tm), path.iter().map(from_part).collect()), + } + } + + fn conv_main(&self, s: &str) -> Main { + match self { + parse::Term::Def(defs, tm) => Main { + defs: defs.iter().map(|def| def.conv(s)).collect(), + body: (tm.conv(s), tm.span(s)), + }, + tm => Main { + defs: Vec::new(), + body: (tm.conv(s), tm.span(s)), + }, + } + } +} + +impl From<&parse::Term<&str>> for Filter { + fn from(tm: &parse::Term<&str>) -> Self { + tm.conv("") + } +} + +impl prec_climb::Op for BinaryOp { + fn precedence(&self) -> usize { + match self { + Self::Pipe(_) => 0, + Self::Comma => 1, + Self::Assign(_) => 2, + Self::Alt => 3, + Self::Or => Self::Alt.precedence() + 1, + Self::And => Self::Or.precedence() + 1, + Self::Ord(OrdOp::Eq | OrdOp::Ne) => Self::And.precedence() + 1, + Self::Ord(OrdOp::Lt | OrdOp::Gt | OrdOp::Le | OrdOp::Ge) => Self::And.precedence() + 2, + Self::Math(MathOp::Add | MathOp::Sub) => Self::And.precedence() + 3, + Self::Math(MathOp::Mul | MathOp::Div) => Self::Math(MathOp::Add).precedence() + 1, + Self::Math(MathOp::Rem) => Self::Math(MathOp::Mul).precedence() + 1, + } + } + + fn associativity(&self) -> Associativity { + match self { + Self::Pipe(_) | Self::Assign(_) => Associativity::Right, + _ => 
Associativity::Left, + } + } +} + +impl prec_climb::Expr for Spanned { + fn from_op(lhs: Self, op: BinaryOp, rhs: Self) -> Self { + Filter::binary(lhs, op, rhs) + } +} + +impl parse::Def<&str, parse::Term<&str>> { + fn conv(&self, s: &str) -> Def { + let args = self.args.iter().map(|arg| { + if let Some(v) = arg.strip_prefix('$') { + Arg::Var(v.to_string()) + } else { + Arg::Fun(arg.to_string()) + } + }); + Def { + lhs: Call { + name: self.name.to_string(), + args: args.collect(), + }, + rhs: self.body.conv_main(s), + } + } +} + +impl parse::Module<&str, Vec>>> { + /// Convert a definitions module to a [`Def`] vector. + pub fn conv(&self, s: &str) -> Vec { + self.body.iter().map(|def| def.conv(s)).collect() + } +} + +impl parse::Module<&str, parse::Term<&str>> { + /// Convert a term module to a [`Main`]. + pub fn conv(&self, s: &str) -> Main { + if !self.mods.is_empty() { + panic!("include / import is not supported yet"); + } + self.body.conv_main(s) + } +} diff --git a/jaq-syn/src/def.rs b/jaq-syn/src/def.rs index 24158fe9b..23b237142 100644 --- a/jaq-syn/src/def.rs +++ b/jaq-syn/src/def.rs @@ -127,6 +127,6 @@ impl Arg { pub struct Main { /// Definitions at the top of the filter pub defs: Vec>, - /// Body of the filter, e.g. `[.[] | f`. + /// Body of the filter, e.g. `[.[] | f]`. pub body: Spanned, } diff --git a/jaq-syn/src/lex.rs b/jaq-syn/src/lex.rs new file mode 100644 index 000000000..af5e53993 --- /dev/null +++ b/jaq-syn/src/lex.rs @@ -0,0 +1,317 @@ +//! Lexing. + +use alloc::vec::Vec; + +/// Component of a string potentially containing escape sequences. +/// +/// `S` is a type of strings (without escape sequences), and +/// `F` is a type of interpolated filters. +#[derive(Debug)] +pub enum StrPart { + /// string without escape sequences + Str(S), + /// interpolated filter (`\(...)`) + Filter(F), + /// escaped character (e.g. `\n`, `\t`, `\u0041`) + Char(char), +} + +/// Token (tree) generic over string type `S`. 
+#[derive(Debug)] +pub enum Token { + /// keywords such as `def`, but also identifiers such as `map`, `$x`, or `@csv` + Word(S), + /// number + Num(S), + /// (interpolated) string, surrounded by opening and closing '"' + Str(S, Vec>, S), + /// binary operator, such as `|` or `+=` + /// + /// Note that this includes `-` (negation) also when it is used as unary operator. + Op(S), + /// punctuation, such as `.` or `;` + Char(S), + /// delimited tokens, e.g. `(...)` or `[...]` + Block(S, Vec), +} + +/// Type of character that we expected. +/// +/// Each variant is annotated with jq programs that trigger it. +#[derive(Clone, Debug)] +pub enum Expect { + /// `0e`, `0.` + Digit, + /// `$`, `@` + Ident, + /// `(`, `[`, `{` + Delim(S), + /// `"\a"` + Escape, + /// `"\ux"` + Unicode, + /// `&`, `§`, `💣` + Token, +} + +impl<'a> Expect<&'a str> { + /// Return human-readable description of what we expected. + pub fn as_str(&self) -> &'static str { + match self { + Self::Digit => "digit", + Self::Ident => "identifier", + Self::Delim("(") => "closing parenthesis", + Self::Delim("[") => "closing bracket", + Self::Delim("{") => "closing brace", + Self::Delim("\"") => "closing quote", + Self::Delim(_) => panic!(), + Self::Escape => "string escape sequence", + Self::Unicode => "4-digit hexadecimal UTF-8 code point", + Self::Token => "token", + } + } +} + +/// Lexer error, storing what we expected and what we got instead. +pub type Error = (Expect, S); + +/// Lexer for jq files. +pub struct Lexer { + i: S, + e: Vec>, +} + +impl<'a> Lexer<&'a str> { + /// Initialise a new lexer for the given input. + #[must_use] + pub fn new(i: &'a str) -> Self { + let e = Vec::new(); + Self { i, e } + } + + /// Lex, returning the resulting tokens and errors. 
+ pub fn lex(mut self) -> Result>, Vec>> { + let tokens = self.tokens(); + self.space(); + if !self.i.is_empty() { + self.e.push((Expect::Token, self.i)); + } + + if self.e.is_empty() { + Ok(tokens) + } else { + Err(self.e) + } + } + + fn next(&mut self) -> Option { + let mut chars = self.i.chars(); + let c = chars.next()?; + self.i = chars.as_str(); + Some(c) + } + + fn take(&mut self, len: usize) -> &'a str { + let (head, tail) = self.i.split_at(len); + self.i = tail; + head + } + + fn trim(&mut self, f: impl FnMut(char) -> bool) { + self.i = self.i.trim_start_matches(f); + } + + fn consumed(&mut self, skip: usize, f: impl FnOnce(&mut Self)) -> &'a str { + let start = self.i; + self.i = &self.i[skip..]; + f(self); + &start[..start.len() - self.i.len()] + } + + /// Whitespace and comments. + fn space(&mut self) { + self.i = self.i.trim_start(); + while let Some(comment) = self.i.strip_prefix('#') { + self.i = comment.trim_start_matches(|c| c != '\n').trim_start(); + } + } + + fn mod_then_ident(&mut self) { + self.ident0(); + if let Some(rest) = self.i.strip_prefix("::") { + self.i = rest.strip_prefix(['@', '$']).unwrap_or(rest); + self.ident1(); + } + } + + /// Lex a sequence matching `[a-zA-Z0-9_]*`. + fn ident0(&mut self) { + self.trim(|c: char| c.is_ascii_alphanumeric() || c == '_'); + } + + /// Lex a sequence matching `[a-zA-Z_][a-zA-Z0-9_]*`. + fn ident1(&mut self) { + let first = |c: char| c.is_ascii_alphabetic() || c == '_'; + if let Some(rest) = self.i.strip_prefix(first) { + self.i = rest; + self.ident0(); + } else { + self.e.push((Expect::Ident, self.i)); + } + } + + /// Lex a non-empty digit sequence. + fn digits1(&mut self) { + if let Some(rest) = self.i.strip_prefix(|c: char| c.is_ascii_digit()) { + self.i = rest.trim_start_matches(|c: char| c.is_ascii_digit()); + } else { + self.e.push((Expect::Digit, self.i)); + } + } + + /// Decimal with optional exponent. 
+ fn num(&mut self) { + self.trim(|c| c.is_ascii_digit()); + if let Some(i) = self.i.strip_prefix('.') { + self.i = i; + self.digits1(); + } + if let Some(i) = self.i.strip_prefix(['e', 'E']) { + self.i = i.strip_prefix(['+', '-']).unwrap_or(i); + self.digits1(); + } + } + + fn escape(&mut self) -> Option>> { + let mut chars = self.i.chars(); + let part = match chars.next() { + Some(c @ ('\\' | '/' | '"')) => StrPart::Char(c), + Some('b') => StrPart::Char('\x08'), + Some('f') => StrPart::Char('\x0C'), + Some('n') => StrPart::Char('\n'), + Some('r') => StrPart::Char('\r'), + Some('t') => StrPart::Char('\t'), + Some('u') => { + let mut hex = 0; + for _ in 0..4 { + let i = chars.as_str(); + match chars.next().and_then(|c| c.to_digit(16)) { + Some(digit) => hex = (hex << 4) + digit, + None => { + self.i = i; + self.e.push((Expect::Unicode, self.i)); + return None; + } + } + } + StrPart::Char(char::from_u32(hex).unwrap()) + } + Some('(') => return Some(StrPart::Filter(self.delim())), + Some(_) | None => { + self.e.push((Expect::Escape, self.i)); + return None; + } + }; + + self.i = chars.as_str(); + Some(part) + } + + /// Lex a (possibly interpolated) string. + /// + /// The input string has to start with '"'. + fn str(&mut self) -> Token<&'a str> { + let start = self.take(1); + assert_eq!(start, "\""); + let mut parts = Vec::new(); + + loop { + let s = self.consumed(0, |lex| lex.trim(|c| c != '\\' && c != '"')); + if !s.is_empty() { + parts.push(StrPart::Str(s)); + } + let i = self.i; + match self.next() { + Some('"') => return Token::Str(start, parts, &i[..1]), + Some('\\') => self.escape().map(|part| parts.push(part)), + // SAFETY: due to `lex.trim()` + Some(_) => unreachable!(), + None => { + self.e.push((Expect::Delim(start), self.i)); + return Token::Str(start, parts, &i[..0]); + } + }; + } + } + + fn token(&mut self) -> Option> { + self.space(); + + let is_op = |c| "|=!<>+-*/%".contains(c); + + let mut chars = self.i.chars(); + Some(match chars.next()? 
{ + 'a'..='z' | 'A'..='Z' | '_' => Token::Word(self.consumed(1, Self::mod_then_ident)), + '$' | '@' => Token::Word(self.consumed(1, Self::ident1)), + '0'..='9' => Token::Num(self.consumed(1, Self::num)), + c if is_op(c) => Token::Op(self.consumed(1, |lex| lex.trim(is_op))), + '.' => match chars.next() { + Some('.') => Token::Char(self.take(2)), + Some('a'..='z' | 'A'..='Z' | '_') => Token::Char(self.consumed(2, Self::ident0)), + _ => Token::Char(self.take(1)), + }, + ':' | ';' | ',' | '?' => Token::Char(self.take(1)), + '"' => self.str(), + '(' | '[' | '{' => self.delim(), + _ => return None, + }) + } + + fn tokens(&mut self) -> Vec> { + core::iter::from_fn(|| self.token()).collect() + } + + /// Lex a sequence of tokens that is surrounded by parentheses, curly braces, or brackets. + /// + /// The input string has to start with either '(', '[', or '{'. + fn delim(&mut self) -> Token<&'a str> { + let open = self.take(1); + let close = match open { + "(" => ')', + "[" => ']', + "{" => '}', + _ => panic!(), + }; + let mut tokens = self.tokens(); + + self.space(); + if let Some(rest) = self.i.strip_prefix(close) { + tokens.push(Token::Char(&self.i[..1])); + self.i = rest; + } else { + self.e.push((Expect::Delim(open), self.i)); + } + Token::Block(open, tokens) + } +} + +impl<'a> Token<&'a str> { + /// Return the span of a token that was lexed from some given input. + pub fn span(&self, code: &str) -> crate::Span { + match self { + Self::Word(s) | Self::Char(s) | Self::Op(s) | Self::Num(s) => span(code, s), + Self::Str(open, _, close) => span(code, open).start..span(code, close).end, + Self::Block(open, block) => { + span(code, open).start..block.last().unwrap().span(code).end + } + } + } +} + +/// Return the span of a string slice `part` relative to a string slice `whole`. +/// +/// The caller must ensure that `part` is fully contained inside `whole`. 
+pub fn span(whole: &str, part: &str) -> crate::Span { + let start = part.as_ptr() as usize - whole.as_ptr() as usize; + start..start + part.len() +} diff --git a/jaq-syn/src/lib.rs b/jaq-syn/src/lib.rs index 415fe743c..91f8cbb8f 100644 --- a/jaq-syn/src/lib.rs +++ b/jaq-syn/src/lib.rs @@ -12,8 +12,15 @@ pub mod path; pub mod string; pub mod test; +mod convert; +pub mod lex; +pub mod parse; +mod prec_climb; + pub use def::{Arg, Call, Def, Main}; +pub use lex::Lexer; pub use ops::{MathOp, OrdOp}; +pub use parse::Parser; use path::Path; pub use string::Str; @@ -22,3 +29,18 @@ pub type Span = core::ops::Range; /// An object with position information. pub type Spanned = (T, Span); + +/// Lex a string and parse resulting tokens, returning [`None`] if any error occurred. +/// +/// Example: +/// +/// ~~~ +/// # use jaq_syn::parse; +/// let t = parse("[] | .[]", |p| p.term()); +/// ~~~ +pub fn parse<'s, T: Default, F>(s: &'s str, f: F) -> Option +where + F: for<'t> FnOnce(&mut Parser<'s, 't>) -> parse::Result<'s, 't, T>, +{ + Parser::new(&Lexer::new(s).lex().ok()?).parse(f).ok() +} diff --git a/jaq-syn/src/parse.rs b/jaq-syn/src/parse.rs new file mode 100644 index 000000000..20a3f96f0 --- /dev/null +++ b/jaq-syn/src/parse.rs @@ -0,0 +1,703 @@ +//! Parsing. + +use crate::lex::{StrPart, Token}; +use crate::path; +use alloc::{boxed::Box, vec::Vec}; + +/// Parse error, storing what we expected and what we got instead. +pub type Error<'s, 't> = (Expect<&'s str>, Option<&'t Token<&'s str>>); + +type Path = Vec<(path::Part, path::Opt)>; + +/// Type of token that we expected. +/// +/// Each variant is annoted with jq programs that trigger it. 
+#[derive(Debug)] +pub enum Expect { + /// `if 0` (expected "then"), `reduce .` (expected "as") + Keyword(S), + /// `0 as $x` (expected "|"), `{(.)}` (expected ":") + Char(S), + /// `0 as`, `label`, `break` + Var, + /// `if 0 then 0` + ElseOrEnd, + /// `{a;}` + CommaOrRBrace, + /// `f(0:)` + SemicolonOrRParen, + /// `` (empty input), `-`, `()` + Term, + /// `.[].` + Key, + /// `def`, `import "foo" as` + Ident, + /// `def f()` + Arg, + /// `import` + Str, + /// `0;` + Nothing, +} + +impl<'a> Expect<&'a str> { + /// String representation of an expected token. + pub fn as_str(&self) -> &'a str { + match self { + Self::Keyword(s) | Self::Char(s) => s, + Self::Var => "variable", + Self::ElseOrEnd => "else or end", + Self::CommaOrRBrace => "comma or right brace", + Self::SemicolonOrRParen => "semicolon or right parenthesis", + Self::Term => "term", + Self::Key => "key", + Self::Ident => "identifier", + Self::Arg => "argument", + Self::Str => "string", + Self::Nothing => "nothing", + } + } +} + +/// Output of a fallible parsing operation. +pub type Result<'s, 't, T> = core::result::Result>; + +/// Parser for jq programs. +pub struct Parser<'s, 't> { + i: core::slice::Iter<'t, Token<&'s str>>, + e: Vec>, + /// names of fold-like filters, e.g. "reduce" and "foreach" + fold: &'s [&'s str], +} + +/// Function from value to stream of values, such as `.[] | add / length`. +#[derive(Debug, Default)] +pub enum Term { + /// Identity, i.e. `.` + #[default] + Id, + /// Recursion (`..`) + Recurse, + + /// Integer or floating-point number + Num(S), + /// String + /// + /// This consists of an optional format filter starting with `@` (such as `@text`), + /// followed by quoted string parts (such as `"Hello, \(.name)! \u263A"`). + Str(Option, Vec>), + /// Array, empty if `None` + Arr(Option>), + /// Object, specifying its key-value pairs + Obj(Vec<(Self, Option)>), + + /// Negation + Neg(Box), + /// Application, i.e. 
`l | r` if no string is given, else `l as $x | r` + Pipe(Box, Option, Box), + /// Sequence of binary operations, e.g. `1 + 2 - 3 * 4` + BinOp(Box, Vec<(S, Self)>), + + /// Control flow variable declaration, e.g. `label $x | ...` + Label(S, Box), + /// Break out from control flow to location variable, e.g. `break $x` + Break(S), + + /// `reduce` and `foreach`, e.g. `reduce .[] as $x (0; .+$x)` + Fold(S, Box, S, Vec), + /// `try` and optional `catch` + TryCatch(Box, Option>), + /// If-then-else + IfThenElse(Vec<(Self, Self)>, Option>), + + /// Local definition + Def(Vec>, Box), + /// Call to another filter, e.g. `map(.+1)` + Call(S, Vec), + /// Variable, such as `$x` (including leading '$') + Var(S), + + /// Path such as `.a`, `.[][]."b"`, `f[0]` + Path(Box, Path), +} + +impl Term { + pub(crate) fn str(s: S) -> Self { + Self::Str(None, [StrPart::Str(s)].into()) + } +} + +impl<'s, 't> Parser<'s, 't> { + /// Initialise a new parser on a sequence of [`Token`]s. + #[must_use] + pub fn new(i: &'t [Token<&'s str>]) -> Self { + Self { + i: i.iter(), + e: Vec::new(), + fold: &["reduce", "foreach", "for"], + } + } + + /// Parse tokens with the given function. + /// + /// Returns [`Ok`] if the function consumes the whole output without producing any error. + pub fn parse(mut self, f: F) -> core::result::Result>> + where + F: FnOnce(&mut Self) -> Result<'s, 't, T>, + { + let y = self.finish("", f); + if self.e.is_empty() { + Ok(y) + } else { + Err(self.e) + } + } + + /// Verifies that the remaining input tokens correspond to the given string. + fn verify_last(&mut self, last: &'static str) -> Result<'s, 't, ()> { + match (self.i.as_slice(), last) { + ([], "") => Ok(()), + ([Token::Char(c)], last) if *c == last => Ok(()), + ([], _) => Err((Expect::Char(last), None)), + ([next, ..], "") => Err((Expect::Nothing, Some(next))), + ([next, ..], _) => Err((Expect::Char(last), Some(next))), + } + } + + /// Run given parse function with given tokens, then reset tokens to previous tokens. 
+ fn with_tok(&mut self, tokens: &'t [Token<&'s str>], f: impl FnOnce(&mut Self) -> T) -> T { + let i = core::mem::replace(&mut self.i, tokens.iter()); + let y = f(self); + self.i = i; + y + } + + /// Parse with given function, then + /// ensure that remaining input tokens correspond to `last`, and + /// return default if any error occurred. + fn finish(&mut self, last: &'static str, f: F) -> T + where + F: FnOnce(&mut Self) -> Result<'s, 't, T>, + { + f(self) + .and_then(|y| { + self.verify_last(last)?; + Ok(y) + }) + .unwrap_or_else(|e| { + self.e.push(e); + T::default() + }) + } + + fn with(&mut self, tokens: &'t [Token<&'s str>], last: &'static str, f: F) -> T + where + F: FnOnce(&mut Self) -> Result<'s, 't, T>, + { + self.with_tok(tokens, |p| p.finish(last, f)) + } + + /// Parse with the given function, and rewind input if it returns `None`. + fn maybe(&mut self, f: impl Fn(&mut Self) -> Option) -> Option { + let i = self.i.clone(); + let y = f(self); + // rewind to previous state in case of non-match + if y.is_none() { + self.i = i; + } + y + } + + /// Parse with the given function, and rewind input if it returns `Ok(None)`. + fn try_maybe(&mut self, f: F) -> Result<'s, 't, Option> + where + F: Fn(&mut Self) -> Result<'s, 't, Option>, + { + let i = self.i.clone(); + let y = f(self)?; + // rewind to previous state in case of non-match + if y.is_none() { + self.i = i; + } + Ok(y) + } + + /// Parse sequence of shape `f ("," f)* ","? "}"`. 
+ fn obj_items(&mut self, f: F) -> Result<'s, 't, Vec> + where + F: Fn(&mut Self) -> Result<'s, 't, T>, + { + let mut y = Vec::from([f(self)?]); + let rbrace = |p: &mut Self| p.i.next().filter(|tk| matches!(tk, Token::Char("}"))); + loop { + match self.i.next() { + Some(Token::Char("}")) => break, + Some(Token::Char(",")) if self.maybe(rbrace).is_some() => break, + Some(Token::Char(",")) => y.push(f(self)?), + next => return Err((Expect::CommaOrRBrace, next)), + } + } + Ok(y) + } + + /// Parse sequence of shape `f (";" f)* ")"`. + fn arg_items(&mut self, f: F) -> Result<'s, 't, Vec> + where + F: Fn(&mut Self) -> Result<'s, 't, T>, + { + let mut y = Vec::from([f(self)?]); + loop { + match self.i.next() { + Some(Token::Char(";")) => y.push(f(self)?), + Some(Token::Char(")")) => break, + next => return Err((Expect::SemicolonOrRParen, next)), + } + } + Ok(y) + } + + /// Parse `("(" arg (";" arg)* ")")?`. + fn args(&mut self, f: fn(&mut Self) -> Result<'s, 't, T>) -> Vec { + self.maybe(|p| match p.i.next() { + Some(Token::Block("(", tokens)) => Some(p.with(tokens, "", |p| p.arg_items(f))), + _ => None, + }) + .unwrap_or_default() + } + + /// Parse a binary operator, including `,` if `with_comma` is true. + fn op(&mut self, with_comma: bool) -> Option<&'s str> { + self.maybe(|p| match p.i.next() { + // handle pipe directly in `term()` + Some(Token::Op("|")) => None, + Some(Token::Op(o) | Token::Word(o @ ("and" | "or"))) => Some(*o), + Some(Token::Char(o @ ",")) if with_comma => Some(*o), + _ => None, + }) + } + + fn char0(&mut self, c: char) -> Option<&'s str> { + self.maybe(|p| match p.i.next() { + Some(Token::Char(s)) if s.chars().eq([c]) => Some(*s), + _ => None, + }) + } + + fn dot(&mut self) -> Option<&'s str> { + self.maybe(|p| match p.i.next() { + Some(Token::Char(c)) if *c != ".." 
=> c.strip_prefix('.'), + _ => None, + }) + } + + fn terminated(&mut self, f: F) -> Result<'s, 't, T> + where + F: FnOnce(&mut Self) -> Result<'s, 't, T>, + { + let y = f(self)?; + self.char1(";")?; + Ok(y) + } + + fn char1(&mut self, c: &'static str) -> Result<'s, 't, &'s str> { + match self.i.next() { + Some(Token::Char(s)) if *s == c => Ok(*s), + next => Err((Expect::Char(c), next)), + } + } + + fn keyword(&mut self, kw: &'static str) -> Result<'s, 't, ()> { + match self.i.next() { + Some(Token::Word(w)) if *w == kw => Ok(()), + next => Err((Expect::Keyword(kw), next)), + } + } + + fn var(&mut self) -> Result<'s, 't, &'s str> { + match self.i.next() { + Some(Token::Word(x)) if x.starts_with('$') => Ok(*x), + next => Err((Expect::Var, next)), + } + } + + fn pipe(&mut self) -> Result<'s, 't, ()> { + match self.i.next() { + Some(Token::Op("|")) => Ok(()), + next => Err((Expect::Char("|"), next)), + } + } + + /// Parse a term. + /// + /// Only if `with_comma` is true, the parsed term may be of the shape `t, u`. + /// This matters for the parsing of object values, such as `{k1: v1, k2: v2}`: + /// if we would permit terms of the shape `t, u` inside objects, + /// then this would be parsed like `{k1: (v1, k2): v2}`, which is invalid. + fn term_with_comma(&mut self, with_comma: bool) -> Result<'s, 't, Term<&'s str>> { + let head = self.atom()?; + let tail = core::iter::from_fn(|| self.op(with_comma).map(|op| Ok((op, self.atom()?)))) + .collect::>>()?; + + let tm = if tail.is_empty() { + head + } else { + Term::BinOp(Box::new(head), tail) + }; + + let pipe = self.try_maybe(|p| match p.i.next() { + Some(Token::Op("|")) => Ok(Some(None)), + Some(Token::Word("as")) => { + let x = p.var()?; + p.pipe()?; + Ok(Some(Some(x))) + } + _ => Ok(None), + })?; + Ok(match pipe { + None => tm, + Some(x) => Term::Pipe(Box::new(tm), x, Box::new(self.term_with_comma(with_comma)?)), + }) + } + + /// Parse an atomic term. 
+ /// + /// A term `t` is atomic if and only if `try t catch 0` is syntactically correct. + /// For example, the term `1 + 2` is not atomic, because `try 1 + 2 catch 0` is invalid. + /// However, the term `.[]` is atomic, because `try .[] catch 0` is valid. + fn atom(&mut self) -> Result<'s, 't, Term<&'s str>> { + let tm = match self.i.next() { + Some(Token::Op("-")) => Term::Neg(Box::new(self.atom()?)), + Some(Token::Word("def")) => { + let head = self.def_tail()?; + let tail = self.defs()?; + let tm = self.term()?; + Term::Def(core::iter::once(head).chain(tail).collect(), Box::new(tm)) + } + Some(Token::Word("if")) => { + let if_then = |p: &mut Self| { + let if_ = p.term()?; + p.keyword("then")?; + Ok((if_, p.term()?)) + }; + let mut if_thens = Vec::from([if_then(self)?]); + let else_ = loop { + match self.i.next() { + Some(Token::Word("elif")) => if_thens.push(if_then(self)?), + Some(Token::Word("else")) => { + let else_ = self.term()?; + self.keyword("end")?; + break Some(else_); + } + Some(Token::Word("end")) => break None, + next => return Err((Expect::ElseOrEnd, next)), + } + }; + Term::IfThenElse(if_thens, else_.map(Box::new)) + } + Some(Token::Word("try")) => { + let try_ = self.atom()?; + let catch = self.try_maybe(|p| match p.i.next() { + Some(Token::Word("catch")) => Ok(Some(p.atom()?)), + _ => Ok(None), + })?; + Term::TryCatch(Box::new(try_), catch.map(Box::new)) + } + Some(Token::Word("label")) => { + let x = self.var()?; + self.pipe()?; + let tm = self.term()?; + Term::Label(x, Box::new(tm)) + } + Some(Token::Word("break")) => Term::Break(self.var()?), + Some(Token::Word(fold)) if self.fold.contains(fold) => { + let xs = self.atom()?; + self.keyword("as")?; + let x = self.var()?; + let args = self.args(Self::term); + Term::Fold(*fold, Box::new(xs), x, args) + } + Some(Token::Word(id)) if id.starts_with('$') => Term::Var(*id), + Some(Token::Word(id)) if id.starts_with('@') => { + let s = self.maybe(|p| match p.i.next() { + Some(Token::Str(_, parts, 
_)) => Some(p.str_parts(parts)), + _ => None, + }); + match s { + None => Term::Call(*id, Vec::new()), + Some(parts) => Term::Str(Some(*id), parts), + } + } + Some(Token::Word(id)) => Term::Call(*id, self.args(Self::term)), + Some(Token::Char("..")) => Term::Recurse, + Some(Token::Char(c)) if c.starts_with('.') => { + let key = if c.len() > 1 { + Some(Term::str(&c[1..])) + } else { + // TODO: this returns None on things like "@json .", + // whereas it should return an error instead + self.maybe(|p| p.key().ok()) + }; + + if let Some(key) = key { + let head = (path::Part::Index(key), self.opt()); + let path = core::iter::once(head).chain(self.path()?).collect(); + Term::Path(Box::new(Term::Id), path) + } else { + Term::Id + } + } + Some(Token::Num(n)) => Term::Num(*n), + Some(Token::Block("[", tokens)) if matches!(tokens[..], [Token::Char("]")]) => { + Term::Arr(None) + } + Some(Token::Block("{", tokens)) if matches!(tokens[..], [Token::Char("}")]) => { + Term::Obj(Vec::new()) + } + Some(Token::Block("(", tokens)) => self.with(tokens, ")", Self::term), + Some(Token::Block("[", tokens)) => { + Term::Arr(Some(Box::new(self.with(tokens, "]", Self::term)))) + } + Some(Token::Block("{", tokens)) => { + self.with(tokens, "", |p| p.obj_items(Self::obj_entry).map(Term::Obj)) + } + Some(Token::Str(_, parts, _)) => Term::Str(None, self.str_parts(parts)), + next => return Err((Expect::Term, next)), + }; + + let tm = match self.opt() { + path::Opt::Optional => Term::TryCatch(Box::new(tm), None), + path::Opt::Essential => tm, + }; + + let path = self.path()?; + Ok(if path.is_empty() { + tm + } else { + Term::Path(Box::new(tm), path) + }) + } + + /// Parse a term such as `.[] | .+1`. + pub fn term(&mut self) -> Result<'s, 't, Term<&'s str>> { + self.term_with_comma(true) + } + + /// Parse an object entry. + /// + /// An object is written as `{e1, ..., en}`, where `ei` is an object entry. + /// An example of an object entry is `"key": value` or `(key): value`. 
+ /// When the key is a term surrounded by parentheses, a value is required, + /// otherwise the value may be omitted (e.g. `"key"` or `$x`). + fn obj_entry(&mut self) -> Result<'s, 't, (Term<&'s str>, Option>)> { + let i = self.i.clone(); + let key = match self.i.next() { + Some(Token::Block("(", tokens)) => { + let k = self.with(tokens, ")", Self::term); + self.char1(":")?; + return Ok((k, Some(self.term_with_comma(false)?))); + } + Some(Token::Word(id)) if !id.starts_with(['$', '@']) && !id.contains("::") => { + Term::str(*id) + } + _ => { + self.i = i; + self.key()? + } + }; + let v = self.char0(':').map(|_| self.term_with_comma(false)); + Ok((key, v.transpose()?)) + } + + fn str_parts( + &mut self, + parts: &'t [StrPart<&'s str, Token<&'s str>>], + ) -> Vec>> { + let parts = parts.iter().map(|part| match part { + StrPart::Str(s) => StrPart::Str(*s), + StrPart::Filter(Token::Block("(", tokens)) => { + StrPart::Filter(self.with(tokens, ")", Self::term)) + } + StrPart::Filter(_) => unreachable!(), + StrPart::Char(c) => StrPart::Char(*c), + }); + parts.collect() + } + + fn path(&mut self) -> Result<'s, 't, Path>> { + let mut path: Vec<_> = core::iter::from_fn(|| self.path_part_opt()).collect(); + while let Some(key) = self.dot() { + let key = if key.is_empty() { + self.key()? + } else { + Term::str(key) + }; + path.push((path::Part::Index(key), self.opt())); + path.extend(core::iter::from_fn(|| self.path_part_opt())); + } + Ok(path) + } + + /// Parse `[]`, `[t]`, `[t:]`, `[t:t]`, `[:t]` (all without brackets). 
+ fn path_part(&mut self) -> Result<'s, 't, path::Part>> { + use path::Part::{Index, Range}; + let done = |p: &Self| matches!(p.i.as_slice(), [Token::Char("]")]); + Ok(if done(self) { + Range(None, None) + } else if self.char0(':').is_some() { + Range(None, Some(self.term()?)) + } else { + let tm = self.term()?; + if self.char0(':').is_some() { + if done(self) { + Range(Some(tm), None) + } else { + Range(Some(tm), Some(self.term()?)) + } + } else { + Index(tm) + } + }) + } + + fn path_part_opt(&mut self) -> Option<(path::Part>, path::Opt)> { + let part = self.maybe(|p| match p.i.next() { + Some(Token::Block("[", tokens)) => Some(p.with(tokens, "]", Self::path_part)), + _ => None, + })?; + Some((part, self.opt())) + } + + fn key(&mut self) -> Result<'s, 't, Term<&'s str>> { + Ok(match self.i.next() { + Some(Token::Word(id)) if id.starts_with('$') => Term::Var(*id), + Some(Token::Word(id)) if id.starts_with('@') => match self.i.next() { + Some(Token::Str(_, parts, _)) => Term::Str(Some(*id), self.str_parts(parts)), + next => return Err((Expect::Str, next)), + }, + Some(Token::Str(_, parts, _)) => Term::Str(None, self.str_parts(parts)), + next => return Err((Expect::Key, next)), + }) + } + + fn opt(&mut self) -> path::Opt { + let mut opt = path::Opt::Essential; + while self.char0('?').is_some() { + opt = path::Opt::Optional; + } + opt + } + + /// Parse a sequence of definitions, such as `def x: 1; def y: 2;`. + pub fn defs(&mut self) -> Result<'s, 't, Vec>>> { + let head = |p: &mut Self| p.keyword("def").ok(); + core::iter::from_fn(|| self.maybe(head).map(|_| self.def_tail())).collect() + } + + /// Parse `name args ":" term ";"`. 
+ fn def_tail(&mut self) -> Result<'s, 't, Def<&'s str, Term<&'s str>>> { + let name = match self.i.next() { + Some(Token::Word(w)) if !w.starts_with('$') && !w.contains("::") => w, + next => return Err((Expect::Ident, next)), + }; + let args = self.args(|p| { + Ok(match p.i.next() { + Some(Token::Word(w)) if !w.contains("::") => *w, + next => return Err((Expect::Arg, next)), + }) + }); + self.char1(":")?; + + let body = self.term()?; + self.char1(";")?; + + Ok(Def { name, args, body }) + } + + fn bare_str(&mut self) -> Result<'s, 't, &'s str> { + match self.i.next() { + next @ Some(Token::Str(_, parts, _)) => match parts[..] { + [StrPart::Str(s)] => Ok(s), + _ => Err((Expect::Str, next)), + }, + next => Err((Expect::Str, next)), + } + } + + fn include(&mut self) -> Result<'s, 't, (&'s str, Option<&'s str>)> { + self.bare_str().map(|path| (path, None)) + } + + fn import(&mut self) -> Result<'s, 't, (&'s str, Option<&'s str>)> { + let path = self.bare_str()?; + self.keyword("as")?; + let name = match self.i.next() { + Some(Token::Word(w)) if !w.starts_with(['$', '@']) && !w.contains("::") => *w, + next => return Err((Expect::Ident, next)), + }; + Ok((path, Some(name))) + } + + /// Parse a module with a body returned by the given function. + pub fn module(&mut self, f: F) -> Result<'s, 't, Module<&'s str, B>> + where + F: FnOnce(&mut Self) -> Result<'s, 't, B>, + { + let meta = self + .maybe(|p| match p.i.next() { + Some(Token::Word("module")) => Some(p.terminated(Self::term)), + _ => None, + }) + .transpose()?; + + let mods = core::iter::from_fn(|| { + self.maybe(|p| match p.i.next() { + Some(Token::Word("include")) => Some(p.terminated(Self::include)), + Some(Token::Word("import")) => Some(p.terminated(Self::import)), + _ => None, + }) + }) + .collect::>()?; + + let body = f(self)?; + + Ok(Module { meta, mods, body }) + } +} + +/// jq module, consisting of metadata, imports/includes, and a body. 
+/// +/// Example (where the body is a sequence of definitions): +/// +/// ~~~ jq +/// module {}; +/// +/// import "foo" as foo; +/// include "bar"; +/// +/// def iter: .[]; +/// ~~~ +#[derive(Debug, Default)] +pub struct Module { + #[allow(dead_code)] + meta: Option>, + pub(crate) mods: Vec<(S, Option)>, + pub(crate) body: B, +} + +/// jq definition, consisting of a name, optional arguments, and a body. +/// +/// Examples: +/// +/// ~~~ jq +/// def pi: 3.1415; +/// def double($x): $x + $x; +/// def map(f): [.[] | f]; +/// def recurse(f; cond): recurse(f | select(cond)); +/// ~~~ +#[derive(Debug)] +pub struct Def { + pub(crate) name: S, + pub(crate) args: Vec, + /// Body of the filter, e.g. `[.[] | f]`. + pub(crate) body: F, +} diff --git a/jaq-syn/src/path.rs b/jaq-syn/src/path.rs index f491a109f..2fd3ff353 100644 --- a/jaq-syn/src/path.rs +++ b/jaq-syn/src/path.rs @@ -16,6 +16,12 @@ pub enum Part { Range(Option, Option), } +impl Default for Part { + fn default() -> Self { + Self::Range(None, None) + } +} + /// Optionality of a path part, i.e. whether `?` is present. /// /// For example, `[] | .a` fails with an error, while `[] | .a?` returns nothing. diff --git a/jaq-syn/src/prec_climb.rs b/jaq-syn/src/prec_climb.rs new file mode 100644 index 000000000..83eb2f2b3 --- /dev/null +++ b/jaq-syn/src/prec_climb.rs @@ -0,0 +1,107 @@ +//! Precedence climbing for parsing expressions with binary operators. +//! +//! This allows you to parse expressions that are +//! separated by binary operators with precedence and associativity. +//! For example, in the expression `1 + 2 * 3`, we usually want to +//! parse this into `1 + (2 * 3)`, not `(1 + 2) * 3`. +//! This is handled by saying that `*` has higher *precedence* than `+`. +//! Also, when we have a power operator `^`, we want +//! `2 ^ 3 ^ 4` to mean `(2 ^ 3) ^ 4`, not `2 ^ (3 ^ 4)`. +//! This is handled by saying that `^` is *left-associative*. +//! +//! This was adapted from +//! . 
+ +use core::iter::Peekable; + +/// Associativity of an operator. +pub enum Associativity { + /// `(x + y) + z` + Left, + /// `x + (y + z)` + Right, +} + +/// Binary operator. +pub trait Op { + /// "Stickiness" of the operator + fn precedence(&self) -> usize; + /// Is the operator left- or right-associative? + fn associativity(&self) -> Associativity; +} + +/// An expression that can be built from other expressions with some operator. +pub trait Expr { + /// Combine two expressions with an operator. + fn from_op(lhs: Self, op: O, rhs: Self) -> Self; +} + +/// Perform precedence climbing. +pub fn climb>(head: T, iter: impl IntoIterator) -> T { + climb1(head, &mut iter.into_iter().peekable(), 0) +} + +fn climb1, I>(mut x: T, iter: &mut Peekable, min_prec: usize) -> T +where + I: Iterator, +{ + while let Some((op, mut rhs)) = iter.next_if(|(op, _)| op.precedence() >= min_prec) { + let right_assoc = matches!(op.associativity(), Associativity::Right); + let this_prec = op.precedence(); + + while let Some(next) = iter.peek() { + let next_prec = next.0.precedence(); + + if next_prec > this_prec || (right_assoc && next_prec == this_prec) { + rhs = climb1(rhs, iter, next_prec) + } else { + break; + } + } + x = T::from_op(x, op, rhs); + } + x +} + +/// Simple arithmetic expressions +#[test] +fn test() { + enum Op { + Add, + Sub, + Mul, + Div, + } + + impl crate::prec_climb::Op for Op { + fn precedence(&self) -> usize { + match self { + Op::Add | Op::Sub => 0, + Op::Mul | Op::Div => 1, + } + } + + fn associativity(&self) -> Associativity { + Associativity::Right + } + } + + impl Expr for isize { + fn from_op(lhs: Self, op: Op, rhs: Self) -> Self { + match op { + Op::Add => lhs + rhs, + Op::Sub => lhs - rhs, + Op::Mul => lhs * rhs, + Op::Div => lhs / rhs, + } + } + } + + use Op::{Add, Div, Mul, Sub}; + // 1 + 2 * 3 - 6 / 2 = + // 1 + 6 - 3 = 4 + let head: isize = 1; + let tail = [(Add, 2), (Mul, 3), (Sub, 6), (Div, 2)]; + let out = climb(head, tail); + assert_eq!(out, 4); +} 
diff --git a/jaq/Cargo.toml b/jaq/Cargo.toml index 1f96922c2..aa5bfd8ff 100644 --- a/jaq/Cargo.toml +++ b/jaq/Cargo.toml @@ -16,12 +16,10 @@ default = ["mimalloc"] [dependencies] jaq-syn = { version = "1.1.0", path = "../jaq-syn" } -jaq-parse = { version = "1.0.0", path = "../jaq-parse" } jaq-interpret = { version = "1.2.0", path = "../jaq-interpret" } jaq-core = { version = "1.2.0", path = "../jaq-core" } -jaq-std = { version = "1.2.0", path = "../jaq-std" } +jaq-std = { version = "1.5.0", path = "../jaq-std" } atty = "0.2" -chumsky = { version = "0.9.0", default-features = false } codesnake = { version = "0.1" } clap = { version = "4.0.0", features = ["derive"] } env_logger = { version = "0.10.0", default-features = false } diff --git a/jaq/src/main.rs b/jaq/src/main.rs index 9add24eeb..9159d4acd 100644 --- a/jaq/src/main.rs +++ b/jaq/src/main.rs @@ -160,18 +160,17 @@ fn real_main(cli: &Cli) -> Result { let mut args = cli.args.iter(); let filter = match &cli.from_file { - Some(file) => parse(&std::fs::read_to_string(file)?, vars)?, - None => { - if let Some(filter) = args.next() { - parse(filter, vars)? - } else { - Filter::default() - } - } + Some(file) => Some(std::fs::read_to_string(file)?), + None => args.next().cloned(), }; - //println!("Filter: {:?}", filter); let files: Vec<_> = args.collect(); + let filter = match filter { + None => Filter::default(), + Some(filter_str) => parse(&filter_str, vars).map_err(|e| Error::Report(filter_str, e))?, + }; + //println!("Filter: {:?}", filter); + let last = if files.is_empty() { let inputs = read_buffered(cli, io::stdin().lock()); with_stdout(|out| run(cli, &filter, ctx, inputs, |v| print(out, cli, &v)))? 
@@ -258,33 +257,39 @@ fn args_named(var_val: &[(String, Val)]) -> Val { Val::obj(args.collect()) } -fn parse(filter_str: &str, vars: Vec) -> Result> { - let mut defs = ParseCtx::new(vars); - defs.insert_natives(jaq_core::core()); - defs.insert_defs(jaq_std::std()); - assert!(defs.errs.is_empty()); - let (filter, errs) = jaq_parse::parse(filter_str, jaq_parse::main()); - if !errs.is_empty() { - return Err(errs - .into_iter() - .map(|error| ParseError { - error, - filter: filter_str.to_owned(), - }) - .collect()); - } - let filter = defs.compile(filter.unwrap()); - if defs.errs.is_empty() { +fn parse_term(filter_str: &str) -> Result> { + let tokens = jaq_syn::Lexer::new(filter_str).lex().map_err(|errs| { + errs.into_iter() + .map(|e| report_lex(filter_str, e)) + .collect::>() + })?; + + let main = jaq_syn::Parser::new(&tokens).parse(|p| p.module(|p| p.term())); + let main = main.map_err(|errs| { + //std::println!("{:?}", errs); + errs.into_iter() + .map(|e| report_parse(filter_str, e)) + .collect::>() + })?; + + //std::println!("{:?}", main); + Ok(main.conv(filter_str)) +} + +fn parse(filter_str: &str, vars: Vec) -> Result> { + let mut ctx = ParseCtx::new(vars); + ctx.insert_natives(jaq_core::core()); + ctx.insert_defs(jaq_std::std()); + let filter = parse_term(filter_str)?; + let filter = ctx.compile(filter); + if ctx.errs.is_empty() { Ok(filter) } else { - Err(defs - .errs - .into_iter() - .map(|error| ParseError { - error: chumsky::error::Simple::custom(error.1, error.0.to_string()), - filter: filter_str.to_owned(), - }) - .collect()) + let reports = ctx.errs.into_iter().map(|error| Report { + message: error.0.to_string(), + labels: Vec::from([(error.1, [(error.0.to_string(), None)].into(), Color::Red)]), + }); + Err(reports.collect()) } } @@ -366,16 +371,10 @@ fn collect_if<'a, T: 'a, E: 'a>( } } -#[derive(Debug)] -struct ParseError { - error: chumsky::error::Simple, - filter: String, -} - #[derive(Debug)] enum Error { Io(Option, io::Error), - Chumsky(Vec), + 
Report(String, Vec), Parse(String), Jaq(jaq_interpret::Error), Persist(tempfile::PersistError), @@ -399,9 +398,12 @@ impl Termination for Error { eprintln!("Error: {e}"); 2 } - Self::Chumsky(errs) => { - for e in errs { - eprintln!("Error: {}", report(&e.filter, &e.error)); + Self::Report(code, reports) => { + let idx = codesnake::LineIndex::new(&code); + for e in reports { + eprintln!("Error: {}", e.message); + let block = e.into_block(&idx); + eprintln!("{}\n{}{}", block.prologue(), block, block.epilogue()) } 3 } @@ -425,12 +427,6 @@ impl From for Error { } } -impl From> for Error { - fn from(e: Vec) -> Self { - Self::Chumsky(e) - } -} - /// Run a filter with given input values and run `f` for every value output. /// /// This function cannot return an `Iterator` because it creates an `RcIter`. @@ -578,11 +574,12 @@ fn with_stdout(f: impl FnOnce(&mut io::StdoutLock) -> Result) -> Re Ok(y) } +type StringColors = Vec<(String, Option)>; + #[derive(Debug)] -struct Report<'a> { - code: &'a str, +struct Report { message: String, - labels: Vec<(core::ops::Range, String, Color)>, + labels: Vec<(core::ops::Range, StringColors, Color)>, } #[derive(Clone, Debug)] @@ -602,75 +599,67 @@ impl Color { } } -fn report<'a>(code: &'a str, e: &chumsky::error::Simple) -> Report<'a> { - use chumsky::error::SimpleReason; +fn report_lex(code: &str, (expected, found): jaq_syn::lex::Error<&str>) -> Report { + use jaq_syn::lex::{span, Expect}; + // truncate found string to its first character + let found = &found[..found.char_indices().nth(1).map_or(found.len(), |(i, _)| i)]; - let eof = || "end of input".to_string(); - - let message = if let SimpleReason::Custom(msg) = e.reason() { - msg.clone() - } else { - let found = if e.found().is_some() { - "Unexpected token" - } else { - "Unexpected end of input" - }; - let when = if let Some(label) = e.label() { - format!(" while parsing {label}") - } else { - String::new() - }; - let expected = if e.expected().len() == 0 { - "something 
else".to_string() - } else { - let f = |e: &Option| e.as_ref().map_or_else(eof, |e| e.to_string()); - e.expected().map(f).collect::>().join(", ") - }; - format!("{found}{when}, expected {expected}",) + let found_range = span(code, found); + let found = match found { + "" => [("unexpected end of input".to_string(), None)].into(), + c => [("unexpected character ", None), (c, Some(Color::Red))] + .map(|(s, c)| (s.into(), c)) + .into(), }; + let label = (found_range, found, Color::Red); - let label = if let SimpleReason::Custom(msg) = e.reason() { - msg.clone() - } else { - let token = |c: &String| format!("token {}", Color::Red.apply(c)); - format!("Unexpected {}", e.found().map_or_else(eof, token)) - }; - // convert character indices to byte offsets - let char_to_byte = |i| { - code.char_indices() - .map(|(i, _c)| i) - .chain([code.len(), code.len()]) - .nth(i) - .unwrap() + let labels = match expected { + Expect::Delim(open) => { + let text = [("unclosed delimiter ", None), (open, Some(Color::Yellow))] + .map(|(s, c)| (s.into(), c)); + Vec::from([(span(code, open), text.into(), Color::Yellow), label]) + } + _ => Vec::from([label]), }; - let conv = |span: &core::ops::Range<_>| char_to_byte(span.start)..char_to_byte(span.end); - let mut labels = Vec::from([(conv(&e.span()), label, Color::Red)]); - if let SimpleReason::Unclosed { span, delimiter } = e.reason() { - let text = format!("Unclosed delimiter {}", Color::Yellow.apply(delimiter)); - labels.insert(0, (conv(span), text, Color::Yellow)); - } Report { - code, - message, + message: format!("expected {}", expected.as_str()), labels, } } -impl Display for Report<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - use codesnake::{Block, CodeWidth, Label, LineIndex}; - let idx = LineIndex::new(self.code); - let labels = self.labels.clone().into_iter().map(|(range, text, color)| { - Label::new(range, text).with_style(move |s| color.apply(s).to_string()) +fn report_parse(code: &str, (expected, found): 
jaq_syn::parse::Error) -> Report { + let found_range = match found { + None => code.len()..code.len(), + Some(found) => found.span(code), + }; + let found = found.map_or("unexpected end of input", |_| "unexpected token"); + let found = [(found.to_string(), None)].into(); + + Report { + message: format!("expected {}", expected.as_str()), + labels: Vec::from([(found_range, found, Color::Red)]), + } +} + +type CodeBlock = codesnake::Block, String>; + +impl Report { + fn into_block(self, idx: &codesnake::LineIndex) -> CodeBlock { + use codesnake::{Block, CodeWidth, Label}; + let color_maybe = |(text, color): (_, Option)| match color { + None => text, + Some(color) => color.apply(text).to_string(), + }; + let labels = self.labels.into_iter().map(|(range, text, color)| { + let text = text.into_iter().map(color_maybe).collect::>(); + Label::new(range, text.join("")).with_style(move |s| color.apply(s).to_string()) }); - let block = Block::new(&idx, labels).unwrap().map_code(|c| { + Block::new(idx, labels).unwrap().map_code(|c| { let c = c.replace('\t', " "); let w = unicode_width::UnicodeWidthStr::width(&*c); CodeWidth::new(c, core::cmp::max(w, 1)) - }); - writeln!(f, "{}", self.message)?; - write!(f, "{}\n{}{}", block.prologue(), block, block.epilogue()) + }) } } @@ -678,7 +667,7 @@ fn run_test(test: jaq_syn::test::Test) -> Result<(Val, Val), Error> { let inputs = RcIter::new(Box::new(core::iter::empty())); let ctx = Ctx::new(Vec::new(), &inputs); - let filter = parse(&test.filter, Vec::new())?; + let filter = parse(&test.filter, Vec::new()).map_err(|e| Error::Report(test.filter, e))?; let json = |s: String| { use hifijson::token::Lex;