From 70b5ea252770dfeecfd80617e7e1880d0dc99ac0 Mon Sep 17 00:00:00 2001 From: Josh Pschorr Date: Tue, 17 Sep 2024 15:38:26 -0700 Subject: [PATCH] Merge fixes from `dev-ion-doc` feature branch back to `main` (#497) * Rename `ion-rs` to `ion-rs_old` in preparation of upgrade (#492) * Refactor lexer into module & upgrade lexer & parser dependencies (#493) * Upgrade project deps to latest; use semver, not wildcard nor tilde (#494) --- .github/workflows/ci_build_test.yml | 4 +- deny.toml | 34 +- extension/partiql-extension-ddl/Cargo.toml | 9 +- extension/partiql-extension-ddl/src/ddl.rs | 3 - .../Cargo.toml | 31 +- .../src/lib.rs | 4 +- extension/partiql-extension-ion/Cargo.toml | 27 +- extension/partiql-extension-ion/src/decode.rs | 2 +- extension/partiql-extension-ion/src/encode.rs | 10 +- extension/partiql-extension-ion/src/lib.rs | 156 +- .../partiql-extension-visualize/Cargo.toml | 16 +- partiql-ast-passes/Cargo.toml | 8 +- partiql-ast/Cargo.toml | 6 +- partiql-ast/partiql-ast-macros/Cargo.toml | 6 +- partiql-catalog/Cargo.toml | 16 +- partiql-common/Cargo.toml | 10 +- partiql-conformance-test-generator/Cargo.toml | 12 +- .../src/generator.rs | 6 +- .../src/reader.rs | 6 +- .../src/schema.rs | 2 +- partiql-conformance-tests/Cargo.toml | 12 +- partiql-conformance-tests/tests/test_value.rs | 2 +- partiql-eval/Cargo.toml | 34 +- partiql-logical-planner/Cargo.toml | 26 +- partiql-logical-planner/src/lower.rs | 4 +- partiql-logical/Cargo.toml | 20 +- partiql-parser/Cargo.toml | 38 +- partiql-parser/src/lexer.rs | 1319 ----------------- partiql-parser/src/lexer/comment.rs | 132 ++ partiql-parser/src/lexer/embedded_ion.rs | 135 ++ partiql-parser/src/lexer/mod.rs | 521 +++++++ partiql-parser/src/lexer/partiql.rs | 569 +++++++ partiql-types/Cargo.toml | 24 +- partiql-value/Cargo.toml | 36 +- partiql/Cargo.toml | 4 +- 35 files changed, 1633 insertions(+), 1611 deletions(-) delete mode 100644 partiql-parser/src/lexer.rs create mode 100644 partiql-parser/src/lexer/comment.rs create mode 100644 partiql-parser/src/lexer/embedded_ion.rs create mode 100644 partiql-parser/src/lexer/mod.rs create mode 100644 partiql-parser/src/lexer/partiql.rs diff --git a/.github/workflows/ci_build_test.yml b/.github/workflows/ci_build_test.yml index 94fe6e8b..0f91b3d7 100644 --- a/.github/workflows/ci_build_test.yml +++ b/.github/workflows/ci_build_test.yml @@ -92,7 +92,7 @@ jobs: - name: Rust Toolchain uses: dtolnay/rust-toolchain@master with: - toolchain: nightly-2023-06-09 + toolchain: nightly-2024-08-06 - uses: actions/cache@v3 id: restore-build with: @@ -132,7 +132,7 @@ jobs: - name: Rust Toolchain uses: dtolnay/rust-toolchain@master with: - toolchain: nightly-2023-06-09 + toolchain: nightly-2024-08-06 - uses: actions/cache@v3 id: restore-build-and-conformance with: diff --git a/deny.toml b/deny.toml index 3958acf3..26081a2d 100644 --- a/deny.toml +++ b/deny.toml @@ -6,11 +6,7 @@ # More documentation for the advisories section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html [advisories] -vulnerability = "deny" -unsound = "deny" -unmaintained = "deny" -yanked = "deny" -notice = "warn" +version = 2 ignore = [ # Advisory: https://rustsec.org/advisories/RUSTSEC-2021-0145 @@ -28,8 +24,7 @@ ignore = [ # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] -# The lint level for crates which do not have a detectable license -unlicensed = "deny" +version = 2 # ignores workspace crates that 
aren't published, or are only published to private registries. private = { ignore = true } @@ -62,29 +57,6 @@ exceptions = [ { allow = ["Unicode-DFS-2016"], name = "unicode-ident" }, ] -# Lint level for licenses considered copyleft -copyleft = "deny" - -# List of explicitly disallowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -deny = [ -] - -# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses -# * both - The license will be approved if it is both OSI-approved *AND* FSF -# * either - The license will be approved if it is either OSI-approved *OR* FSF -# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF -# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved -# * neither - This predicate is ignored and the default lint level is used -allow-osi-fsf-free = "neither" - -# Lint level used when no other predicates are matched -# 1. License isn't in the allow or deny lists -# 2. License isn't copyleft -# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" -default = "deny" - # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the # canonical license text of a valid SPDX license file. @@ -125,7 +97,7 @@ allow = [ deny = [ # Use `once_cell` instead # `OnceCell`s API is under consideration for inclusion in `std`: https://github.com/rust-lang/rust/issues/74465 - { name = "lazy_static", wrappers = ["Inflector", "criterion", "insta", "console"] }, + { name = "lazy_static", wrappers = ["Inflector", "criterion", "insta", "console", "logos-codegen"] }, # Advisory: https://rustsec.org/advisories/RUSTSEC-2020-0071 # `time` < 0.2.23 has a potential (though unlikely) potential segfault { name = "time", version = "<0.2.23", wrappers = ["chrono"] }, diff --git a/extension/partiql-extension-ddl/Cargo.toml b/extension/partiql-extension-ddl/Cargo.toml index 77eb385c..d8dd9f2c 100644 --- a/extension/partiql-extension-ddl/Cargo.toml +++ b/extension/partiql-extension-ddl/Cargo.toml @@ -7,7 +7,7 @@ repository.workspace = true license = "Apache-2.0" readme = "../../README.md" keywords = ["sql", "ddl", "definition-language", "compilers", "interpreters"] -categories = ["database", "compilers",] +categories = ["database", "compilers", ] exclude = [ "**/.git/**", "**/.github/**", @@ -23,15 +23,14 @@ bench = false [dependencies] partiql-types = { path = "../../partiql-types", version = "0.10.*" } -ion-rs = "0.18.1" thiserror = "1.0" -miette = { version = "7.2", features = ["fancy"] } +miette = { version = "7", features = ["fancy"] } time = { version = "0.3", features = ["formatting", "parsing", "serde"] } -indexmap = "2.2" +indexmap = "2.5" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [features] default = [] diff --git a/extension/partiql-extension-ddl/src/ddl.rs b/extension/partiql-extension-ddl/src/ddl.rs index 02e83760..a2db56e7 100644 --- a/extension/partiql-extension-ddl/src/ddl.rs +++ b/extension/partiql-extension-ddl/src/ddl.rs @@ -1,4 +1,3 @@ -use ion_rs::IonError; use miette::Diagnostic; use partiql_types::{ AnyOf, ArrayType, BagType, PartiqlShape, ShapeResultError, Static, StaticType, StructType, @@ -13,8 +12,6 @@ use thiserror::Error; pub enum ShapeEncodingError { #[error("UnsupportedEncoding: {0}")] UnsupportedEncoding(String), - #[error("IonEncodingError: {0}")] - IonEncodingError(#[from] IonError), #[error("DateTimeEncodingError e: 
{0}")] DateTimeEncodingError(#[from] time::error::Format), #[error("Invalid Simulation Configuration e: {0}")] diff --git a/extension/partiql-extension-ion-functions/Cargo.toml b/extension/partiql-extension-ion-functions/Cargo.toml index bbf69250..e359011f 100644 --- a/extension/partiql-extension-ion-functions/Cargo.toml +++ b/extension/partiql-extension-ion-functions/Cargo.toml @@ -9,10 +9,10 @@ readme = "../../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -21,27 +21,28 @@ edition.workspace = true bench = false [dependencies] -partiql-extension-ion = {path = "../partiql-extension-ion", version = "0.10.*" } +partiql-extension-ion = { path = "../partiql-extension-ion", version = "0.10.*" } partiql-value = { path = "../../partiql-value", version = "0.10.*" } partiql-catalog = { path = "../../partiql-catalog", version = "0.10.*" } partiql-logical = { path = "../../partiql-logical", version = "0.10.*" } -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" -rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] } -rust_decimal_macros = "1.26" -ion-rs = "0.18" +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" +rust_decimal = { version = "1.36.0", default-features = false, features = ["std"] } +rust_decimal_macros = "1.36" +ion-rs_old = { version = "0.18", package = "ion-rs" } +ion-rs = { version = "1.0.0-rc.7", features = ["experimental"] } time = { version = "0.3", features = ["macros"] } once_cell = "1" -regex = "1.7" +regex = "1.10" thiserror = "1.0" -delegate = "0.9" -zstd = "0.12" +delegate = "0.13" +zstd = "0.13" flate2 = "1.0" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" partiql-parser = { path = "../../partiql-parser", version = "0.10.*" } partiql-logical = { path = "../../partiql-logical", version = "0.10.*" } partiql-logical-planner = { path = "../../partiql-logical-planner", version = "0.10.*" } diff --git a/extension/partiql-extension-ion-functions/src/lib.rs b/extension/partiql-extension-ion-functions/src/lib.rs index 116c5201..5b1bfb85 100644 --- a/extension/partiql-extension-ion-functions/src/lib.rs +++ b/extension/partiql-extension-ion-functions/src/lib.rs @@ -1,7 +1,7 @@ #![deny(rust_2018_idioms)] #![deny(clippy::all)] -use ion_rs::data_source::ToIonDataSource; +use ion_rs_old::data_source::ToIonDataSource; use partiql_catalog::call_defs::{CallDef, CallSpec, CallSpecArg}; use partiql_catalog::TableFunction; use partiql_catalog::{ @@ -152,7 +152,7 @@ fn parse_ion_read<'a>(mut reader: impl 'a + Read + Seek) -> BaseTableExprResult< fn parse_ion_buff<'a, I: 'a + ToIonDataSource>(input: I) -> BaseTableExprResult<'a> { let err_map = |e| Box::new(e) as BaseTableExprResultError; - let reader = ion_rs::ReaderBuilder::new().build(input).unwrap(); + let reader = ion_rs_old::ReaderBuilder::new().build(input).unwrap(); let decoder = IonDecoderBuilder::new(IonDecoderConfig::default().with_mode(Encoding::Ion)).build(reader); let decoder = decoder.map_err(err_map)?.map(move |it| it.map_err(err_map)); diff --git a/extension/partiql-extension-ion/Cargo.toml b/extension/partiql-extension-ion/Cargo.toml index 30ae832c..b2080ce9 100644 --- a/extension/partiql-extension-ion/Cargo.toml +++ b/extension/partiql-extension-ion/Cargo.toml @@ -9,10 +9,10 @@ readme = 
"../../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -22,20 +22,21 @@ bench = false [dependencies] partiql-value = { path = "../../partiql-value", version = "0.10.*" } -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" -rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] } -rust_decimal_macros = "1.26" -ion-rs = "0.18" +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" +rust_decimal = { version = "1.36.0", default-features = false, features = ["std"] } +rust_decimal_macros = "1.36" +ion-rs_old = { version = "0.18", package = "ion-rs" } +ion-rs = { version = "1.0.0-rc.7", features = ["experimental"] } time = { version = "0.3", features = ["macros"] } once_cell = "1" -regex = "1.7" +regex = "1.10" thiserror = "1.0" -delegate = "0.9" +delegate = "0.13" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [features] default = [] diff --git a/extension/partiql-extension-ion/src/decode.rs b/extension/partiql-extension-ion/src/decode.rs index 55b9cbe6..9a2607a1 100644 --- a/extension/partiql-extension-ion/src/decode.rs +++ b/extension/partiql-extension-ion/src/decode.rs @@ -1,5 +1,5 @@ use delegate::delegate; -use ion_rs::{Decimal, Int, IonError, IonReader, IonType, StreamItem, Symbol}; +use ion_rs_old::{Decimal, Int, IonError, IonReader, IonType, StreamItem, Symbol}; use once_cell::sync::Lazy; use partiql_value::{Bag, DateTime, List, Tuple, Value}; use regex::RegexSet; diff --git a/extension/partiql-extension-ion/src/encode.rs b/extension/partiql-extension-ion/src/encode.rs index d0992dfd..8a1560f5 100644 --- a/extension/partiql-extension-ion/src/encode.rs +++ b/extension/partiql-extension-ion/src/encode.rs @@ -1,6 +1,6 @@ use delegate::delegate; -use ion_rs::{IonError, IonType, IonWriter}; +use ion_rs_old::{IonError, IonType, IonWriter}; use ordered_float::OrderedFloat; use partiql_value::{Bag, DateTime, List, Tuple, Value}; use rust_decimal::Decimal; @@ -218,7 +218,7 @@ where fn encode_decimal(&mut self, val: &Decimal) -> IonEncodeResult { let scale = i64::from(val.scale()); let mantissa = val.mantissa(); - let dec = ion_rs::Decimal::new(mantissa, -scale); + let dec = ion_rs_old::Decimal::new(mantissa, -scale); Ok(self.writer.write_decimal(&dec)?) } @@ -233,7 +233,7 @@ where fn encode_datetime(&mut self, val: &DateTime) -> IonEncodeResult { match val { DateTime::Timestamp(ts) => { - let ts = ion_rs::Timestamp::with_ymd( + let ts = ion_rs_old::Timestamp::with_ymd( ts.year() as u32, ts.month() as u32, u32::from(ts.day()), @@ -249,7 +249,7 @@ where Ok(self.writer.write_timestamp(&ts)?) 
} DateTime::TimestampWithTz(ts) => { - let ts = ion_rs::Timestamp::with_ymd( + let ts = ion_rs_old::Timestamp::with_ymd( ts.year() as u32, ts.month() as u32, u32::from(ts.day()), @@ -334,7 +334,7 @@ where self.inner .writer .set_annotations(std::iter::once(DATE_ANNOT)); - let ts = ion_rs::Timestamp::with_ymd( + let ts = ion_rs_old::Timestamp::with_ymd( date.year() as u32, date.month() as u32, u32::from(date.day()), diff --git a/extension/partiql-extension-ion/src/lib.rs b/extension/partiql-extension-ion/src/lib.rs index 6a82e5cd..a682b0fc 100644 --- a/extension/partiql-extension-ion/src/lib.rs +++ b/extension/partiql-extension-ion/src/lib.rs @@ -14,17 +14,17 @@ mod tests { use crate::encode::{IonEncodeError, IonEncoderBuilder, IonEncoderConfig}; use itertools::Itertools; - use ion_rs::element::writer::TextKind; - use ion_rs::element::{Element, IntoAnnotatedElement}; - use ion_rs::types::{Bytes, Sequence, Struct}; - use ion_rs::{Decimal, Int, IonType, Str, Timestamp}; + use ion_rs_old::element::writer::TextKind; + use ion_rs_old::element::{Element, IntoAnnotatedElement}; + use ion_rs_old::types::{Bytes, Sequence, Struct}; + use ion_rs_old::{Decimal, Int, IonType, Str, Timestamp}; use partiql_value::{bag, list, tuple, DateTime, Value}; use rust_decimal_macros::dec; use std::num::NonZeroU8; fn decode_ion_text(contents: &str, encoding: Encoding) -> IonDecodeResult { - let reader = ion_rs::ReaderBuilder::new().build(contents)?; + let reader = ion_rs_old::ReaderBuilder::new().build(contents)?; let mut iter = IonDecoderBuilder::new(IonDecoderConfig::default().with_mode(encoding)) .build(reader)?; @@ -35,7 +35,7 @@ mod tests { fn encode_ion_text(value: &Value, encoding: Encoding) -> Result { let mut buff = vec![]; - let mut writer = ion_rs::TextWriterBuilder::new(TextKind::Compact) + let mut writer = ion_rs_old::TextWriterBuilder::new(TextKind::Compact) .build(&mut buff) .expect("writer"); let mut encoder = IonEncoderBuilder::new(IonEncoderConfig::default().with_mode(encoding)) @@ -50,10 +50,10 @@ mod tests { } fn decode_ion_element( - contents: ion_rs::element::Element, + contents: ion_rs_old::element::Element, encoding: Encoding, ) -> IonDecodeResult { - let reader = ion_rs::element::element_stream_reader::ElementStreamReader::new(contents); + let reader = ion_rs_old::element::element_stream_reader::ElementStreamReader::new(contents); let mut iter = IonDecoderBuilder::new(IonDecoderConfig::default().with_mode(encoding)) .build(reader)?; @@ -65,9 +65,10 @@ mod tests { fn encode_ion_element( value: &Value, encoding: Encoding, - ) -> Result, IonEncodeError> { + ) -> Result, IonEncodeError> { let mut out = vec![]; - let mut writer = ion_rs::element::element_stream_writer::ElementStreamWriter::new(&mut out); + let mut writer = + ion_rs_old::element::element_stream_writer::ElementStreamWriter::new(&mut out); let mut encoder = IonEncoderBuilder::new(IonEncoderConfig::default().with_mode(encoding)) .build(&mut writer)?; encoder.write_value(value)?; @@ -81,7 +82,7 @@ mod tests { #[track_caller] fn assert_decode_encode( ion: &str, - element: impl Into, + element: impl Into, val: impl Into, encoding: Encoding, ) { @@ -115,14 +116,18 @@ mod tests { } #[track_caller] - fn assert_ion(ion: &str, element: impl Into, val: impl Into) { + fn assert_ion( + ion: &str, + element: impl Into, + val: impl Into, + ) { assert_decode_encode(ion, element, val, Encoding::Ion); } #[track_caller] fn assert_partiql_encoded_ion( ion: &str, - element: impl Into, + element: impl Into, val: impl Into, ) { assert_decode_encode(ion, 
element, val, Encoding::PartiqlEncodedAsIon); @@ -132,44 +137,44 @@ mod tests { fn partiql_value_from_ion() { assert_ion( "null", - ion_rs::element::Value::Null(IonType::Null), + ion_rs_old::element::Value::Null(IonType::Null), Value::Null, ); // bool - assert_ion("true", ion_rs::element::Value::Bool(true), true); - assert_ion("false", ion_rs::element::Value::Bool(false), false); + assert_ion("true", ion_rs_old::element::Value::Bool(true), true); + assert_ion("false", ion_rs_old::element::Value::Bool(false), false); // int - assert_ion("42", ion_rs::element::Value::Int(Int::I64(42)), 42); - assert_ion("-5", ion_rs::element::Value::Int(Int::I64(-5)), -5); + assert_ion("42", ion_rs_old::element::Value::Int(Int::I64(42)), 42); + assert_ion("-5", ion_rs_old::element::Value::Int(Int::I64(-5)), -5); // float - assert_ion("1.1e0", ion_rs::element::Value::Float(1.1), 1.1); + assert_ion("1.1e0", ion_rs_old::element::Value::Float(1.1), 1.1); // decimal assert_ion( "1.", - ion_rs::element::Value::Decimal(Decimal::new(1, 0)), + ion_rs_old::element::Value::Decimal(Decimal::new(1, 0)), dec!(1), ); // text assert_ion( "'foo'", - ion_rs::element::Value::String(Str::from("foo")), + ion_rs_old::element::Value::String(Str::from("foo")), "foo", ); assert_ion( "\"foo\"", - ion_rs::element::Value::String(Str::from("foo")), + ion_rs_old::element::Value::String(Str::from("foo")), "foo", ); // datetime assert_ion( "2017-01-01T01:02:03.4+00:30", - ion_rs::element::Value::Timestamp( + ion_rs_old::element::Value::Timestamp( Timestamp::with_ymd_hms_millis(2017, 1, 1, 1, 2, 3, 400) .build_at_offset(30) .expect("ion timestamp"), @@ -187,7 +192,7 @@ mod tests { ); assert_ion( "2017-01-01T01:02:03.4-00:00", - ion_rs::element::Value::Timestamp( + ion_rs_old::element::Value::Timestamp( Timestamp::with_ymd_hms_millis(2017, 1, 1, 1, 2, 3, 400) .build_at_unknown_offset() .expect("ion timestamp"), @@ -207,22 +212,22 @@ mod tests { // lob assert_ion( "{{ +AB/ }}", - ion_rs::element::Value::Blob(Bytes::from(vec![248, 0, 127])), + ion_rs_old::element::Value::Blob(Bytes::from(vec![248, 0, 127])), Value::Blob(Box::new(vec![248, 0, 127])), ); assert_ion( "{{ \"CLOB of text.\" }}", - ion_rs::element::Value::Clob(Bytes::from("CLOB of text.")), + ion_rs_old::element::Value::Clob(Bytes::from("CLOB of text.")), Value::Blob(Box::new("CLOB of text.".bytes().collect_vec())), ); // list assert_ion( "[1,2,\"3\"]", - ion_rs::element::Value::List(Sequence::new([ - ion_rs::element::Value::Int(Int::I64(1)), - ion_rs::element::Value::Int(Int::I64(2)), - ion_rs::element::Value::String(Str::from("3")), + ion_rs_old::element::Value::List(Sequence::new([ + ion_rs_old::element::Value::Int(Int::I64(1)), + ion_rs_old::element::Value::Int(Int::I64(2)), + ion_rs_old::element::Value::String(Str::from("3")), ])), list![1, 2, "3"], ); @@ -230,9 +235,9 @@ mod tests { // struct assert_ion( "{\"k\": [1,2,3]}", - ion_rs::element::Value::Struct( + ion_rs_old::element::Value::Struct( Struct::builder() - .with_field("k", ion_rs::element::List(Sequence::new([1, 2, 3]))) + .with_field("k", ion_rs_old::element::List(Sequence::new([1, 2, 3]))) .build(), ), tuple![("k", list![1, 2, 3])], @@ -243,49 +248,49 @@ mod tests { fn partiql_value_from_partiql_encoded_ion() { assert_partiql_encoded_ion( "null", - ion_rs::element::Value::Null(IonType::Null), + ion_rs_old::element::Value::Null(IonType::Null), Value::Null, ); assert_partiql_encoded_ion( "$missing::null", - ion_rs::element::Value::Null(IonType::Null).with_annotations(["$missing"]), + 
ion_rs_old::element::Value::Null(IonType::Null).with_annotations(["$missing"]), Value::Missing, ); // bool - assert_partiql_encoded_ion("true", ion_rs::element::Value::Bool(true), true); - assert_partiql_encoded_ion("false", ion_rs::element::Value::Bool(false), false); + assert_partiql_encoded_ion("true", ion_rs_old::element::Value::Bool(true), true); + assert_partiql_encoded_ion("false", ion_rs_old::element::Value::Bool(false), false); // int - assert_partiql_encoded_ion("42", ion_rs::element::Value::Int(Int::I64(42)), 42); - assert_partiql_encoded_ion("-5", ion_rs::element::Value::Int(Int::I64(-5)), -5); + assert_partiql_encoded_ion("42", ion_rs_old::element::Value::Int(Int::I64(42)), 42); + assert_partiql_encoded_ion("-5", ion_rs_old::element::Value::Int(Int::I64(-5)), -5); // float - assert_partiql_encoded_ion("1.1e0", ion_rs::element::Value::Float(1.1), 1.1); + assert_partiql_encoded_ion("1.1e0", ion_rs_old::element::Value::Float(1.1), 1.1); // decimal assert_partiql_encoded_ion( "1.", - ion_rs::element::Value::Decimal(Decimal::new(1, 0)), + ion_rs_old::element::Value::Decimal(Decimal::new(1, 0)), dec!(1), ); // text assert_partiql_encoded_ion( "'foo'", - ion_rs::element::Value::String(Str::from("foo")), + ion_rs_old::element::Value::String(Str::from("foo")), "foo", ); assert_partiql_encoded_ion( "\"foo\"", - ion_rs::element::Value::String(Str::from("foo")), + ion_rs_old::element::Value::String(Str::from("foo")), "foo", ); // datetime assert_partiql_encoded_ion( "2017-01-01T01:02:03.4+00:30", - ion_rs::element::Value::Timestamp( + ion_rs_old::element::Value::Timestamp( Timestamp::with_ymd_hms_millis(2017, 1, 1, 1, 2, 3, 400) .build_at_offset(30) .expect("ion timestamp"), @@ -303,7 +308,7 @@ mod tests { ); assert_partiql_encoded_ion( "2017-01-01T01:02:03.4-00:00", - ion_rs::element::Value::Timestamp( + ion_rs_old::element::Value::Timestamp( Timestamp::with_ymd_hms_millis(2017, 1, 1, 1, 2, 3, 400) .build_at_unknown_offset() .expect("ion timestamp"), @@ -321,12 +326,12 @@ mod tests { ); assert_partiql_encoded_ion( "$time::{ hour: 12, minute: 11, second: 10.08}", - ion_rs::element::Value::Struct( + ion_rs_old::element::Value::Struct( Struct::builder() .with_fields([ - ("hour", ion_rs::element::Value::Int(Int::I64(12))), - ("minute", ion_rs::element::Value::Int(Int::I64(11))), - ("second", ion_rs::element::Value::Float(10.08)), + ("hour", ion_rs_old::element::Value::Int(Int::I64(12))), + ("minute", ion_rs_old::element::Value::Int(Int::I64(11))), + ("second", ion_rs_old::element::Value::Float(10.08)), ]) .build(), ) @@ -335,14 +340,20 @@ mod tests { ); assert_partiql_encoded_ion( "$time::{ hour: 12, minute: 11, second: 10.08, timezone_hour: 0, timezone_minute: 30}", - ion_rs::element::Value::Struct( + ion_rs_old::element::Value::Struct( Struct::builder() .with_fields([ - ("hour", ion_rs::element::Value::Int(Int::I64(12))), - ("minute", ion_rs::element::Value::Int(Int::I64(11))), - ("second", ion_rs::element::Value::Float(10.08)), - ("timezone_hour", ion_rs::element::Value::Int(Int::I64(0))), - ("timezone_minute", ion_rs::element::Value::Int(Int::I64(30))), + ("hour", ion_rs_old::element::Value::Int(Int::I64(12))), + ("minute", ion_rs_old::element::Value::Int(Int::I64(11))), + ("second", ion_rs_old::element::Value::Float(10.08)), + ( + "timezone_hour", + ion_rs_old::element::Value::Int(Int::I64(0)), + ), + ( + "timezone_minute", + ion_rs_old::element::Value::Int(Int::I64(30)), + ), ]) .build(), ) @@ -351,7 +362,7 @@ mod tests { ); assert_partiql_encoded_ion( "$date::1957-05-25T", - 
ion_rs::element::Value::Timestamp( + ion_rs_old::element::Value::Timestamp( Timestamp::with_ymd(1957, 5, 25) .build() .expect("ion timestamp"), @@ -363,22 +374,22 @@ mod tests { // lob assert_partiql_encoded_ion( "{{ +AB/ }}", - ion_rs::element::Value::Blob(Bytes::from(vec![248, 0, 127])), + ion_rs_old::element::Value::Blob(Bytes::from(vec![248, 0, 127])), Value::Blob(Box::new(vec![248, 0, 127])), ); assert_partiql_encoded_ion( "{{ \"CLOB of text.\" }}", - ion_rs::element::Value::Clob(Bytes::from("CLOB of text.")), + ion_rs_old::element::Value::Clob(Bytes::from("CLOB of text.")), Value::Blob(Box::new("CLOB of text.".bytes().collect_vec())), ); // list assert_partiql_encoded_ion( "[1,2,\"3\"]", - ion_rs::element::Value::List(Sequence::new([ - ion_rs::element::Value::Int(Int::I64(1)), - ion_rs::element::Value::Int(Int::I64(2)), - ion_rs::element::Value::String(Str::from("3")), + ion_rs_old::element::Value::List(Sequence::new([ + ion_rs_old::element::Value::Int(Int::I64(1)), + ion_rs_old::element::Value::Int(Int::I64(2)), + ion_rs_old::element::Value::String(Str::from("3")), ])), list![1, 2, "3"], ); @@ -386,12 +397,12 @@ mod tests { // bag assert_partiql_encoded_ion( "$bag::[1,2,\"3\", null, $missing::null]", - ion_rs::element::Value::List(Sequence::new::([ - ion_rs::element::Value::Int(Int::I64(1)).into(), - ion_rs::element::Value::Int(Int::I64(2)).into(), - ion_rs::element::Value::String(Str::from("3")).into(), - ion_rs::element::Value::Null(IonType::Null).into(), - ion_rs::element::Value::Null(IonType::Null).with_annotations(["$missing"]), + ion_rs_old::element::Value::List(Sequence::new::([ + ion_rs_old::element::Value::Int(Int::I64(1)).into(), + ion_rs_old::element::Value::Int(Int::I64(2)).into(), + ion_rs_old::element::Value::String(Str::from("3")).into(), + ion_rs_old::element::Value::Null(IonType::Null).into(), + ion_rs_old::element::Value::Null(IonType::Null).with_annotations(["$missing"]), ])) .with_annotations(["$bag"]), bag![1, 2, "3", Value::Null, Value::Missing], @@ -400,18 +411,21 @@ mod tests { // struct assert_partiql_encoded_ion( "{\"k\": []}", - ion_rs::element::Value::Struct( + ion_rs_old::element::Value::Struct( Struct::builder() - .with_field("k", ion_rs::element::List(Sequence::new::([]))) + .with_field( + "k", + ion_rs_old::element::List(Sequence::new::([])), + ) .build(), ), tuple![("k", list![])], ); assert_partiql_encoded_ion( "{\"k\": [1,2,3]}", - ion_rs::element::Value::Struct( + ion_rs_old::element::Value::Struct( Struct::builder() - .with_field("k", ion_rs::element::List(Sequence::new([1, 2, 3]))) + .with_field("k", ion_rs_old::element::List(Sequence::new([1, 2, 3]))) .build(), ), tuple![("k", list![1, 2, 3])], diff --git a/extension/partiql-extension-visualize/Cargo.toml b/extension/partiql-extension-visualize/Cargo.toml index 4aa90dae..697265ca 100644 --- a/extension/partiql-extension-visualize/Cargo.toml +++ b/extension/partiql-extension-visualize/Cargo.toml @@ -9,10 +9,10 @@ readme = "../../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -24,12 +24,12 @@ bench = false partiql-ast = { path = "../../partiql-ast", version = "0.10.*" } partiql-logical = { path = "../../partiql-logical", version = "0.10.*" } -dot-writer = { version = "0.1.*", optional = true } -itertools = { 
version = "0.10.*", optional = true } +dot-writer = { version = "0.1", optional = true } +itertools = { version = "0.13", optional = true } [features] default = [] visualize-dot = [ - "dep:dot-writer", - "dep:itertools", + "dep:dot-writer", + "dep:itertools", ] diff --git a/partiql-ast-passes/Cargo.toml b/partiql-ast-passes/Cargo.toml index 6ff1dba2..ff65bae3 100644 --- a/partiql-ast-passes/Cargo.toml +++ b/partiql-ast-passes/Cargo.toml @@ -9,8 +9,8 @@ readme = "../README.md" keywords = ["sql", "ast", "compilers", "visitors", "passes"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", + "**/.git/**", + "**/.github/**", ] version.workspace = true edition.workspace = true @@ -25,9 +25,9 @@ partiql-catalog = { path = "../partiql-catalog", version = "0.10.*" } partiql-common = { path = "../partiql-common", version = "0.10.*" } partiql-types = { path = "../partiql-types", version = "0.10.*" } -assert_matches = "1.5.*" +assert_matches = "1" fnv = "1" -indexmap = "2.2" +indexmap = "2.5" thiserror = "1.0" [dev-dependencies] diff --git a/partiql-ast/Cargo.toml b/partiql-ast/Cargo.toml index 0867c82a..ffeec148 100644 --- a/partiql-ast/Cargo.toml +++ b/partiql-ast/Cargo.toml @@ -21,9 +21,9 @@ bench = false [dependencies] partiql-common = { path = "../partiql-common", version = "0.10.*" } -indexmap = "2.2" -rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] } -serde = { version = "1.*", features = ["derive"], optional = true } +indexmap = "2.5" +rust_decimal = { version = "1.36.0", default-features = false, features = ["std"] } +serde = { version = "1", features = ["derive"], optional = true } pretty = "0.12" thiserror = "1.0" diff --git a/partiql-ast/partiql-ast-macros/Cargo.toml b/partiql-ast/partiql-ast-macros/Cargo.toml index 2c3d8a30..2ec90041 100644 --- a/partiql-ast/partiql-ast-macros/Cargo.toml +++ b/partiql-ast/partiql-ast-macros/Cargo.toml @@ -23,7 +23,7 @@ bench = false [dependencies] quote = "1.0" -syn = {version="2.0", default-features = true, features=["full"]} -proc-macro2 = "1.0.*" +syn = { version = "2.0", default-features = true, features = ["full"] } +proc-macro2 = "1" darling = "0.20" -Inflector = "0.11.*" +Inflector = "0.11" diff --git a/partiql-catalog/Cargo.toml b/partiql-catalog/Cargo.toml index 3e34f5f8..40f2b85c 100644 --- a/partiql-catalog/Cargo.toml +++ b/partiql-catalog/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -27,9 +27,9 @@ partiql-logical = { path = "../partiql-logical", version = "0.10.*" } partiql-types = { path = "../partiql-types", version = "0.10.*" } thiserror = "1.0" -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" diff --git a/partiql-common/Cargo.toml b/partiql-common/Cargo.toml index a20dc02d..1e3b28db 100644 --- a/partiql-common/Cargo.toml +++ b/partiql-common/Cargo.toml @@ -20,12 +20,12 @@ path = "src/lib.rs" bench = false [dependencies] -indexmap = "2.2" +indexmap = "2.5" pretty = "0.12" -serde = { version = "1.*", features = ["derive"], optional = true } -rust_decimal = { version = "1.25.0", default-features = false, features = 
["std"] } -smallvec = { version = "1.*" } -thiserror = "1.0" +serde = { version = "1", features = ["derive"], optional = true } +rust_decimal = { version = "1.36", default-features = false, features = ["std"] } +smallvec = { version = "1" } +thiserror = "1" [features] default = [] diff --git a/partiql-conformance-test-generator/Cargo.toml b/partiql-conformance-test-generator/Cargo.toml index 3a94e059..ebc1ebc7 100644 --- a/partiql-conformance-test-generator/Cargo.toml +++ b/partiql-conformance-test-generator/Cargo.toml @@ -21,10 +21,10 @@ edition.workspace = true bench = false [dependencies] -walkdir = "2.3" -ion-rs = "0.18" -codegen = "0.2.*" -Inflector = "0.11.*" -miette = "5.*" +walkdir = "2.5" +ion-rs_old = { version = "0.18", package = "ion-rs" } +codegen = "0.2" +Inflector = "0.11" +miette = { version = "7", features = ["fancy"] } thiserror = "1.0" -quote = "1.*" +quote = "1" diff --git a/partiql-conformance-test-generator/src/generator.rs b/partiql-conformance-test-generator/src/generator.rs index 2f037623..f686b4d6 100644 --- a/partiql-conformance-test-generator/src/generator.rs +++ b/partiql-conformance-test-generator/src/generator.rs @@ -3,10 +3,10 @@ use crate::schema::structure::*; use crate::util::{escape_fn_code, Escaper}; use codegen::{Function, Module, Scope}; -use ion_rs::TextWriterBuilder; +use ion_rs_old::TextWriterBuilder; -use ion_rs::element::writer::ElementWriter; -use ion_rs::element::{Element, Struct}; +use ion_rs_old::element::writer::ElementWriter; +use ion_rs_old::element::{Element, Struct}; use quote::__private::TokenStream; use quote::quote; use std::collections::{HashMap, HashSet}; diff --git a/partiql-conformance-test-generator/src/reader.rs b/partiql-conformance-test-generator/src/reader.rs index 5397d9aa..dcae2748 100644 --- a/partiql-conformance-test-generator/src/reader.rs +++ b/partiql-conformance-test-generator/src/reader.rs @@ -1,4 +1,4 @@ -use ion_rs::{IonType, ReaderBuilder}; +use ion_rs_old::{IonType, ReaderBuilder}; use miette::{miette, IntoDiagnostic}; use std::ffi::OsStr; use std::fs; @@ -6,8 +6,8 @@ use std::fs::DirEntry; use crate::schema::spec::*; use crate::schema::structure::*; -use ion_rs::element::reader::ElementReader; -use ion_rs::element::{Element, Struct}; +use ion_rs_old::element::reader::ElementReader; +use ion_rs_old::element::{Element, Struct}; use std::path::Path; macro_rules! 
expect_value { diff --git a/partiql-conformance-test-generator/src/schema.rs b/partiql-conformance-test-generator/src/schema.rs index 0abec249..d922fc20 100644 --- a/partiql-conformance-test-generator/src/schema.rs +++ b/partiql-conformance-test-generator/src/schema.rs @@ -24,7 +24,7 @@ pub mod structure { } pub mod spec { - use ion_rs::element::{Element, Struct}; + use ion_rs_old::element::{Element, Struct}; #[derive(Debug, Clone)] pub enum TestVariant { diff --git a/partiql-conformance-tests/Cargo.toml b/partiql-conformance-tests/Cargo.toml index 32161a27..e29f5d21 100644 --- a/partiql-conformance-tests/Cargo.toml +++ b/partiql-conformance-tests/Cargo.toml @@ -28,7 +28,7 @@ required-features = ["report_tool"] bench = false [build-dependencies] -miette = { version = "5.*", features = ["fancy"] } +miette = { version = "7", features = ["fancy"] } partiql-conformance-test-generator = { path = "../partiql-conformance-test-generator", version = "0.10.*" } [dependencies] @@ -42,17 +42,17 @@ partiql-value = { path = "../partiql-value", version = "0.10.*" } partiql-eval = { path = "../partiql-eval", version = "0.10.*" } partiql-extension-ion = { path = "../extension/partiql-extension-ion", version = "0.10.*" } -ion-rs = "0.18" +ion-rs_old = { version = "0.18", package = "ion-rs" } -regex = "1.7" +regex = "1.10" once_cell = "1" -rust_decimal = "1.27" +rust_decimal = "1.36" thiserror = "1.0" -serde = { version = "1.*", features = ["derive"], optional = true } -serde_json = { version = "1.*", optional = true } +serde = { version = "1", features = ["derive"], optional = true } +serde_json = { version = "1", optional = true } [features] default = ["base"] diff --git a/partiql-conformance-tests/tests/test_value.rs b/partiql-conformance-tests/tests/test_value.rs index dae359c2..f46d46cf 100644 --- a/partiql-conformance-tests/tests/test_value.rs +++ b/partiql-conformance-tests/tests/test_value.rs @@ -17,7 +17,7 @@ impl From<&str> for TestValue { } fn parse_test_value_str(contents: &str) -> Value { - let reader = ion_rs::ReaderBuilder::new() + let reader = ion_rs_old::ReaderBuilder::new() .build(contents) .expect("reading contents"); let mut iter = IonDecoderBuilder::new( diff --git a/partiql-eval/Cargo.toml b/partiql-eval/Cargo.toml index 4384d6c4..b7c13ddc 100644 --- a/partiql-eval/Cargo.toml +++ b/partiql-eval/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -25,21 +25,21 @@ partiql-logical = { path = "../partiql-logical", version = "0.10.*" } partiql-value = { path = "../partiql-value", version = "0.10.*" } partiql-catalog = { path = "../partiql-catalog", version = "0.10.*" } partiql-types = { path = "../partiql-types", version = "0.10.*" } -petgraph = "0.6.*" -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" -rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] } -rust_decimal_macros = "1.26" -thiserror = "1.0" -assert_matches = "1.5.*" -regex = "1.7" -regex-syntax = "0.6" -rustc-hash = "1" -delegate = "0.12" +petgraph = "0.6" +ordered-float = "4" +itertools = "0.13" +unicase = "2" +rust_decimal = { version = "1", default-features = false, features = ["std"] } +rust_decimal_macros = "1" +thiserror = "1" +assert_matches = "1" +regex = 
"1" +regex-syntax = "0.8" +rustc-hash = "2" +delegate = "0.13" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [[bench]] name = "bench_eval" diff --git a/partiql-logical-planner/Cargo.toml b/partiql-logical-planner/Cargo.toml index 1b9de4c4..da1d3ee9 100644 --- a/partiql-logical-planner/Cargo.toml +++ b/partiql-logical-planner/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -25,23 +25,23 @@ partiql-ast = { path = "../partiql-ast", version = "0.10.*" } partiql-ast-passes = { path = "../partiql-ast-passes", version = "0.10.*" } partiql-catalog = { path = "../partiql-catalog", version = "0.10.*" } partiql-common = { path = "../partiql-common", version = "0.10.*" } -partiql-extension-ion = {path = "../extension/partiql-extension-ion", version = "0.10.*" } +partiql-extension-ion = { path = "../extension/partiql-extension-ion", version = "0.10.*" } partiql-parser = { path = "../partiql-parser", version = "0.10.*" } partiql-logical = { path = "../partiql-logical", version = "0.10.*" } partiql-types = { path = "../partiql-types", version = "0.10.*" } partiql-value = { path = "../partiql-value", version = "0.10.*" } -ion-rs = "0.18" -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" -indexmap = "2.2" -petgraph = "0.6.*" +ion-rs_old = { version = "0.18", package = "ion-rs" } +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" +indexmap = "2.5" +petgraph = "0.6" num = "0.4" fnv = "1" -assert_matches = "1.5.*" +assert_matches = "1" once_cell = "1" -thiserror = "1.0" +thiserror = "1" [dev-dependencies] partiql-eval = { path = "../partiql-eval", version = "0.10.*" } diff --git a/partiql-logical-planner/src/lower.rs b/partiql-logical-planner/src/lower.rs index e23c8566..b691aad5 100644 --- a/partiql-logical-planner/src/lower.rs +++ b/partiql-logical-planner/src/lower.rs @@ -185,7 +185,7 @@ pub struct AstToLogical<'a> { } /// Attempt to infer an alias for a simple variable reference expression. 
-/// For example infer such that `SELECT a, b.c.d.e ...` <=> `SELECT a as a, b.c.d.e as e` +/// For example infer such that `SELECT a, b.c.d.e ...` <=> `SELECT a as a, b.c.d.e as e` fn infer_id(expr: &ValueExpr) -> Option { let sensitive = |value: &str| { Some(SymbolPrimitive { @@ -1985,7 +1985,7 @@ fn parse_embedded_ion_str(contents: &str) -> Result { } } - let reader = ion_rs::ReaderBuilder::new() + let reader = ion_rs_old::ReaderBuilder::new() .build(contents) .map_err(|e| lit_err(contents, e))?; let mut iter = IonDecoderBuilder::new(IonDecoderConfig::default().with_mode(Encoding::Ion)) diff --git a/partiql-logical/Cargo.toml b/partiql-logical/Cargo.toml index c482eabf..52ef3eb6 100644 --- a/partiql-logical/Cargo.toml +++ b/partiql-logical/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -22,15 +22,15 @@ bench = false [dependencies] partiql-value = { path = "../partiql-value", version = "0.10.*" } -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" -serde = { version = "1.*", features = ["derive"], optional = true } +serde = { version = "1", features = ["derive"], optional = true } [features] default = [] serde = [ - "dep:serde", - "ordered-float/serde" + "dep:serde", + "ordered-float/serde" ] diff --git a/partiql-parser/Cargo.toml b/partiql-parser/Cargo.toml index fa666915..52013913 100644 --- a/partiql-parser/Cargo.toml +++ b/partiql-parser/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers", "parser-implementations"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -22,7 +22,7 @@ bench = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [build-dependencies] -lalrpop = "0.20" +lalrpop = "0.21" [dependencies] partiql-ast = { path = "../partiql-ast", version = "0.10.*" } @@ -30,33 +30,33 @@ partiql-common = { path = "../partiql-common", version = "0.10.*" } thiserror = "1.0" -num-traits = "~0.2.14" -num-bigint = "~0.4.0" -bigdecimal = "~0.2.0" -rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] } +num-traits = "0.2" +num-bigint = "0.4" +bigdecimal = "0.4" +rust_decimal = { version = "1.36.0", default-features = false, features = ["std"] } bitflags = "2" -lalrpop-util = "0.20" -logos = "0.12" +lalrpop-util = "0.21" +logos = "0.14" -itertools = "~0.10.3" +itertools = "0.13" -regex = "1.7" +regex = "1.10" once_cell = "1" -serde = { version = "1.*", features = ["derive"], optional = true } +serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [features] default = [] serde = [ - "dep:serde", - "rust_decimal/serde-with-str", - "partiql-ast/serde", - "partiql-common/serde" + "dep:serde", + "rust_decimal/serde-with-str", + "partiql-ast/serde", + "partiql-common/serde" ] [[bench]] diff --git a/partiql-parser/src/lexer.rs b/partiql-parser/src/lexer.rs deleted file mode 100644 index 
6e2fbe18..00000000 --- a/partiql-parser/src/lexer.rs +++ /dev/null @@ -1,1319 +0,0 @@ -use partiql_common::syntax::location::{ByteOffset, BytePosition, ToLocated}; -use std::borrow::Cow; - -use logos::{Logos, Span}; - -use std::cmp::max; - -use std::fmt; -use std::fmt::Formatter; - -use crate::error::{LexError, ParseError}; -use partiql_common::syntax::line_offset_tracker::LineOffsetTracker; - -/// A 3-tuple of (start, `Tok`, end) denoting a token and it start and end offsets. -pub type Spanned = (Loc, Tok, Loc); -/// A [`Result`] of a [`Spanned`] token. -pub(crate) type SpannedResult = Result, Spanned>; - -/// A block comment string (e.g. `"/* comment here */"`) with [`ByteOffset`] span relative to lexed source. -/// -/// Note: -/// - The returned string includes the comment start (`/*`) and end (`*/`) tokens. -/// - The returned `ByteOffset` span includes the comment start (`/*`) and end (`*/`) tokens. -type CommentStringResult<'input> = SpannedResult<&'input str, ByteOffset, LexError<'input>>; - -/// Tokens used to parse block comment -#[derive(Logos, Debug, Clone, PartialEq, Eq)] -enum CommentToken { - #[error] - // Skip stuff that won't interfere with comment detection - #[regex(r"[^/*\r\n\u0085\u2028\u2029]+", logos::skip)] - Any, - // Skip newlines, but record their position. - // For line break recommendations, - // see https://www.unicode.org/standard/reports/tr13/tr13-5.html - #[regex(r"(([\r])?[\n])|\u0085|\u2028|\u2029")] - Newline, - #[token("*/")] - End, - #[token("/*")] - Start, -} - -/// A lexer for block comments (enclosed between '/*' & '*/') that returns the parsed [`CommentString`] -struct CommentLexer<'input, 'tracker> { - /// Wrap a logos-generated lexer - lexer: logos::Lexer<'input, CommentToken>, - comment_nesting: bool, - tracker: &'tracker mut LineOffsetTracker, -} - -impl<'input, 'tracker> CommentLexer<'input, 'tracker> { - /// Creates a new block comment lexer over `input` text. - /// Nested comment parsing is *off* by default; see [`with_nesting`] to enable nesting. - #[inline] - pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { - CommentLexer { - lexer: CommentToken::lexer(input), - comment_nesting: false, - tracker, - } - } - - /// Toggles *on* the parsing of nested comments - #[inline] - fn with_nesting(mut self) -> Self { - self.comment_nesting = true; - self - } - - /// Parses a single (possibly nested) block comment and returns it - fn next_internal(&mut self) -> Option> { - let Span { start, .. } = self.lexer.span(); - let mut nesting = 0; - let nesting_inc = i32::from(self.comment_nesting); - 'comment: loop { - match self.lexer.next() { - Some(CommentToken::Any) => continue, - Some(CommentToken::Newline) => { - self.tracker.record(self.lexer.span().end.into()); - } - Some(CommentToken::Start) => nesting = max(1, nesting + nesting_inc), - Some(CommentToken::End) => { - if nesting == 0 { - let Span { end, .. } = self.lexer.span(); - return Some(Err((start.into(), LexError::Unknown, end.into()))); - } - nesting -= 1; - if nesting == 0 { - break 'comment; - } - } - None => { - return if nesting != 0 { - let Span { end, .. } = self.lexer.span(); - Some(Err(( - start.into(), - LexError::UnterminatedComment, - end.into(), - ))) - } else { - None - } - } - } - } - let Span { end, .. 
} = self.lexer.span(); - let comment = &self.lexer.source()[start..end]; - - Some(Ok((start.into(), comment, end.into()))) - } -} - -impl<'input, 'tracker> Iterator for CommentLexer<'input, 'tracker> { - type Item = CommentStringResult<'input>; - - #[inline(always)] - fn next(&mut self) -> Option { - self.next_internal() - } -} - -/// An embedded Ion string (e.g. `[{a: 1}, {b: 2}]`) with [`ByteOffset`] span -/// relative to lexed source. -/// -/// Note: -/// - The lexer parses the embedded ion value enclosed in backticks. -/// - The returned string *does not* include the backticks -/// - The returned `ByteOffset` span *does* include the backticks -type EmbeddedIonStringResult<'input> = SpannedResult<&'input str, ByteOffset, LexError<'input>>; - -/// Tokens used to parse Ion literals embedded in backticks (\`) -#[derive(Logos, Debug, Clone, PartialEq)] -enum EmbeddedIonToken { - #[error] - // Skip stuff that doesn't interfere with comment or string detection - #[regex(r#"[^/*'"`\r\n\u0085\u2028\u2029]+"#, logos::skip)] - Any, - - // Skip newlines, but record their position. - // For line break recommendations, - // see https://www.unicode.org/standard/reports/tr13/tr13-5.html - #[regex(r"(([\r])?[\n])|\u0085|\u2028|\u2029")] - Newline, - - #[token("`")] - Embed, - - #[regex(r"//[^\n]*")] - CommentLine, - #[token("/*")] - CommentBlock, - - #[regex(r#""([^"\\]|\\t|\\u|\\")*""#)] - String, - #[regex(r#"'([^'\\]|\\t|\\u|\\')*'"#)] - Symbol, - #[token("'''")] - LongString, -} - -/// A Lexer for Ion literals embedded in backticks (\`) that returns the parsed [`EmbeddedIonString`] -/// -/// Parses just enough Ion to make sure not to include a backtick that is inside a string or comment. -struct EmbeddedIonLexer<'input, 'tracker> { - /// Wrap a logos-generated lexer - lexer: logos::Lexer<'input, EmbeddedIonToken>, - tracker: &'tracker mut LineOffsetTracker, -} - -impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> { - /// Creates a new embedded ion lexer over `input` text. - #[inline] - pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { - EmbeddedIonLexer { - lexer: EmbeddedIonToken::lexer(input), - tracker, - } - } - - /// Parses a single embedded ion value, quoted between backticks (`), and returns it - fn next_internal(&mut self) -> Option> { - let next_token = self.lexer.next(); - match next_token { - Some(EmbeddedIonToken::Embed) => { - let Span { start, .. 
} = self.lexer.span(); - 'ion_value: loop { - let next_tok = self.lexer.next(); - match next_tok { - Some(EmbeddedIonToken::Newline) => { - self.tracker.record(self.lexer.span().end.into()); - } - Some(EmbeddedIonToken::Embed) => { - break 'ion_value; - } - Some(EmbeddedIonToken::CommentBlock) => { - let embed = self.lexer.span(); - let remaining = &self.lexer.source()[embed.start..]; - let mut comment_tracker = LineOffsetTracker::default(); - let mut comment_lexer = - CommentLexer::new(remaining, &mut comment_tracker); - match comment_lexer.next_internal() { - Some(Ok((s, _c, e))) => { - self.tracker.append(&comment_tracker, embed.start.into()); - self.lexer.bump((e - s).to_usize() - embed.len()); - } - Some(Err((s, err, e))) => { - let offset: ByteOffset = embed.start.into(); - return Some(Err((s + offset, err, e + offset))); - } - None => unreachable!(), - } - } - Some(EmbeddedIonToken::LongString) => { - 'triple_quote: loop { - let next_tok = self.lexer.next(); - match next_tok { - Some(EmbeddedIonToken::LongString) => break 'triple_quote, - Some(_) => (), // just consume all other tokens - None => continue 'ion_value, - } - } - } - Some(_) => { - // just consume all other tokens - } - None => { - let Span { end, .. } = self.lexer.span(); - return Some(Err(( - start.into(), - LexError::UnterminatedIonLiteral, - end.into(), - ))); - } - } - } - let Span { end, .. } = self.lexer.span(); - let (str_start, str_end) = (start + 1, end - 1); - let ion_value = &self.lexer.source()[str_start..str_end]; - - Some(Ok((start.into(), ion_value, end.into()))) - } - _ => None, - } - } -} - -impl<'input, 'tracker> Iterator for EmbeddedIonLexer<'input, 'tracker> { - type Item = EmbeddedIonStringResult<'input>; - - #[inline(always)] - fn next(&mut self) -> Option { - self.next_internal() - } -} - -/// A lexer from `PartiQL` text strings to [`Token`]s -pub(crate) struct PartiqlLexer<'input, 'tracker> { - /// Wrap a logos-generated lexer - lexer: logos::Lexer<'input, Token<'input>>, - tracker: &'tracker mut LineOffsetTracker, -} - -pub(crate) type InternalLexResult<'input> = - SpannedResult, ByteOffset, LexError<'input>>; -pub(crate) type LexResult<'input> = - Result, ByteOffset>, ParseError<'input, BytePosition>>; - -impl<'input> From, ByteOffset>> for ParseError<'input, BytePosition> { - fn from(res: Spanned, ByteOffset>) -> Self { - let (start, cause, end) = res; - ParseError::LexicalError( - cause.to_located(BytePosition::from(start)..BytePosition::from(end)), - ) - } -} - -impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> { - /// Creates a new `PartiQL` lexer over `input` text. - #[inline] - pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { - PartiqlLexer { - lexer: Token::lexer(input), - tracker, - } - } - - /// Creates an error token at the current lexer location - #[inline] - fn err_here( - &self, - err_ctor: fn(Cow<'input, str>) -> LexError<'input>, - ) -> InternalLexResult<'input> { - let region = self.lexer.slice(); - let Span { start, end } = self.lexer.span(); - Err((start.into(), err_ctor(region.into()), end.into())) - } - - pub fn slice(&self) -> &'input str { - self.lexer.slice() - } - - /// Wraps a [`Token`] into a [`Token`] at the current position of the lexer. - #[inline(always)] - fn wrap(&mut self, token: Token<'input>) -> InternalLexResult<'input> { - let Span { start, end } = self.lexer.span(); - Ok((start.into(), token, end.into())) - } - - /// Advances the iterator and returns the next [`Token`] or [`None`] when input is exhausted. 
- #[inline] - pub(crate) fn next_internal(&mut self) -> Option> { - 'next_tok: loop { - return match self.lexer.next() { - None => None, - Some(token) => match token { - Token::Error => Some(self.err_here(LexError::InvalidInput)), - - Token::Newline => { - self.tracker.record(self.lexer.span().end.into()); - // Newlines shouldn't generate an externally visible token - continue 'next_tok; - } - - Token::EmbeddedIonQuote => self.parse_embedded_ion(), - - Token::CommentBlockStart => self.parse_block_comment(), - - _ => Some(self.wrap(token)), - }, - }; - } - } - - /// Uses [`CommentLexer`] to parse a block comment - fn parse_block_comment(&mut self) -> Option> { - let embed = self.lexer.span(); - let remaining = &self.lexer.source()[embed.start..]; - let mut comment_tracker = LineOffsetTracker::default(); - let mut comment_lexer = CommentLexer::new(remaining, &mut comment_tracker).with_nesting(); - comment_lexer.next_internal().map(|res| match res { - Ok((s, comment, e)) => { - let val_len = e - s; - let val_start = embed.start.into(); // embed end is 1 past the starting '/*' - let val_end = val_start + val_len; - self.tracker.append(&comment_tracker, embed.start.into()); - self.lexer.bump(val_len.to_usize() - embed.len()); - Ok((val_start, Token::CommentBlock(comment), val_end)) - } - Err((s, err, e)) => { - let offset: ByteOffset = embed.start.into(); - Err((s + offset, err, e + offset)) - } - }) - } - - /// Uses [`EmbeddedIonLexer`] to parse an embedded ion value - fn parse_embedded_ion(&mut self) -> Option> { - let embed = self.lexer.span(); - let remaining = &self.lexer.source()[embed.start..]; - let mut ion_tracker = LineOffsetTracker::default(); - let mut ion_lexer = EmbeddedIonLexer::new(remaining, &mut ion_tracker); - ion_lexer.next_internal().map(|res| match res { - Ok((s, ion, e)) => { - let val_len = e - s; - let val_start = embed.end.into(); // embed end is 1 past the starting '`' - let val_end = val_start + val_len - 2; // sub 2 to remove surrounding '`' - self.tracker.append(&ion_tracker, embed.start.into()); - self.lexer.bump(val_len.to_usize() - embed.len()); - Ok((val_start, Token::Ion(ion), val_end)) - } - Err((s, err, e)) => { - let offset: ByteOffset = embed.start.into(); - Err((s + offset, err, e + offset)) - } - }) - } -} - -impl<'input, 'tracker> Iterator for PartiqlLexer<'input, 'tracker> { - type Item = LexResult<'input>; - - #[inline(always)] - fn next(&mut self) -> Option { - self.next_internal() - .map(|res| res.map_err(std::convert::Into::into)) - } -} - -/// Tokens that the lexer can generate. -/// -/// # Note -/// Tokens with names beginning with `__` are used internally and not meant to be used outside lexing. -#[derive(Logos, Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -// TODO make pub(crate) ? -pub enum Token<'input> { - #[error] - // Skip whitespace - #[regex(r"[ \t\f]+", logos::skip)] - Error, - - // Skip newlines, but record their position. 
- // For line break recommendations, - // see https://www.unicode.org/standard/reports/tr13/tr13-5.html - #[regex(r"([\r]?[\n])|\u{0085}|\u{2028}|\u{2029}")] - Newline, - - #[regex(r"--[^\n]*", |lex| lex.slice())] - CommentLine(&'input str), - #[token("/*")] - CommentBlockStart, - CommentBlock(&'input str), - - // Brackets - #[token("[")] - OpenSquare, - #[token("]")] - CloseSquare, - #[token("{")] - OpenCurly, - #[token("}")] - CloseCurly, - #[token("(")] - OpenParen, - #[token(")")] - CloseParen, - #[token("<<")] - OpenDblAngle, - #[token(">>")] - CloseDblAngle, - - // Symbols - #[token(",")] - Comma, - #[token(";")] - Semicolon, - #[token(":")] - Colon, - #[token("==")] - EqualEqual, - #[token("!=")] - BangEqual, - #[token("<>")] - LessGreater, - #[token("<=")] - LessEqual, - #[token(">=")] - GreaterEqual, - #[token("=")] - Equal, - #[token("<")] - LessThan, - #[token(">")] - GreaterThan, - #[token("-")] - Minus, - #[token("+")] - Plus, - #[token("*")] - Star, - #[token("?")] - SqlParameter, - #[token("%")] - Percent, - #[token("/")] - Slash, - #[token("^")] - Caret, - #[token(".")] - Period, - #[token("||")] - DblPipe, - - // unquoted identifiers - #[regex("[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| lex.slice())] - UnquotedIdent(&'input str), - - // quoted identifiers (quoted with double quotes) - #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#, - |lex| lex.slice().trim_matches('"'))] - QuotedIdent(&'input str), - - // unquoted @identifiers - #[regex("@[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| &lex.slice()[1..])] - UnquotedAtIdentifier(&'input str), - - // quoted @identifiers (quoted with double quotes) - #[regex(r#"@"([^"\\]|\\t|\\u|\\n|\\")*""#, - |lex| lex.slice()[1..].trim_matches('"'))] - QuotedAtIdentifier(&'input str), - - #[regex("[0-9]+", |lex| lex.slice())] - Int(&'input str), - - #[regex("[0-9]+\\.[0-9]*([eE][-+]?[0-9]+)", |lex| lex.slice())] - #[regex("\\.[0-9]+([eE][-+]?[0-9]+)", |lex| lex.slice())] - #[regex("[0-9]+[eE][-+]?[0-9]+", |lex| lex.slice())] - ExpReal(&'input str), - - #[regex("[0-9]+\\.[0-9]*", |lex| lex.slice())] - #[regex("\\.[0-9]+", |lex| lex.slice())] - Real(&'input str), - - // strings are single-quoted in SQL/PartiQL - #[regex(r#"'([^'\\]|\\t|\\u|\\n|\\'|\\|(?:''))*'"#, - |lex| lex.slice().trim_matches('\''))] - String(&'input str), - - #[token("`")] - EmbeddedIonQuote, - Ion(&'input str), - - // Keywords - #[regex("(?i:All)")] - All, - #[regex("(?i:Asc)")] - Asc, - #[regex("(?i:And)")] - And, - #[regex("(?i:As)")] - As, - #[regex("(?i:At)")] - At, - #[regex("(?i:Between)")] - Between, - #[regex("(?i:By)")] - By, - #[regex("(?i:Case)")] - Case, - #[regex("(?i:Cross)")] - Cross, - #[regex("(?i:Cycle)")] - Cycle, - #[regex("(?i:Date)")] - Date, - #[regex("(?i:Desc)")] - Desc, - #[regex("(?i:Distinct)")] - Distinct, - #[regex("(?i:Else)")] - Else, - #[regex("(?i:End)")] - End, - #[regex("(?i:Escape)")] - Escape, - #[regex("(?i:Except)")] - Except, - #[regex("(?i:Exclude)")] - Exclude, - #[regex("(?i:False)")] - False, - #[regex("(?i:First)")] - First, - #[regex("(?i:For)")] - For, - #[regex("(?i:Full)")] - Full, - #[regex("(?i:From)")] - From, - #[regex("(?i:Group)")] - Group, - #[regex("(?i:Having)")] - Having, - #[regex("(?i:In)")] - In, - #[regex("(?i:Inner)")] - Inner, - #[regex("(?i:Is)")] - Is, - #[regex("(?i:Intersect)")] - Intersect, - #[regex("(?i:Join)")] - Join, - #[regex("(?i:Last)")] - Last, - #[regex("(?i:Lateral)")] - Lateral, - #[regex("(?i:Left)")] - Left, - #[regex("(?i:Like)")] - Like, - #[regex("(?i:Limit)")] - Limit, - #[regex("(?i:Missing)")] - Missing, - 
#[regex("(?i:Natural)")] - Natural, - #[regex("(?i:Not)")] - Not, - #[regex("(?i:Null)")] - Null, - #[regex("(?i:Nulls)")] - Nulls, - #[regex("(?i:Offset)")] - Offset, - #[regex("(?i:On)")] - On, - #[regex("(?i:Or)")] - Or, - #[regex("(?i:Order)")] - Order, - #[regex("(?i:Outer)")] - Outer, - #[regex("(?i:Partial)")] - Partial, - #[regex("(?i:Pivot)")] - Pivot, - #[regex("(?i:Preserve)")] - Preserve, - #[regex("(?i:Right)")] - Right, - #[regex("(?i:Recursive)")] - Recursive, - #[regex("(?i:Select)")] - Select, - #[regex("(?i:Search)")] - Search, - #[regex("(?i:Table)")] - Table, - #[regex("(?i:Time)")] - Time, - #[regex("(?i:Timestamp)")] - Timestamp, - #[regex("(?i:Then)")] - Then, - #[regex("(?i:True)")] - True, - #[regex("(?i:Union)")] - Union, - #[regex("(?i:Unpivot)")] - Unpivot, - #[regex("(?i:Using)")] - Using, - #[regex("(?i:Value)")] - Value, - #[regex("(?i:Values)")] - Values, - #[regex("(?i:When)")] - When, - #[regex("(?i:Where)")] - Where, - #[regex("(?i:With)")] - With, - #[regex("(?i:Without)")] - Without, - #[regex("(?i:Zone)")] - Zone, -} - -impl<'input> Token<'input> { - pub fn is_keyword(&self) -> bool { - matches!( - self, - Token::All - | Token::Asc - | Token::And - | Token::As - | Token::At - | Token::Between - | Token::By - | Token::Case - | Token::Cross - | Token::Cycle - | Token::Date - | Token::Desc - | Token::Distinct - | Token::Escape - | Token::Except - | Token::First - | Token::For - | Token::Full - | Token::From - | Token::Group - | Token::Having - | Token::In - | Token::Inner - | Token::Is - | Token::Intersect - | Token::Join - | Token::Last - | Token::Lateral - | Token::Left - | Token::Like - | Token::Limit - | Token::Missing - | Token::Natural - | Token::Not - | Token::Null - | Token::Nulls - | Token::Offset - | Token::On - | Token::Or - | Token::Order - | Token::Outer - | Token::Partial - | Token::Pivot - | Token::Preserve - | Token::Right - | Token::Recursive - | Token::Search - | Token::Select - | Token::Table - | Token::Time - | Token::Timestamp - | Token::Then - | Token::Union - | Token::Unpivot - | Token::Using - | Token::Value - | Token::Values - | Token::Where - | Token::With - ) - } -} - -impl<'input> fmt::Display for Token<'input> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Token::Error => write!(f, ""), - Token::Newline => write!(f, "\\n"), - Token::CommentLine(_) => write!(f, "--"), - Token::CommentBlockStart => write!(f, "/*"), - Token::CommentBlock(_) => write!(f, "/**/"), - Token::OpenSquare => write!(f, "["), - Token::CloseSquare => write!(f, "]"), - Token::OpenCurly => write!(f, "{{"), - Token::CloseCurly => write!(f, "}}"), - Token::OpenParen => write!(f, "("), - Token::CloseParen => write!(f, ")"), - Token::OpenDblAngle => write!(f, "<<"), - Token::CloseDblAngle => write!(f, ">>"), - Token::Comma => write!(f, ","), - Token::Semicolon => write!(f, ";"), - Token::Colon => write!(f, ":"), - Token::EqualEqual => write!(f, "=="), - Token::BangEqual => write!(f, "!="), - Token::LessGreater => write!(f, "<>"), - Token::LessEqual => write!(f, "<="), - Token::GreaterEqual => write!(f, ">="), - Token::Equal => write!(f, "="), - Token::LessThan => write!(f, "<"), - Token::GreaterThan => write!(f, ">"), - Token::Minus => write!(f, "-"), - Token::Plus => write!(f, "+"), - Token::Star => write!(f, "*"), - Token::SqlParameter => write!(f, "?"), - Token::Percent => write!(f, "%"), - Token::Slash => write!(f, "/"), - Token::Caret => write!(f, "^"), - Token::Period => write!(f, "."), - Token::DblPipe => write!(f, "||"), - 
Token::UnquotedIdent(id) => write!(f, "<{id}:UNQUOTED_IDENT>"), - Token::QuotedIdent(id) => write!(f, "<{id}:QUOTED_IDENT>"), - Token::UnquotedAtIdentifier(id) => write!(f, "<{id}:UNQUOTED_ATIDENT>"), - Token::QuotedAtIdentifier(id) => write!(f, "<{id}:QUOTED_ATIDENT>"), - Token::Int(txt) => write!(f, "<{txt}:INT>"), - Token::ExpReal(txt) => write!(f, "<{txt}:REAL>"), - Token::Real(txt) => write!(f, "<{txt}:REAL>"), - Token::String(txt) => write!(f, "<{txt}:STRING>"), - Token::EmbeddedIonQuote => write!(f, ""), - Token::Ion(txt) => write!(f, "<{txt}:ION>"), - - Token::All - | Token::Asc - | Token::And - | Token::As - | Token::At - | Token::Between - | Token::By - | Token::Case - | Token::Cross - | Token::Cycle - | Token::Date - | Token::Desc - | Token::Distinct - | Token::Else - | Token::End - | Token::Escape - | Token::Except - | Token::Exclude - | Token::False - | Token::First - | Token::For - | Token::Full - | Token::From - | Token::Group - | Token::Having - | Token::In - | Token::Inner - | Token::Is - | Token::Intersect - | Token::Join - | Token::Last - | Token::Lateral - | Token::Left - | Token::Like - | Token::Limit - | Token::Missing - | Token::Natural - | Token::Not - | Token::Null - | Token::Nulls - | Token::Offset - | Token::On - | Token::Or - | Token::Order - | Token::Outer - | Token::Partial - | Token::Pivot - | Token::Preserve - | Token::Right - | Token::Recursive - | Token::Search - | Token::Select - | Token::Table - | Token::Time - | Token::Timestamp - | Token::Then - | Token::True - | Token::Union - | Token::Unpivot - | Token::Using - | Token::Value - | Token::Values - | Token::When - | Token::Where - | Token::With - | Token::Without - | Token::Zone => { - write!(f, "{}", format!("{self:?}").to_uppercase()) - } - } - } -} - -/// A lexer that wraps another lexer and skips comments. -pub(crate) struct CommentSkippingLexer<'input, L> -where - L: Iterator>, -{ - lexer: L, -} - -impl<'input, L> CommentSkippingLexer<'input, L> -where - L: Iterator>, -{ - /// Creates a new `CommentSkippingLexer` wrapping `lexer` - #[inline] - pub fn new(lexer: L) -> Self { - Self { lexer } - } -} - -impl<'input, L> Iterator for CommentSkippingLexer<'input, L> -where - L: Iterator>, -{ - type Item = LexResult<'input>; - - #[inline(always)] - fn next(&mut self) -> Option { - 'next_tok: loop { - let next = self.lexer.next(); - if matches!( - next, - Some(Ok((_, Token::CommentBlock(_) | Token::CommentLine(_), _))) - ) { - continue 'next_tok; - } - return next; - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use partiql_common::syntax::line_offset_tracker::{LineOffsetError, LineOffsetTracker}; - use partiql_common::syntax::location::{ - CharOffset, LineAndCharPosition, LineAndColumn, LineOffset, Located, Location, - }; - - use itertools::Itertools; - - #[test] - fn display() -> Result<(), ParseError<'static, BytePosition>> { - let symbols = - "( [ { } ] ) << >> ; , < > <= >= != <> = == - + * ? % / ^ . 
|| : --foo /*block*/"; - let primitives = r#"unquoted_ident "quoted_ident" @unquoted_atident @"quoted_atident""#; - let keywords = - "WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \ - On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \ - Intersect Is Inner In Having Group From For Full First False Except Escape Desc \ - Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End"; - let symbols = symbols.split(' ').chain(primitives.split(' ')); - let keywords = keywords.split(' '); - - let text = symbols.interleave(keywords).join("\n"); - let s = text.as_str(); - - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(s, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>().unwrap(); - - #[rustfmt::skip] - let expected = vec![ - "(", "WITH", "[", "WHERE", "{", "VALUE", "}", "USING", "]", "UNPIVOT", ")", "UNION", - "<<", "TRUE", ">>", "SELECT", ";", "RIGHT", ",", "PRESERVE", "<", "PIVOT", ">", "OUTER", - "<=", "ORDER", ">=", "OR", "!=", "ON", "<>", "OFFSET", "=", "NULLS", "==", "NULL", "-", - "NOT", "+", "NATURAL", "*", "MISSING", "?", "LIMIT", "%", "LIKE", "/", "LEFT", "^", - "LATERAL", ".", "LAST", "||", "JOIN", ":", "INTERSECT", "--", "IS", "/**/", "INNER", - "", "IN", "", "HAVING", - "", "GROUP", "", - "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT", "ESCAPE", "DESC", "CROSS", "TABLE", - "TIME", "TIMESTAMP", "DATE", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES", - "CASE", "WHEN", "THEN", "ELSE", "END" - ]; - let displayed = toks - .into_iter() - .map(|(_s, t, _e)| t.to_string()) - .collect::>(); - assert_eq!(expected, displayed); - - Ok(()) - } - - #[test] - fn ion_simple() { - let ion_value = r" `{'input':1, 'b':1}`--comment "; - - let mut offset_tracker = LineOffsetTracker::default(); - let ion_lexer = EmbeddedIonLexer::new(ion_value.trim(), &mut offset_tracker); - assert_eq!(ion_lexer.into_iter().count(), 1); - assert_eq!(offset_tracker.num_lines(), 1); - - let mut offset_tracker = LineOffsetTracker::default(); - let mut lexer = PartiqlLexer::new(ion_value, &mut offset_tracker); - - let tok = lexer.next().unwrap().unwrap(); - assert!( - matches!(tok, (ByteOffset(5), Token::Ion(ion_str), ByteOffset(24)) if ion_str == "{'input':1, 'b':1}") - ); - let tok = lexer.next().unwrap().unwrap(); - assert!( - matches!(tok, (ByteOffset(25), Token::CommentLine(cmt_str), ByteOffset(35)) if cmt_str == "--comment ") - ); - } - - #[test] - fn ion() { - let ion_value = r#" `{'input' // comment ' " - :1, /* - comment - */ - 'b':1}` "#; - - let mut offset_tracker = LineOffsetTracker::default(); - let ion_lexer = EmbeddedIonLexer::new(ion_value.trim(), &mut offset_tracker); - assert_eq!(ion_lexer.into_iter().count(), 1); - assert_eq!(offset_tracker.num_lines(), 5); - - let mut offset_tracker = LineOffsetTracker::default(); - let mut lexer = PartiqlLexer::new(ion_value, &mut offset_tracker); - - let tok = lexer.next().unwrap().unwrap(); - assert!( - matches!(tok, (ByteOffset(2), Token::Ion(ion_str), ByteOffset(158)) if ion_str == ion_value.trim().trim_matches('`')) - ); - assert_eq!(offset_tracker.num_lines(), 5); - } - - #[test] - fn nested_comments() { - let comments = r#"/* - /* / * * * / - /* ' " ''' ` - */ text - */ 1 2 3 4 5 6,7,8,9 10.112^5 - */"#; - - let mut offset_tracker = LineOffsetTracker::default(); - let nested_lex = CommentLexer::new(comments, &mut offset_tracker).with_nesting(); - assert_eq!(nested_lex.into_iter().count(), 1); - 
assert_eq!(offset_tracker.num_lines(), 6); - - let mut offset_tracker = LineOffsetTracker::default(); - let nonnested_lex = CommentLexer::new(comments, &mut offset_tracker); - let toks: Result, Spanned, ByteOffset>> = nonnested_lex.collect(); - assert!(toks.is_err()); - let error = toks.unwrap_err(); - assert!(matches!( - error, - (ByteOffset(142), LexError::Unknown, ByteOffset(189)) - )); - assert_eq!(error.1.to_string(), "Lexing error: unknown error"); - } - - #[test] - fn select() -> Result<(), ParseError<'static, BytePosition>> { - let query = r#"SELECT g - FROM "data" - GROUP BY a"#; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>()?; - - let mut pre_offset_tracker = LineOffsetTracker::default(); - let pre_lexer = PartiqlLexer::new(query, &mut pre_offset_tracker); - let pre_toks: Vec<_> = pre_lexer.collect::>()?; - - let expected_toks = vec![ - Token::Select, - Token::UnquotedIdent("g"), - Token::From, - Token::QuotedIdent("data"), - Token::Group, - Token::By, - Token::UnquotedIdent("a"), - ]; - assert_eq!( - expected_toks, - toks.into_iter().map(|(_s, t, _e)| t).collect::>() - ); - assert_eq!( - expected_toks, - pre_toks - .into_iter() - .map(|(_s, t, _e)| t) - .collect::>() - ); - - assert_eq!(offset_tracker.num_lines(), 3); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 0.into()).unwrap()), - LineAndColumn::new(1, 1).unwrap() - ); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 1.into()).unwrap()), - LineAndColumn::new(1, 2).unwrap() - ); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 9.into()).unwrap()), - LineAndColumn::new(2, 1).unwrap() - ); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 19.into()).unwrap()), - LineAndColumn::new(2, 11).unwrap() - ); - - let offset_r_a = query.rfind('a').unwrap(); - let offset_r_n = query.rfind('\n').unwrap(); - assert_eq!( - LineAndColumn::from( - offset_tracker - .at(query, BytePosition::from(query.len() - 1)) - .unwrap() - ), - LineAndColumn::new(3, offset_r_a - offset_r_n).unwrap() - ); - - Ok(()) - } - - #[test] - fn select_unicode() -> Result<(), ParseError<'static, BytePosition>> { - let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY \"🧸\""; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>()?; - - assert_eq!( - vec![ - Token::Select, - Token::QuotedIdent("🐈"), - Token::From, - Token::QuotedIdent("❤ℝ"), - Token::Group, - Token::By, - Token::QuotedIdent("🧸") - ], - toks.into_iter().map(|(_s, t, _e)| t).collect::>() - ); - - assert_eq!(offset_tracker.num_lines(), 5); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 0.into()).unwrap()), - LineAndColumn::new(1, 1).unwrap() - ); - - let offset_s = query.find('S').unwrap(); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, offset_s.into()).unwrap()), - LineAndColumn::new(2, 1).unwrap() - ); - - let offset_f = query.find('F').unwrap(); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, offset_f.into()).unwrap()), - LineAndColumn::new(3, 1).unwrap() - ); - - let offset_g = query.find('G').unwrap(); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, offset_g.into()).unwrap()), - LineAndColumn::new(5, 1).unwrap() - ); - - Ok(()) - } - - #[test] - fn offset_overflow() { - let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY x"; - let mut 
offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - lexer.count(); - - let last = offset_tracker.at(query, ByteOffset(query.len() as u32).into()); - assert!(matches!( - last, - Ok(LineAndCharPosition { - line: LineOffset(4), - char: CharOffset(10) - }) - )); - - let overflow = offset_tracker.at(query, ByteOffset(1 + query.len() as u32).into()); - assert!(matches!(overflow, Err(LineOffsetError::EndOfInput))); - } - - #[test] - fn offset_into_codepoint() { - let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY \"🧸\""; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - lexer.count(); - - assert_eq!( - offset_tracker.at(query, ByteOffset(1).into()), - Err(LineOffsetError::InsideUnicodeCodepoint) - ); - } - - #[test] - fn select_comment_line() -> Result<(), ParseError<'static, BytePosition>> { - let query = "SELECT --comment\n@g from @\"foo\""; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>()?; - - assert_eq!( - vec![ - Token::Select, - Token::CommentLine("--comment"), - Token::UnquotedAtIdentifier("g"), - Token::From, - Token::QuotedAtIdentifier("foo"), - ], - toks.into_iter().map(|(_s, t, _e)| t).collect::>() - ); - assert_eq!(offset_tracker.num_lines(), 2); - Ok(()) - } - - #[test] - fn select_comment_block() -> Result<(), ParseError<'static, BytePosition>> { - let query = "SELECT /*comment*/ g"; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>()?; - - assert_eq!( - vec![ - Token::Select, - Token::CommentBlock("/*comment*/"), - Token::UnquotedIdent("g"), - ], - toks.into_iter().map(|(_s, t, _e)| t).collect::>() - ); - assert_eq!(offset_tracker.num_lines(), 1); - Ok(()) - } - - /// In the future, the following identifiers may be converted into reserved keywords. In that case, - /// the following test will need to be modified. 
- #[test] - fn select_non_reserved_keywords() -> Result<(), ParseError<'static, BytePosition>> { - let query = - "SELECT acyclic, BoTh, DOMAIN, SiMpLe, Trail, leading, TRailing, USER\nfrom @\"foo\""; - let mut offset_tracker = LineOffsetTracker::default(); - let lexer = PartiqlLexer::new(query, &mut offset_tracker); - let toks: Vec<_> = lexer.collect::>()?; - - assert_eq!( - vec![ - Token::Select, - Token::UnquotedIdent("acyclic"), - Token::Comma, - Token::UnquotedIdent("BoTh"), - Token::Comma, - Token::UnquotedIdent("DOMAIN"), - Token::Comma, - Token::UnquotedIdent("SiMpLe"), - Token::Comma, - Token::UnquotedIdent("Trail"), - Token::Comma, - Token::UnquotedIdent("leading"), - Token::Comma, - Token::UnquotedIdent("TRailing"), - Token::Comma, - Token::UnquotedIdent("USER"), - Token::From, - Token::QuotedAtIdentifier("foo"), - ], - toks.into_iter().map(|(_s, t, _e)| t).collect::>() - ); - assert_eq!(offset_tracker.num_lines(), 2); - Ok(()) - } - - #[test] - fn err_invalid_input() { - let query = "SELECT # FROM data GROUP BY a"; - let mut offset_tracker = LineOffsetTracker::default(); - let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); - assert!(toks.is_err()); - let error = toks.unwrap_err(); - assert_eq!( - error.to_string(), - r"Lexing error: invalid input `#` at `(b7..b8)`" - ); - assert!(matches!(error, - ParseError::LexicalError(Located { - inner: LexError::InvalidInput(s), - location: Location{start: BytePosition(ByteOffset(7)), end: BytePosition(ByteOffset(8))} - }) if s == "#")); - assert_eq!(offset_tracker.num_lines(), 1); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, 7.into()).unwrap()), - LineAndColumn::new(1, 8).unwrap() - ); - } - - #[test] - fn err_unterminated_ion() { - let query = r#" ` "fooo` "#; - let mut offset_tracker = LineOffsetTracker::default(); - let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); - assert!(toks.is_err()); - let error = toks.unwrap_err(); - - assert!(matches!( - error, - ParseError::LexicalError(Located { - inner: LexError::UnterminatedIonLiteral, - location: Location { - start: BytePosition(ByteOffset(1)), - end: BytePosition(ByteOffset(10)) - } - }) - )); - assert_eq!( - error.to_string(), - "Lexing error: unterminated ion literal at `(b1..b10)`" - ); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, BytePosition::from(1)).unwrap()), - LineAndColumn::new(1, 2).unwrap() - ); - } - - #[test] - fn err_unterminated_comment() { - let query = r" /*12345678"; - let mut offset_tracker = LineOffsetTracker::default(); - let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); - assert!(toks.is_err()); - let error = toks.unwrap_err(); - assert!(matches!( - error, - ParseError::LexicalError(Located { - inner: LexError::UnterminatedComment, - location: Location { - start: BytePosition(ByteOffset(1)), - end: BytePosition(ByteOffset(11)) - } - }) - )); - assert_eq!( - error.to_string(), - "Lexing error: unterminated comment at `(b1..b11)`" - ); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, BytePosition::from(1)).unwrap()), - LineAndColumn::new(1, 2).unwrap() - ); - } - - #[test] - fn err_unterminated_ion_comment() { - let query = r" `/*12345678`"; - let mut offset_tracker = LineOffsetTracker::default(); - let ion_lexer = EmbeddedIonLexer::new(query, &mut offset_tracker); - let toks: Result, Spanned, ByteOffset>> = ion_lexer.collect(); - assert!(toks.is_err()); - let error = toks.unwrap_err(); - assert!(matches!( - error, - (ByteOffset(2), 
LexError::UnterminatedComment, ByteOffset(13)) - )); - assert_eq!(error.1.to_string(), "Lexing error: unterminated comment"); - assert_eq!( - LineAndColumn::from(offset_tracker.at(query, BytePosition::from(2)).unwrap()), - LineAndColumn::new(1, 3).unwrap() - ); - } -} diff --git a/partiql-parser/src/lexer/comment.rs b/partiql-parser/src/lexer/comment.rs new file mode 100644 index 00000000..782847e4 --- /dev/null +++ b/partiql-parser/src/lexer/comment.rs @@ -0,0 +1,132 @@ +use std::borrow::Cow; +use std::cmp::max; + +use crate::error::LexError; +use crate::lexer::SpannedResult; +use logos::{Logos, Span}; +use partiql_common::syntax::line_offset_tracker::LineOffsetTracker; +use partiql_common::syntax::location::ByteOffset; + +/// A block comment string (e.g. `"/* comment here */"`) with [`ByteOffset`] span relative to lexed source. +/// +/// Note: +/// - The returned string includes the comment start (`/*`) and end (`*/`) tokens. +/// - The returned `ByteOffset` span includes the comment start (`/*`) and end (`*/`) tokens. +type CommentStringResult<'input> = SpannedResult<&'input str, ByteOffset, LexError<'input>>; + +/// Tokens used to parse block comment +#[derive(Logos, Debug, Clone, PartialEq, Eq)] +#[logos(skip r"[^/*\r\n\u0085\u2028\u2029]+")] +enum CommentToken { + // Skip stuff that won't interfere with comment detection + #[regex(r"[/*]", logos::skip)] + Any, + // Skip newlines, but record their position. + // For line break recommendations, + // see https://www.unicode.org/standard/reports/tr13/tr13-5.html + #[regex(r"(([\r])?[\n])|\u0085|\u2028|\u2029")] + Newline, + #[token("*/")] + End, + #[token("/*")] + Start, +} + +/// A lexer for block comments (enclosed between '/*' & '*/') that returns the parsed [`CommentString`] +pub struct CommentLexer<'input, 'tracker> { + /// Wrap a logos-generated lexer + lexer: logos::Lexer<'input, CommentToken>, + comment_nesting: bool, + tracker: &'tracker mut LineOffsetTracker, +} + +impl<'input, 'tracker> CommentLexer<'input, 'tracker> { + /// Creates a new block comment lexer over `input` text. + /// Nested comment parsing is *off* by default; see [`with_nesting`] to enable nesting. + #[inline] + pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { + CommentLexer { + lexer: CommentToken::lexer(input), + comment_nesting: false, + tracker, + } + } + + /// Toggles *on* the parsing of nested comments + #[inline] + pub fn with_nesting(mut self) -> Self { + self.comment_nesting = true; + self + } + + /// Creates an error token at the current lexer location + #[inline] + fn err_here( + &self, + err_ctor: fn(Cow<'input, str>) -> LexError<'input>, + ) -> CommentStringResult<'input> { + let Span { start, .. } = self.lexer.span(); + self.err_ends_here(start, err_ctor) + } + + /// Creates an error token ending at the current lexer location + #[inline] + fn err_ends_here( + &self, + start: usize, + err_ctor: fn(Cow<'input, str>) -> LexError<'input>, + ) -> CommentStringResult<'input> { + let region = self.lexer.slice(); + let Span { end, .. } = self.lexer.span(); + Err((start.into(), err_ctor(region.into()), end.into())) + } + + /// Parses a single (possibly nested) block comment and returns it + fn next_internal(&mut self) -> Option> { + let Span { start, .. 
} = self.lexer.span(); + let mut nesting = 0; + let nesting_inc = i32::from(self.comment_nesting); + 'comment: loop { + match self.lexer.next() { + Some(Ok(CommentToken::Any)) => continue, + Some(Ok(CommentToken::Newline)) => { + self.tracker.record(self.lexer.span().end.into()); + } + Some(Ok(CommentToken::Start)) => nesting = max(1, nesting + nesting_inc), + Some(Ok(CommentToken::End)) => { + if nesting == 0 { + // saw a `*/` while not in a comment + return Some(self.err_here(|_| LexError::UnterminatedComment)); + } + nesting -= 1; + if nesting == 0 { + break 'comment; + } + } + Some(Err(_)) => return Some(self.err_here(LexError::InvalidInput)), + None => { + let result = if nesting != 0 { + // ran out of input while inside a comment + Some(self.err_ends_here(start, |_| LexError::UnterminatedComment)) + } else { + None + }; + return result; + } + } + } + let Span { end, .. } = self.lexer.span(); + let comment = &self.lexer.source()[start..end]; + + Some(Ok((start.into(), comment, end.into()))) + } +} + +impl<'input, 'tracker> Iterator for CommentLexer<'input, 'tracker> { + type Item = CommentStringResult<'input>; + + #[inline(always)] + fn next(&mut self) -> Option { + self.next_internal() + } +} diff --git a/partiql-parser/src/lexer/embedded_ion.rs b/partiql-parser/src/lexer/embedded_ion.rs new file mode 100644 index 00000000..66370052 --- /dev/null +++ b/partiql-parser/src/lexer/embedded_ion.rs @@ -0,0 +1,135 @@ +use crate::error::LexError; +use crate::lexer::{CommentLexer, SpannedResult}; +use logos::{Logos, Span}; +use partiql_common::syntax::line_offset_tracker::LineOffsetTracker; +use partiql_common::syntax::location::ByteOffset; + +/// An embedded Ion string (e.g. `[{a: 1}, {b: 2}]`) with [`ByteOffset`] span +/// relative to lexed source. +/// +/// Note: +/// - The lexer parses the embedded ion value enclosed in backticks. +/// - The returned string *does not* include the backticks +/// - The returned `ByteOffset` span *does* include the backticks +type EmbeddedIonStringResult<'input> = SpannedResult<&'input str, ByteOffset, LexError<'input>>; + +/// Tokens used to parse Ion literals embedded in backticks (\`) +#[derive(Logos, Debug, Clone, PartialEq)] +#[logos(skip r#"[^/*'"`\r\n\u0085\u2028\u2029]+"#)] +enum EmbeddedIonToken { + // Skip newlines, but record their position. + // For line break recommendations, + // see https://www.unicode.org/standard/reports/tr13/tr13-5.html + #[regex(r"(([\r])?[\n])|\u0085|\u2028|\u2029")] + Newline, + + #[token("`")] + Embed, + + #[regex(r"//[^\n]*")] + CommentLine, + #[token("/*")] + CommentBlock, + + #[regex(r#""([^"\\]|\\t|\\u|\\")*""#)] + String, + #[regex(r#"'([^'\\]|\\t|\\u|\\')*'"#)] + Symbol, + #[token("'''")] + LongString, +} + +/// A Lexer for Ion literals embedded in backticks (\`) that returns the parsed [`EmbeddedIonString`] +/// +/// Parses just enough Ion to make sure not to include a backtick that is inside a string or comment. +pub struct EmbeddedIonLexer<'input, 'tracker> { + /// Wrap a logos-generated lexer + lexer: logos::Lexer<'input, EmbeddedIonToken>, + tracker: &'tracker mut LineOffsetTracker, +} + +impl<'input, 'tracker> EmbeddedIonLexer<'input, 'tracker> { + /// Creates a new embedded ion lexer over `input` text. 
+ #[inline] + pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { + EmbeddedIonLexer { + lexer: EmbeddedIonToken::lexer(input), + tracker, + } + } + + /// Parses a single embedded ion value, quoted between backticks (`), and returns it + fn next_internal(&mut self) -> Option> { + let next_token = self.lexer.next(); + match next_token { + Some(Ok(EmbeddedIonToken::Embed)) => { + let Span { start, .. } = self.lexer.span(); + 'ion_value: loop { + let next_tok = self.lexer.next(); + match next_tok { + Some(Ok(EmbeddedIonToken::Newline)) => { + self.tracker.record(self.lexer.span().end.into()); + } + Some(Ok(EmbeddedIonToken::Embed)) => { + break 'ion_value; + } + Some(Ok(EmbeddedIonToken::CommentBlock)) => { + let embed = self.lexer.span(); + let remaining = &self.lexer.source()[embed.start..]; + let mut comment_tracker = LineOffsetTracker::default(); + let mut comment_lexer = + CommentLexer::new(remaining, &mut comment_tracker); + match comment_lexer.next() { + Some(Ok((s, _c, e))) => { + self.tracker.append(&comment_tracker, embed.start.into()); + self.lexer.bump((e - s).to_usize() - embed.len()); + } + Some(Err((s, err, e))) => { + let offset: ByteOffset = embed.start.into(); + return Some(Err((s + offset, err, e + offset))); + } + None => unreachable!(), + } + } + Some(Ok(EmbeddedIonToken::LongString)) => { + 'triple_quote: loop { + let next_tok = self.lexer.next(); + match next_tok { + Some(Ok(EmbeddedIonToken::LongString)) => break 'triple_quote, + Some(_) => (), // just consume all other tokens + None => continue 'ion_value, + } + } + } + Some(_) => { + // just consume all other tokens + } + None => { + let Span { end, .. } = self.lexer.span(); + return Some(Err(( + start.into(), + LexError::UnterminatedIonLiteral, + end.into(), + ))); + } + } + } + let Span { end, .. } = self.lexer.span(); + let (str_start, str_end) = (start + 1, end - 1); + let ion_value = &self.lexer.source()[str_start..str_end]; + + Some(Ok((start.into(), ion_value, end.into()))) + } + _ => None, + } + } +} + +impl<'input, 'tracker> Iterator for EmbeddedIonLexer<'input, 'tracker> { + type Item = EmbeddedIonStringResult<'input>; + + #[inline(always)] + fn next(&mut self) -> Option { + self.next_internal() + } +} diff --git a/partiql-parser/src/lexer/mod.rs b/partiql-parser/src/lexer/mod.rs new file mode 100644 index 00000000..7a81fefb --- /dev/null +++ b/partiql-parser/src/lexer/mod.rs @@ -0,0 +1,521 @@ +use partiql_common::syntax::location::{ByteOffset, BytePosition, ToLocated}; + +use crate::error::{LexError, ParseError}; + +mod comment; +mod embedded_ion; +mod partiql; + +pub use comment::*; +pub use embedded_ion::*; +pub use partiql::*; + +/// A 3-tuple of (start, `Tok`, end) denoting a token and it start and end offsets. +pub type Spanned = (Loc, Tok, Loc); +/// A [`Result`] of a [`Spanned`] token. +pub(crate) type SpannedResult = Result, Spanned>; + +pub(crate) type InternalLexResult<'input> = + SpannedResult, ByteOffset, LexError<'input>>; +pub(crate) type LexResult<'input> = + Result, ByteOffset>, ParseError<'input, BytePosition>>; + +impl<'input> From, ByteOffset>> for ParseError<'input, BytePosition> { + fn from(res: Spanned, ByteOffset>) -> Self { + let (start, cause, end) = res; + ParseError::LexicalError( + cause.to_located(BytePosition::from(start)..BytePosition::from(end)), + ) + } +} + +/// A lexer that wraps another lexer and skips comments. 
+pub(crate) struct CommentSkippingLexer<'input, L> +where + L: Iterator>, +{ + lexer: L, +} + +impl<'input, L> CommentSkippingLexer<'input, L> +where + L: Iterator>, +{ + /// Creates a new `CommentSkippingLexer` wrapping `lexer` + #[inline] + pub fn new(lexer: L) -> Self { + Self { lexer } + } +} + +impl<'input, L> Iterator for CommentSkippingLexer<'input, L> +where + L: Iterator>, +{ + type Item = LexResult<'input>; + + #[inline(always)] + fn next(&mut self) -> Option { + 'next_tok: loop { + let next = self.lexer.next(); + if matches!( + next, + Some(Ok((_, Token::CommentBlock(_) | Token::CommentLine(_), _))) + ) { + continue 'next_tok; + } + return next; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use partiql_common::syntax::line_offset_tracker::{LineOffsetError, LineOffsetTracker}; + use partiql_common::syntax::location::{ + CharOffset, LineAndCharPosition, LineAndColumn, LineOffset, Located, Location, + }; + + use itertools::Itertools; + + #[test] + fn display() -> Result<(), ParseError<'static, BytePosition>> { + let symbols = + "( [ { } ] ) << >> ; , < > <= >= != <> = == - + * ? % / ^ . || : --foo /*block*/"; + let primitives = r#"unquoted_ident "quoted_ident" @unquoted_atident @"quoted_atident""#; + let keywords = + "WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \ + On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \ + Intersect Is Inner In Having Group From For Full First False Except Escape Desc \ + Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End"; + let symbols = symbols.split(' ').chain(primitives.split(' ')); + let keywords = keywords.split(' '); + + let text = symbols.interleave(keywords).join("\n"); + let s = text.as_str(); + + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(s, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>().unwrap(); + + #[rustfmt::skip] + let expected = vec![ + "(", "WITH", "[", "WHERE", "{", "VALUE", "}", "USING", "]", "UNPIVOT", ")", "UNION", + "<<", "TRUE", ">>", "SELECT", ";", "RIGHT", ",", "PRESERVE", "<", "PIVOT", ">", "OUTER", + "<=", "ORDER", ">=", "OR", "!=", "ON", "<>", "OFFSET", "=", "NULLS", "==", "NULL", "-", + "NOT", "+", "NATURAL", "*", "MISSING", "?", "LIMIT", "%", "LIKE", "/", "LEFT", "^", + "LATERAL", ".", "LAST", "||", "JOIN", ":", "INTERSECT", "--", "IS", "/**/", "INNER", + "", "IN", "", "HAVING", + "", "GROUP", "", + "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT", "ESCAPE", "DESC", "CROSS", "TABLE", + "TIME", "TIMESTAMP", "DATE", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES", + "CASE", "WHEN", "THEN", "ELSE", "END" + ]; + let displayed = toks + .into_iter() + .map(|(_s, t, _e)| t.to_string()) + .collect::>(); + assert_eq!(expected, displayed); + + Ok(()) + } + + #[test] + fn ion_simple() { + let ion_value = r" `{'input':1, 'b':1}`--comment "; + + let mut offset_tracker = LineOffsetTracker::default(); + let ion_lexer = EmbeddedIonLexer::new(ion_value.trim(), &mut offset_tracker); + assert_eq!(ion_lexer.into_iter().count(), 1); + assert_eq!(offset_tracker.num_lines(), 1); + + let mut offset_tracker = LineOffsetTracker::default(); + let mut lexer = PartiqlLexer::new(ion_value, &mut offset_tracker); + + let tok = lexer.next().unwrap().unwrap(); + assert!( + matches!(tok, (ByteOffset(5), Token::Ion(ion_str), ByteOffset(24)) if ion_str == "{'input':1, 'b':1}") + ); + let tok = lexer.next().unwrap().unwrap(); + assert!( + matches!(tok, (ByteOffset(25), 
Token::CommentLine(cmt_str), ByteOffset(35)) if cmt_str == "--comment ") + ); + } + + #[test] + fn ion() { + let ion_value = r#" `{'input' // comment ' " + :1, /* + comment + */ + 'b':1}` "#; + + let mut offset_tracker = LineOffsetTracker::default(); + let ion_lexer = EmbeddedIonLexer::new(ion_value.trim(), &mut offset_tracker); + assert_eq!(ion_lexer.into_iter().count(), 1); + assert_eq!(offset_tracker.num_lines(), 5); + + let mut offset_tracker = LineOffsetTracker::default(); + let mut lexer = PartiqlLexer::new(ion_value, &mut offset_tracker); + + let tok = lexer.next().unwrap().unwrap(); + assert!( + matches!(tok, (ByteOffset(2), Token::Ion(ion_str), ByteOffset(158)) if ion_str == ion_value.trim().trim_matches('`')) + ); + assert_eq!(offset_tracker.num_lines(), 5); + } + + #[test] + fn nested_comments() { + let comments = r#"/* + /* / * * * / + /* ' " ''' ` + */ text + */ 1 2 3 4 5 6,7,8,9 10.112^5 + */"#; + + // track nested comments + let mut offset_tracker = LineOffsetTracker::default(); + let nested_lex = CommentLexer::new(comments, &mut offset_tracker).with_nesting(); + let count = nested_lex.into_iter().count(); + assert_eq!(count, 1); + assert_eq!(offset_tracker.num_lines(), 6); + + // don't track nested comments + let mut offset_tracker = LineOffsetTracker::default(); + let nonnested_lex = CommentLexer::new(comments, &mut offset_tracker); + let toks: Result, Spanned, ByteOffset>> = nonnested_lex.collect(); + assert!(toks.is_err()); + let error = toks.unwrap_err(); + assert!(matches!( + error, + ( + ByteOffset(187), + LexError::UnterminatedComment, + ByteOffset(189) + ) + )); + assert_eq!(error.1.to_string(), "Lexing error: unterminated comment"); + } + + #[test] + fn select() -> Result<(), ParseError<'static, BytePosition>> { + let query = r#"SELECT g + FROM "data" + GROUP BY a"#; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>()?; + + let mut pre_offset_tracker = LineOffsetTracker::default(); + let pre_lexer = PartiqlLexer::new(query, &mut pre_offset_tracker); + let pre_toks: Vec<_> = pre_lexer.collect::>()?; + + let expected_toks = vec![ + Token::Select, + Token::UnquotedIdent("g"), + Token::From, + Token::QuotedIdent("data"), + Token::Group, + Token::By, + Token::UnquotedIdent("a"), + ]; + assert_eq!( + expected_toks, + toks.into_iter().map(|(_s, t, _e)| t).collect::>() + ); + assert_eq!( + expected_toks, + pre_toks + .into_iter() + .map(|(_s, t, _e)| t) + .collect::>() + ); + + assert_eq!(offset_tracker.num_lines(), 3); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 0.into()).unwrap()), + LineAndColumn::new(1, 1).unwrap() + ); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 1.into()).unwrap()), + LineAndColumn::new(1, 2).unwrap() + ); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 9.into()).unwrap()), + LineAndColumn::new(2, 1).unwrap() + ); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 19.into()).unwrap()), + LineAndColumn::new(2, 11).unwrap() + ); + + let offset_r_a = query.rfind('a').unwrap(); + let offset_r_n = query.rfind('\n').unwrap(); + assert_eq!( + LineAndColumn::from( + offset_tracker + .at(query, BytePosition::from(query.len() - 1)) + .unwrap() + ), + LineAndColumn::new(3, offset_r_a - offset_r_n).unwrap() + ); + + Ok(()) + } + + #[test] + fn select_unicode() -> Result<(), ParseError<'static, BytePosition>> { + let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY \"🧸\""; + 
let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>()?; + + assert_eq!( + vec![ + Token::Select, + Token::QuotedIdent("🐈"), + Token::From, + Token::QuotedIdent("❤ℝ"), + Token::Group, + Token::By, + Token::QuotedIdent("🧸") + ], + toks.into_iter().map(|(_s, t, _e)| t).collect::>() + ); + + assert_eq!(offset_tracker.num_lines(), 5); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 0.into()).unwrap()), + LineAndColumn::new(1, 1).unwrap() + ); + + let offset_s = query.find('S').unwrap(); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, offset_s.into()).unwrap()), + LineAndColumn::new(2, 1).unwrap() + ); + + let offset_f = query.find('F').unwrap(); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, offset_f.into()).unwrap()), + LineAndColumn::new(3, 1).unwrap() + ); + + let offset_g = query.find('G').unwrap(); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, offset_g.into()).unwrap()), + LineAndColumn::new(5, 1).unwrap() + ); + + Ok(()) + } + + #[test] + fn offset_overflow() { + let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY x"; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + lexer.count(); + + let last = offset_tracker.at(query, ByteOffset(query.len() as u32).into()); + assert!(matches!( + last, + Ok(LineAndCharPosition { + line: LineOffset(4), + char: CharOffset(10) + }) + )); + + let overflow = offset_tracker.at(query, ByteOffset(1 + query.len() as u32).into()); + assert!(matches!(overflow, Err(LineOffsetError::EndOfInput))); + } + + #[test] + fn offset_into_codepoint() { + let query = "\u{2028}SELECT \"🐈\"\r\nFROM \"❤\u{211D}\"\u{2029}\u{0085}GROUP BY \"🧸\""; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + lexer.count(); + + assert_eq!( + offset_tracker.at(query, ByteOffset(1).into()), + Err(LineOffsetError::InsideUnicodeCodepoint) + ); + } + + #[test] + fn select_comment_line() -> Result<(), ParseError<'static, BytePosition>> { + let query = "SELECT --comment\n@g from @\"foo\""; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>()?; + + assert_eq!( + vec![ + Token::Select, + Token::CommentLine("--comment"), + Token::UnquotedAtIdentifier("g"), + Token::From, + Token::QuotedAtIdentifier("foo"), + ], + toks.into_iter().map(|(_s, t, _e)| t).collect::>() + ); + assert_eq!(offset_tracker.num_lines(), 2); + Ok(()) + } + + #[test] + fn select_comment_block() -> Result<(), ParseError<'static, BytePosition>> { + let query = "SELECT /*comment*/ g"; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>()?; + + assert_eq!( + vec![ + Token::Select, + Token::CommentBlock("/*comment*/"), + Token::UnquotedIdent("g"), + ], + toks.into_iter().map(|(_s, t, _e)| t).collect::>() + ); + assert_eq!(offset_tracker.num_lines(), 1); + Ok(()) + } + + /// In the future, the following identifiers may be converted into reserved keywords. In that case, + /// the following test will need to be modified. 
+ #[test] + fn select_non_reserved_keywords() -> Result<(), ParseError<'static, BytePosition>> { + let query = + "SELECT acyclic, BoTh, DOMAIN, SiMpLe, Trail, leading, TRailing, USER\nfrom @\"foo\""; + let mut offset_tracker = LineOffsetTracker::default(); + let lexer = PartiqlLexer::new(query, &mut offset_tracker); + let toks: Vec<_> = lexer.collect::>()?; + + assert_eq!( + vec![ + Token::Select, + Token::UnquotedIdent("acyclic"), + Token::Comma, + Token::UnquotedIdent("BoTh"), + Token::Comma, + Token::UnquotedIdent("DOMAIN"), + Token::Comma, + Token::UnquotedIdent("SiMpLe"), + Token::Comma, + Token::UnquotedIdent("Trail"), + Token::Comma, + Token::UnquotedIdent("leading"), + Token::Comma, + Token::UnquotedIdent("TRailing"), + Token::Comma, + Token::UnquotedIdent("USER"), + Token::From, + Token::QuotedAtIdentifier("foo"), + ], + toks.into_iter().map(|(_s, t, _e)| t).collect::>() + ); + assert_eq!(offset_tracker.num_lines(), 2); + Ok(()) + } + + #[test] + fn err_invalid_input() { + let query = "SELECT # FROM data GROUP BY a"; + let mut offset_tracker = LineOffsetTracker::default(); + let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); + assert!(toks.is_err()); + let error = toks.unwrap_err(); + assert_eq!( + error.to_string(), + r"Lexing error: invalid input `#` at `(b7..b8)`" + ); + assert!(matches!(error, + ParseError::LexicalError(Located { + inner: LexError::InvalidInput(s), + location: Location{start: BytePosition(ByteOffset(7)), end: BytePosition(ByteOffset(8))} + }) if s == "#")); + assert_eq!(offset_tracker.num_lines(), 1); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, 7.into()).unwrap()), + LineAndColumn::new(1, 8).unwrap() + ); + } + + #[test] + fn err_unterminated_ion() { + let query = r#" ` "fooo` "#; + let mut offset_tracker = LineOffsetTracker::default(); + let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); + assert!(toks.is_err()); + let error = toks.unwrap_err(); + + assert!(matches!( + error, + ParseError::LexicalError(Located { + inner: LexError::UnterminatedIonLiteral, + location: Location { + start: BytePosition(ByteOffset(1)), + end: BytePosition(ByteOffset(10)) + } + }) + )); + assert_eq!( + error.to_string(), + "Lexing error: unterminated ion literal at `(b1..b10)`" + ); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, BytePosition::from(1)).unwrap()), + LineAndColumn::new(1, 2).unwrap() + ); + } + + #[test] + fn err_unterminated_comment() { + let query = r" /*12345678"; + let mut offset_tracker = LineOffsetTracker::default(); + let toks: Result, _> = PartiqlLexer::new(query, &mut offset_tracker).collect(); + assert!(toks.is_err()); + let error = toks.unwrap_err(); + assert!(matches!( + error, + ParseError::LexicalError(Located { + inner: LexError::UnterminatedComment, + location: Location { + start: BytePosition(ByteOffset(1)), + end: BytePosition(ByteOffset(11)) + } + }) + )); + assert_eq!( + error.to_string(), + "Lexing error: unterminated comment at `(b1..b11)`" + ); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, BytePosition::from(1)).unwrap()), + LineAndColumn::new(1, 2).unwrap() + ); + } + + #[test] + fn err_unterminated_ion_comment() { + let query = r" `/*12345678`"; + let mut offset_tracker = LineOffsetTracker::default(); + let ion_lexer = EmbeddedIonLexer::new(query, &mut offset_tracker); + let toks: Result, Spanned, ByteOffset>> = ion_lexer.collect(); + assert!(toks.is_err()); + let error = toks.unwrap_err(); + assert!(matches!( + error, + (ByteOffset(2), 
LexError::UnterminatedComment, ByteOffset(13)) + )); + assert_eq!(error.1.to_string(), "Lexing error: unterminated comment"); + assert_eq!( + LineAndColumn::from(offset_tracker.at(query, BytePosition::from(2)).unwrap()), + LineAndColumn::new(1, 3).unwrap() + ); + } +} diff --git a/partiql-parser/src/lexer/partiql.rs b/partiql-parser/src/lexer/partiql.rs new file mode 100644 index 00000000..da153e4a --- /dev/null +++ b/partiql-parser/src/lexer/partiql.rs @@ -0,0 +1,569 @@ +use crate::error::LexError; +use crate::lexer::{CommentLexer, EmbeddedIonLexer, InternalLexResult, LexResult}; +use logos::{Logos, Span}; +use partiql_common::syntax::line_offset_tracker::LineOffsetTracker; +use partiql_common::syntax::location::ByteOffset; +use std::borrow::Cow; +use std::fmt; +use std::fmt::Formatter; + +/// A lexer from `PartiQL` text strings to [`Token`]s +pub(crate) struct PartiqlLexer<'input, 'tracker> { + /// Wrap a logos-generated lexer + lexer: logos::Lexer<'input, Token<'input>>, + tracker: &'tracker mut LineOffsetTracker, +} + +impl<'input, 'tracker> PartiqlLexer<'input, 'tracker> { + /// Creates a new `PartiQL` lexer over `input` text. + #[inline] + pub fn new(input: &'input str, tracker: &'tracker mut LineOffsetTracker) -> Self { + PartiqlLexer { + lexer: Token::lexer(input), + tracker, + } + } + + /// Creates an error token at the current lexer location + #[inline] + fn err_here( + &self, + err_ctor: fn(Cow<'input, str>) -> LexError<'input>, + ) -> InternalLexResult<'input> { + let region = self.lexer.slice(); + let Span { start, end } = self.lexer.span(); + Err((start.into(), err_ctor(region.into()), end.into())) + } + + pub fn slice(&self) -> &'input str { + self.lexer.slice() + } + + /// Wraps a [`Token`] into a [`Token`] at the current position of the lexer. + #[inline(always)] + fn wrap(&mut self, token: Token<'input>) -> InternalLexResult<'input> { + let Span { start, end } = self.lexer.span(); + Ok((start.into(), token, end.into())) + } + + /// Advances the iterator and returns the next [`Token`] or [`None`] when input is exhausted. 
+ #[inline] + pub(crate) fn next_internal(&mut self) -> Option> { + 'next_tok: loop { + return match self.lexer.next() { + None => None, + Some(Ok(token)) => match token { + Token::Newline => { + self.tracker.record(self.lexer.span().end.into()); + // Newlines shouldn't generate an externally visible token + continue 'next_tok; + } + + Token::EmbeddedIonQuote => self.parse_embedded_ion(), + + Token::CommentBlockStart => self.parse_block_comment(), + + _ => Some(self.wrap(token)), + }, + Some(Err(_)) => Some(self.err_here(LexError::InvalidInput)), + }; + } + } + + /// Uses [`CommentLexer`] to parse a block comment + fn parse_block_comment(&mut self) -> Option> { + let embed = self.lexer.span(); + let remaining = &self.lexer.source()[embed.start..]; + let mut comment_tracker = LineOffsetTracker::default(); + let mut comment_lexer = CommentLexer::new(remaining, &mut comment_tracker).with_nesting(); + comment_lexer.next().map(|res| match res { + Ok((s, comment, e)) => { + let val_len = e - s; + let val_start = embed.start.into(); // embed end is 1 past the starting '/*' + let val_end = val_start + val_len; + self.tracker.append(&comment_tracker, embed.start.into()); + self.lexer.bump(val_len.to_usize() - embed.len()); + Ok((val_start, Token::CommentBlock(comment), val_end)) + } + Err((s, err, e)) => { + let offset: ByteOffset = embed.start.into(); + Err((s + offset, err, e + offset)) + } + }) + } + + /// Uses [`EmbeddedIonLexer`] to parse an embedded ion value + fn parse_embedded_ion(&mut self) -> Option> { + let embed = self.lexer.span(); + let remaining = &self.lexer.source()[embed.start..]; + let mut ion_tracker = LineOffsetTracker::default(); + let mut ion_lexer = EmbeddedIonLexer::new(remaining, &mut ion_tracker); + ion_lexer.next().map(|res| match res { + Ok((s, ion, e)) => { + let val_len = e - s; + let val_start = embed.end.into(); // embed end is 1 past the starting '`' + let val_end = val_start + val_len - 2; // sub 2 to remove surrounding '`' + self.tracker.append(&ion_tracker, embed.start.into()); + self.lexer.bump(val_len.to_usize() - embed.len()); + Ok((val_start, Token::Ion(ion), val_end)) + } + Err((s, err, e)) => { + let offset: ByteOffset = embed.start.into(); + Err((s + offset, err, e + offset)) + } + }) + } +} + +impl<'input, 'tracker> Iterator for PartiqlLexer<'input, 'tracker> { + type Item = LexResult<'input>; + + #[inline(always)] + fn next(&mut self) -> Option { + self.next_internal() + .map(|res| res.map_err(std::convert::Into::into)) + } +} + +/// Tokens that the lexer can generate. +/// +/// # Note +/// Tokens with names beginning with `__` are used internally and not meant to be used outside lexing. +#[derive(Logos, Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +// TODO make pub(crate) ? +// Skip whitespace +#[logos(skip r"[ \t\f]+")] +pub enum Token<'input> { + // Skip newlines, but record their position. 
+ // For line break recommendations, + // see https://www.unicode.org/standard/reports/tr13/tr13-5.html + #[regex(r"([\r]?[\n])|\u{0085}|\u{2028}|\u{2029}")] + Newline, + + #[regex(r"--[^\n]*", |lex| lex.slice())] + CommentLine(&'input str), + #[token("/*")] + CommentBlockStart, + CommentBlock(&'input str), + + // Brackets + #[token("[")] + OpenSquare, + #[token("]")] + CloseSquare, + #[token("{")] + OpenCurly, + #[token("}")] + CloseCurly, + #[token("(")] + OpenParen, + #[token(")")] + CloseParen, + #[token("<<")] + OpenDblAngle, + #[token(">>")] + CloseDblAngle, + + // Symbols + #[token(",")] + Comma, + #[token(";")] + Semicolon, + #[token(":")] + Colon, + #[token("==")] + EqualEqual, + #[token("!=")] + BangEqual, + #[token("<>")] + LessGreater, + #[token("<=")] + LessEqual, + #[token(">=")] + GreaterEqual, + #[token("=")] + Equal, + #[token("<")] + LessThan, + #[token(">")] + GreaterThan, + #[token("-")] + Minus, + #[token("+")] + Plus, + #[token("*")] + Star, + #[token("?")] + SqlParameter, + #[token("%")] + Percent, + #[token("/")] + Slash, + #[token("^")] + Caret, + #[token(".")] + Period, + #[token("||")] + DblPipe, + + // unquoted identifiers + #[regex("[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| lex.slice())] + UnquotedIdent(&'input str), + + // quoted identifiers (quoted with double quotes) + #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#, + |lex| lex.slice().trim_matches('"'))] + QuotedIdent(&'input str), + + // unquoted @identifiers + #[regex("@[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| &lex.slice()[1..])] + UnquotedAtIdentifier(&'input str), + + // quoted @identifiers (quoted with double quotes) + #[regex(r#"@"([^"\\]|\\t|\\u|\\n|\\")*""#, + |lex| lex.slice()[1..].trim_matches('"'))] + QuotedAtIdentifier(&'input str), + + #[regex("[0-9]+", |lex| lex.slice())] + Int(&'input str), + + #[regex("[0-9]+\\.[0-9]*([eE][-+]?[0-9]+)", |lex| lex.slice())] + #[regex("\\.[0-9]+([eE][-+]?[0-9]+)", |lex| lex.slice())] + #[regex("[0-9]+[eE][-+]?[0-9]+", |lex| lex.slice())] + ExpReal(&'input str), + + #[regex("[0-9]+\\.[0-9]*", |lex| lex.slice())] + #[regex("\\.[0-9]+", |lex| lex.slice())] + Real(&'input str), + + // strings are single-quoted in SQL/PartiQL + #[regex(r#"'([^'\\]|\\t|\\u|\\n|\\'|\\|(?:''))*'"#, + |lex| lex.slice().trim_matches('\''))] + String(&'input str), + + #[token("`")] + EmbeddedIonQuote, + Ion(&'input str), + + // Keywords + #[regex("(?i:All)")] + All, + #[regex("(?i:Asc)")] + Asc, + #[regex("(?i:And)")] + And, + #[regex("(?i:As)")] + As, + #[regex("(?i:At)")] + At, + #[regex("(?i:Between)")] + Between, + #[regex("(?i:By)")] + By, + #[regex("(?i:Case)")] + Case, + #[regex("(?i:Cross)")] + Cross, + #[regex("(?i:Cycle)")] + Cycle, + #[regex("(?i:Date)")] + Date, + #[regex("(?i:Desc)")] + Desc, + #[regex("(?i:Distinct)")] + Distinct, + #[regex("(?i:Else)")] + Else, + #[regex("(?i:End)")] + End, + #[regex("(?i:Escape)")] + Escape, + #[regex("(?i:Except)")] + Except, + #[regex("(?i:Exclude)")] + Exclude, + #[regex("(?i:False)")] + False, + #[regex("(?i:First)")] + First, + #[regex("(?i:For)")] + For, + #[regex("(?i:Full)")] + Full, + #[regex("(?i:From)")] + From, + #[regex("(?i:Group)")] + Group, + #[regex("(?i:Having)")] + Having, + #[regex("(?i:In)")] + In, + #[regex("(?i:Inner)")] + Inner, + #[regex("(?i:Is)")] + Is, + #[regex("(?i:Intersect)")] + Intersect, + #[regex("(?i:Join)")] + Join, + #[regex("(?i:Last)")] + Last, + #[regex("(?i:Lateral)")] + Lateral, + #[regex("(?i:Left)")] + Left, + #[regex("(?i:Like)")] + Like, + #[regex("(?i:Limit)")] + Limit, + #[regex("(?i:Missing)")] + Missing, + 
#[regex("(?i:Natural)")] + Natural, + #[regex("(?i:Not)")] + Not, + #[regex("(?i:Null)")] + Null, + #[regex("(?i:Nulls)")] + Nulls, + #[regex("(?i:Offset)")] + Offset, + #[regex("(?i:On)")] + On, + #[regex("(?i:Or)")] + Or, + #[regex("(?i:Order)")] + Order, + #[regex("(?i:Outer)")] + Outer, + #[regex("(?i:Partial)")] + Partial, + #[regex("(?i:Pivot)")] + Pivot, + #[regex("(?i:Preserve)")] + Preserve, + #[regex("(?i:Right)")] + Right, + #[regex("(?i:Recursive)")] + Recursive, + #[regex("(?i:Select)")] + Select, + #[regex("(?i:Search)")] + Search, + #[regex("(?i:Table)")] + Table, + #[regex("(?i:Time)")] + Time, + #[regex("(?i:Timestamp)")] + Timestamp, + #[regex("(?i:Then)")] + Then, + #[regex("(?i:True)")] + True, + #[regex("(?i:Union)")] + Union, + #[regex("(?i:Unpivot)")] + Unpivot, + #[regex("(?i:Using)")] + Using, + #[regex("(?i:Value)")] + Value, + #[regex("(?i:Values)")] + Values, + #[regex("(?i:When)")] + When, + #[regex("(?i:Where)")] + Where, + #[regex("(?i:With)")] + With, + #[regex("(?i:Without)")] + Without, + #[regex("(?i:Zone)")] + Zone, +} + +impl<'input> Token<'input> { + pub fn is_keyword(&self) -> bool { + matches!( + self, + Token::All + | Token::Asc + | Token::And + | Token::As + | Token::At + | Token::Between + | Token::By + | Token::Case + | Token::Cross + | Token::Cycle + | Token::Date + | Token::Desc + | Token::Distinct + | Token::Escape + | Token::Except + | Token::First + | Token::For + | Token::Full + | Token::From + | Token::Group + | Token::Having + | Token::In + | Token::Inner + | Token::Is + | Token::Intersect + | Token::Join + | Token::Last + | Token::Lateral + | Token::Left + | Token::Like + | Token::Limit + | Token::Missing + | Token::Natural + | Token::Not + | Token::Null + | Token::Nulls + | Token::Offset + | Token::On + | Token::Or + | Token::Order + | Token::Outer + | Token::Partial + | Token::Pivot + | Token::Preserve + | Token::Right + | Token::Recursive + | Token::Search + | Token::Select + | Token::Table + | Token::Time + | Token::Timestamp + | Token::Then + | Token::Union + | Token::Unpivot + | Token::Using + | Token::Value + | Token::Values + | Token::Where + | Token::With + ) + } +} + +impl<'input> fmt::Display for Token<'input> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Token::Newline => write!(f, "\\n"), + Token::CommentLine(_) => write!(f, "--"), + Token::CommentBlockStart => write!(f, "/*"), + Token::CommentBlock(_) => write!(f, "/**/"), + Token::OpenSquare => write!(f, "["), + Token::CloseSquare => write!(f, "]"), + Token::OpenCurly => write!(f, "{{"), + Token::CloseCurly => write!(f, "}}"), + Token::OpenParen => write!(f, "("), + Token::CloseParen => write!(f, ")"), + Token::OpenDblAngle => write!(f, "<<"), + Token::CloseDblAngle => write!(f, ">>"), + Token::Comma => write!(f, ","), + Token::Semicolon => write!(f, ";"), + Token::Colon => write!(f, ":"), + Token::EqualEqual => write!(f, "=="), + Token::BangEqual => write!(f, "!="), + Token::LessGreater => write!(f, "<>"), + Token::LessEqual => write!(f, "<="), + Token::GreaterEqual => write!(f, ">="), + Token::Equal => write!(f, "="), + Token::LessThan => write!(f, "<"), + Token::GreaterThan => write!(f, ">"), + Token::Minus => write!(f, "-"), + Token::Plus => write!(f, "+"), + Token::Star => write!(f, "*"), + Token::SqlParameter => write!(f, "?"), + Token::Percent => write!(f, "%"), + Token::Slash => write!(f, "/"), + Token::Caret => write!(f, "^"), + Token::Period => write!(f, "."), + Token::DblPipe => write!(f, "||"), + Token::UnquotedIdent(id) => 
write!(f, "<{id}:UNQUOTED_IDENT>"), + Token::QuotedIdent(id) => write!(f, "<{id}:QUOTED_IDENT>"), + Token::UnquotedAtIdentifier(id) => write!(f, "<{id}:UNQUOTED_ATIDENT>"), + Token::QuotedAtIdentifier(id) => write!(f, "<{id}:QUOTED_ATIDENT>"), + Token::Int(txt) => write!(f, "<{txt}:INT>"), + Token::ExpReal(txt) => write!(f, "<{txt}:REAL>"), + Token::Real(txt) => write!(f, "<{txt}:REAL>"), + Token::String(txt) => write!(f, "<{txt}:STRING>"), + Token::EmbeddedIonQuote => write!(f, ""), + Token::Ion(txt) => write!(f, "<{txt}:ION>"), + + Token::All + | Token::Asc + | Token::And + | Token::As + | Token::At + | Token::Between + | Token::By + | Token::Case + | Token::Cross + | Token::Cycle + | Token::Date + | Token::Desc + | Token::Distinct + | Token::Else + | Token::End + | Token::Escape + | Token::Except + | Token::Exclude + | Token::False + | Token::First + | Token::For + | Token::Full + | Token::From + | Token::Group + | Token::Having + | Token::In + | Token::Inner + | Token::Is + | Token::Intersect + | Token::Join + | Token::Last + | Token::Lateral + | Token::Left + | Token::Like + | Token::Limit + | Token::Missing + | Token::Natural + | Token::Not + | Token::Null + | Token::Nulls + | Token::Offset + | Token::On + | Token::Or + | Token::Order + | Token::Outer + | Token::Partial + | Token::Pivot + | Token::Preserve + | Token::Right + | Token::Recursive + | Token::Search + | Token::Select + | Token::Table + | Token::Time + | Token::Timestamp + | Token::Then + | Token::True + | Token::Union + | Token::Unpivot + | Token::Using + | Token::Value + | Token::Values + | Token::When + | Token::Where + | Token::With + | Token::Without + | Token::Zone => { + write!(f, "{}", format!("{self:?}").to_uppercase()) + } + } + } +} diff --git a/partiql-types/Cargo.toml b/partiql-types/Cargo.toml index 1c18d374..fef1a7fa 100644 --- a/partiql-types/Cargo.toml +++ b/partiql-types/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -21,17 +21,17 @@ edition.workspace = true bench = false [dependencies] -partiql-common = { path = "../partiql-common", version = "0.10.*"} -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" +partiql-common = { path = "../partiql-common", version = "0.10.*" } +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" -miette = { version ="7.2.*", features = ["fancy"] } -thiserror = "1.*" +miette = { version = "7", features = ["fancy"] } +thiserror = "1" -indexmap = "2.2" +indexmap = "2.5" derivative = "2.2" [dev-dependencies] -criterion = "0.4" +criterion = "0.5" diff --git a/partiql-value/Cargo.toml b/partiql-value/Cargo.toml index 79b256c9..f00096d1 100644 --- a/partiql-value/Cargo.toml +++ b/partiql-value/Cargo.toml @@ -9,10 +9,10 @@ readme = "../README.md" keywords = ["sql", "parser", "query", "compilers", "interpreters"] categories = ["database", "compilers"] exclude = [ - "**/.git/**", - "**/.github/**", - "**/.travis.yml", - "**/.appveyor.yml", + "**/.git/**", + "**/.github/**", + "**/.travis.yml", + "**/.appveyor.yml", ] version.workspace = true edition.workspace = true @@ -21,27 +21,27 @@ edition.workspace = true bench = false [dependencies] -ordered-float = "3.*" -itertools = "0.10.*" -unicase = "2.6" -rust_decimal = { version = "1.25.0", 
default-features = false, features = ["std"] } -rust_decimal_macros = "1.26" -ion-rs = "0.18" +ordered-float = "4" +itertools = "0.13" +unicase = "2.7" +rust_decimal = { version = "1.36.0", default-features = false, features = ["std"] } +rust_decimal_macros = "1.36" + time = { version = "0.3", features = ["macros"] } once_cell = "1" -regex = "1.7" +regex = "1.10" -serde = { version = "1.*", features = ["derive"], optional = true } +serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [features] default = [] serde = [ - "dep:serde", - "time/serde", - "rust_decimal/serde-with-str", - "rust_decimal/serde", - "ordered-float/serde" + "dep:serde", + "time/serde", + "rust_decimal/serde-with-str", + "rust_decimal/serde", + "ordered-float/serde" ] diff --git a/partiql/Cargo.toml b/partiql/Cargo.toml index d288876f..86077d04 100644 --- a/partiql/Cargo.toml +++ b/partiql/Cargo.toml @@ -32,12 +32,12 @@ partiql-logical = { path = "../partiql-logical" } partiql-logical-planner = { path = "../partiql-logical-planner" } partiql-eval = { path = "../partiql-eval" } -insta = "1.38.0" +insta = "1.40.0" thiserror = "1.0" -itertools = "0.12" +itertools = "0.13" criterion = "0.5" rand = "0.8"
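
A minimal usage sketch for the new lexer module introduced above; it is not part of the patch itself. It shows how the pieces in partiql-parser/src/lexer/ compose: PartiqlLexer tokenizes PartiQL text while recording newline positions in a LineOffsetTracker, and CommentSkippingLexer wraps it to drop CommentLine/CommentBlock tokens before they reach the parser. Because the lexer types are pub(crate), the sketch assumes it lives inside the partiql-parser crate, e.g. as an additional unit test in partiql-parser/src/lexer/mod.rs; the query string and the asserted positions are illustrative, not taken from the patch.

#[test]
fn comment_skipping_example() -> Result<(), ParseError<'static, BytePosition>> {
    use partiql_common::syntax::line_offset_tracker::LineOffsetTracker;
    use partiql_common::syntax::location::LineAndColumn;

    let query = "SELECT /*comment*/ g -- trailing";
    let mut offset_tracker = LineOffsetTracker::default();

    // Wrap the PartiQL lexer so comment tokens are filtered out before parsing.
    let lexer = CommentSkippingLexer::new(PartiqlLexer::new(query, &mut offset_tracker));
    let toks: Vec<_> = lexer.collect::<Result<Vec<_>, _>>()?;

    // Only the non-comment tokens remain.
    assert_eq!(
        vec![Token::Select, Token::UnquotedIdent("g")],
        toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
    );

    // Newline positions recorded by the tracker map byte offsets back to line/column.
    assert_eq!(
        LineAndColumn::from(offset_tracker.at(query, 7.into()).unwrap()),
        LineAndColumn::new(1, 8).unwrap()
    );
    Ok(())
}

Keeping comment skipping in a wrapper iterator, rather than in Token itself, matches the layering the tests above exercise: PartiqlLexer still surfaces comment tokens for tools that want them, while the parser consumes the filtered stream.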