Skip to content
This repository has been archived by the owner on Jul 27, 2023. It is now read-only.

Commit

Permalink
perf: Cursor based lexer (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser authored Jul 26, 2023
1 parent 13196fc commit 593b46b
Show file tree
Hide file tree
Showing 30 changed files with 2,497 additions and 1,799 deletions.
10 changes: 3 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ jobs:

- uses: Swatinem/rust-cache@v2

- name: run tests with num-bigint
run: cargo test --all --no-default-features --features num-bigint
- name: run tests with malachite-bigint and all features
run: cargo test --all --features malachite-bigint,full-lexer,serde
- name: run tests
run: cargo test --all --all-features

lint:
name: Check Rust code with rustfmt and clippy
Expand All @@ -53,9 +51,7 @@ jobs:
- name: run rustfmt
run: cargo fmt --all -- --check
- name: run clippy
run: cargo clippy --all --no-default-features --features num-bigint
- name: run clippy
run: cargo clippy --all --features malachite-bigint,full-lexer,serde -- -Dwarnings
run: cargo clippy --all --all-features -- -Dwarnings

- uses: actions/setup-python@v4
with:
Expand Down
6 changes: 0 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,17 @@ rustpython-literal = { path = "literal" }
rustpython-format = { path = "format" }
rustpython-parser = { path = "parser", default-features = false }

ahash = "0.7.6"
anyhow = "1.0.45"
cfg-if = "1.0"
insta = "1.14.0"
itertools = "0.10.3"
is-macro = "0.2.2"
log = "0.4.16"
num-complex = "0.4.0"
num-bigint = "0.4.3"
num-traits = "0.2"
pyo3 = { version = "0.19.0" }
malachite-bigint = { version = "0.1.0" }
memchr = "2.5.0"
rand = "0.8.5"
serde = "1.0"
static_assertions = "1.1"
once_cell = "1.17.1"
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }

[profile.dev.package."*"]
Expand Down
6 changes: 1 addition & 5 deletions ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,10 @@ edition = "2021"
repository = "https://github.com/RustPython/Parser/"
license = "MIT"

[features]
default = ["malachite-bigint"]

[dependencies]
rustpython-parser-core = { workspace = true }
rustpython-literal = { workspace = true, optional = true }

is-macro = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
static_assertions = "1.1.0"
2 changes: 1 addition & 1 deletion ast/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
use rustpython_parser_core::text_size::TextRange;

use crate::bigint::BigInt;
use crate::Ranged;
use num_bigint::BigInt;

pub type String = std::string::String;

Expand Down
4 changes: 2 additions & 2 deletions ast/src/generic.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use crate::text_size::TextRange;
pub use crate::{builtin::*, text_size::TextSize, ConversionFlag, Node};
use crate::text_size::{TextRange, TextSize};
pub(crate) use crate::{builtin::*, ConversionFlag, Node};
use std::fmt::{self, Debug};

// This file was originally generated from asdl by a python script, but we now edit it manually
Expand Down
5 changes: 0 additions & 5 deletions ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ mod generic;
mod impls;
mod ranged;

#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;

pub use builtin::*;
pub use generic::*;
pub use ranged::Ranged;
Expand Down
2 changes: 0 additions & 2 deletions ast/src/ranged.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

use crate::text_size::{TextRange, TextSize};

pub use crate::builtin::*;

pub trait Ranged {
fn range(&self) -> TextRange;

Expand Down
1 change: 0 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ ruff_text_size = { path = "../ruff_text_size" }

serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
is-macro.workspace = true
memchr.workspace = true

[features]
default = []
4 changes: 1 addition & 3 deletions format/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ rustpython-literal = { workspace = true }
bitflags = "2.3.1"
itertools = "0.10.5"
num-traits = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }

[features]
default = ["malachite-bigint"]
2 changes: 1 addition & 1 deletion format/src/cformat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{
str::FromStr,
};

use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};

#[derive(Debug, PartialEq)]
pub enum CFormatErrorType {
Expand Down
2 changes: 1 addition & 1 deletion format/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use rustpython_literal::format::Case;
use std::ops::Deref;
use std::{cmp, str::FromStr};

use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};

trait FormatParse {
fn parse(text: &str) -> (Option<Self>, &str)
Expand Down
5 changes: 0 additions & 5 deletions format/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;

pub use crate::format::*;

pub mod cformat;
Expand Down
2 changes: 1 addition & 1 deletion literal/src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ impl<'a> Escape for AsciiEscape<'a> {
fn layout(&self) -> &EscapeLayout {
&self.layout
}

#[allow(unsafe_code)]
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
formatter.write_str(unsafe {
// SAFETY: this function must be called only when source is printable ascii characters
Expand Down
11 changes: 2 additions & 9 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,11 @@ license = "MIT"
edition = "2021"

[features]
default = ["malachite-bigint"]
serde = ["dep:serde", "rustpython-parser-core/serde"]
full-lexer = []
malachite-bigint = ["dep:malachite-bigint", "rustpython-ast/malachite-bigint"]
num-bigint = ["dep:num-bigint", "rustpython-ast/num-bigint"]

[build-dependencies]
anyhow = { workspace = true }
lalrpop = { version = "0.20.0", default-features = false, optional = true }
phf_codegen = "0.11.1"
tiny-keccak = { version = "2", features = ["sha3"] }

[dependencies]
Expand All @@ -27,18 +22,16 @@ rustpython-parser-core = { workspace = true }

itertools = { workspace = true }
is-macro = { workspace = true }
log = { workspace = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
unicode_names2 = { workspace = true }

unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
lalrpop-util = { version = "0.20.0", default-features = false }
phf = "0.11.1"
rustc-hash = "1.1.0"
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
static_assertions = "1.1.0"

[dev-dependencies]
insta = { workspace = true }
58 changes: 2 additions & 56 deletions parser/build.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use tiny_keccak::{Hasher, Sha3};

fn main() -> anyhow::Result<()> {
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
gen_phf(&out_dir);

const SOURCE: &str = "src/python.lalrpop";
println!("cargo:rerun-if-changed={SOURCE}");

Expand All @@ -16,6 +13,7 @@ fn main() -> anyhow::Result<()> {

#[cfg(feature = "lalrpop")]
{
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
target = out_dir.join("src/python.rs");
}
#[cfg(not(feature = "lalrpop"))]
Expand Down Expand Up @@ -113,55 +111,3 @@ fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
}
*actual_sha3 == expected_sha3
}

/// Generates `keywords.rs` inside `out_dir`.
///
/// The generated file contains a `phf_codegen` map whose keys are Python keyword
/// spellings (plus `...`) and whose values are the matching `Tok::…` variant paths,
/// emitted as source text.
// NOTE(review): presumably the lexer `include!`s this file from `OUT_DIR` — confirm.
///
/// # Panics
///
/// Panics if `keywords.rs` cannot be created, written, or flushed. Aborting the
/// build script is preferable to silently leaving a truncated table behind.
fn gen_phf(out_dir: &Path) {
    let mut kwds = phf_codegen::Map::new();
    let kwds = kwds
        // Alphabetical keywords:
        .entry("...", "Tok::Ellipsis")
        .entry("False", "Tok::False")
        .entry("None", "Tok::None")
        .entry("True", "Tok::True")
        // more so "standard" keywords
        .entry("and", "Tok::And")
        .entry("as", "Tok::As")
        .entry("assert", "Tok::Assert")
        .entry("async", "Tok::Async")
        .entry("await", "Tok::Await")
        .entry("break", "Tok::Break")
        .entry("case", "Tok::Case")
        .entry("class", "Tok::Class")
        .entry("continue", "Tok::Continue")
        .entry("def", "Tok::Def")
        .entry("del", "Tok::Del")
        .entry("elif", "Tok::Elif")
        .entry("else", "Tok::Else")
        .entry("except", "Tok::Except")
        .entry("finally", "Tok::Finally")
        .entry("for", "Tok::For")
        .entry("from", "Tok::From")
        .entry("global", "Tok::Global")
        .entry("if", "Tok::If")
        .entry("import", "Tok::Import")
        .entry("in", "Tok::In")
        .entry("is", "Tok::Is")
        .entry("lambda", "Tok::Lambda")
        .entry("match", "Tok::Match")
        .entry("nonlocal", "Tok::Nonlocal")
        .entry("not", "Tok::Not")
        .entry("or", "Tok::Or")
        .entry("pass", "Tok::Pass")
        .entry("raise", "Tok::Raise")
        .entry("return", "Tok::Return")
        .entry("try", "Tok::Try")
        .entry("type", "Tok::Type")
        .entry("while", "Tok::While")
        .entry("with", "Tok::With")
        .entry("yield", "Tok::Yield")
        .build();

    let target = out_dir.join("keywords.rs");
    let mut out = BufWriter::new(File::create(target).expect("failed to create keywords.rs"));
    writeln!(out, "{kwds}").expect("failed to write keywords.rs");
    // Flush explicitly: `BufWriter`'s `Drop` impl discards I/O errors, so without
    // this a failed write to disk would go unnoticed and leave a truncated file.
    out.flush().expect("failed to flush keywords.rs");
}
4 changes: 2 additions & 2 deletions parser/src/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use rustc_hash::FxHashSet;
use rustpython_ast::Ranged;

pub(crate) struct ArgumentList {
pub args: Vec<ast::Expr>,
pub keywords: Vec<ast::Keyword>,
pub(crate) args: Vec<ast::Expr>,
pub(crate) keywords: Vec<ast::Keyword>,
}

// Perform validation of function/lambda arguments in a function definition.
Expand Down
Loading

0 comments on commit 593b46b

Please sign in to comment.