Skip to content
This repository has been archived by the owner on Jul 27, 2023. It is now read-only.

perf: Cursor based lexer #38

Merged
merged 6 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ jobs:

- uses: Swatinem/rust-cache@v2

- name: run tests with num-bigint
run: cargo test --all --no-default-features --features num-bigint
- name: run tests with malachite-bigint and all features
run: cargo test --all --features malachite-bigint,full-lexer,serde
- name: run tests
run: cargo test --all --all-features

lint:
name: Check Rust code with rustfmt and clippy
Expand All @@ -53,9 +51,7 @@ jobs:
- name: run rustfmt
run: cargo fmt --all -- --check
- name: run clippy
run: cargo clippy --all --no-default-features --features num-bigint
- name: run clippy
run: cargo clippy --all --features malachite-bigint,full-lexer,serde -- -Dwarnings
run: cargo clippy --all --all-features -- -Dwarnings

- uses: actions/setup-python@v4
with:
Expand Down
6 changes: 0 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,17 @@ rustpython-literal = { path = "literal" }
rustpython-format = { path = "format" }
rustpython-parser = { path = "parser", default-features = false }

ahash = "0.7.6"
anyhow = "1.0.45"
cfg-if = "1.0"
insta = "1.14.0"
itertools = "0.10.3"
is-macro = "0.2.2"
log = "0.4.16"
num-complex = "0.4.0"
num-bigint = "0.4.3"
num-traits = "0.2"
pyo3 = { version = "0.19.0" }
malachite-bigint = { version = "0.1.0" }
memchr = "2.5.0"
rand = "0.8.5"
serde = "1.0"
static_assertions = "1.1"
once_cell = "1.17.1"
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }

[profile.dev.package."*"]
Expand Down
6 changes: 1 addition & 5 deletions ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,10 @@ edition = "2021"
repository = "https://github.com/RustPython/Parser/"
license = "MIT"

[features]
default = ["malachite-bigint"]

[dependencies]
rustpython-parser-core = { workspace = true }
rustpython-literal = { workspace = true, optional = true }

is-macro = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
static_assertions = "1.1.0"
2 changes: 1 addition & 1 deletion ast/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
use rustpython_parser_core::text_size::TextRange;

use crate::bigint::BigInt;
use crate::Ranged;
use num_bigint::BigInt;

pub type String = std::string::String;

Expand Down
4 changes: 2 additions & 2 deletions ast/src/generic.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use crate::text_size::TextRange;
pub use crate::{builtin::*, text_size::TextSize, ConversionFlag, Node};
use crate::text_size::{TextRange, TextSize};
pub(crate) use crate::{builtin::*, ConversionFlag, Node};
use std::fmt::{self, Debug};

// This file was originally generated from asdl by a python script, but we now edit it manually
Expand Down
5 changes: 0 additions & 5 deletions ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ mod generic;
mod impls;
mod ranged;

#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;

pub use builtin::*;
pub use generic::*;
pub use ranged::Ranged;
Expand Down
2 changes: 0 additions & 2 deletions ast/src/ranged.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

use crate::text_size::{TextRange, TextSize};

pub use crate::builtin::*;

pub trait Ranged {
fn range(&self) -> TextRange;

Expand Down
1 change: 0 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ ruff_text_size = { path = "../ruff_text_size" }

serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
is-macro.workspace = true
memchr.workspace = true

[features]
default = []
4 changes: 1 addition & 3 deletions format/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ rustpython-literal = { workspace = true }
bitflags = "2.3.1"
itertools = "0.10.5"
num-traits = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }

[features]
default = ["malachite-bigint"]
2 changes: 1 addition & 1 deletion format/src/cformat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{
str::FromStr,
};

use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};

#[derive(Debug, PartialEq)]
pub enum CFormatErrorType {
Expand Down
2 changes: 1 addition & 1 deletion format/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use rustpython_literal::format::Case;
use std::ops::Deref;
use std::{cmp, str::FromStr};

use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};

trait FormatParse {
fn parse(text: &str) -> (Option<Self>, &str)
Expand Down
5 changes: 0 additions & 5 deletions format/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;

pub use crate::format::*;

pub mod cformat;
Expand Down
2 changes: 1 addition & 1 deletion literal/src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ impl<'a> Escape for AsciiEscape<'a> {
fn layout(&self) -> &EscapeLayout {
&self.layout
}

#[allow(unsafe_code)]
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
formatter.write_str(unsafe {
// SAFETY: this function must be called only when source is printable ascii characters
Expand Down
11 changes: 2 additions & 9 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,11 @@ license = "MIT"
edition = "2021"

[features]
default = ["malachite-bigint"]
serde = ["dep:serde", "rustpython-parser-core/serde"]
full-lexer = []
malachite-bigint = ["dep:malachite-bigint", "rustpython-ast/malachite-bigint"]
num-bigint = ["dep:num-bigint", "rustpython-ast/num-bigint"]

[build-dependencies]
anyhow = { workspace = true }
lalrpop = { version = "0.20.0", default-features = false, optional = true }
phf_codegen = "0.11.1"
tiny-keccak = { version = "2", features = ["sha3"] }

[dependencies]
Expand All @@ -27,18 +22,16 @@ rustpython-parser-core = { workspace = true }

itertools = { workspace = true }
is-macro = { workspace = true }
log = { workspace = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
unicode_names2 = { workspace = true }

unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
lalrpop-util = { version = "0.20.0", default-features = false }
phf = "0.11.1"
rustc-hash = "1.1.0"
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
static_assertions = "1.1.0"

[dev-dependencies]
insta = { workspace = true }
58 changes: 2 additions & 56 deletions parser/build.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use tiny_keccak::{Hasher, Sha3};

fn main() -> anyhow::Result<()> {
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
gen_phf(&out_dir);

const SOURCE: &str = "src/python.lalrpop";
println!("cargo:rerun-if-changed={SOURCE}");

Expand All @@ -16,6 +13,7 @@ fn main() -> anyhow::Result<()> {

#[cfg(feature = "lalrpop")]
{
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
target = out_dir.join("src/python.rs");
}
#[cfg(not(feature = "lalrpop"))]
Expand Down Expand Up @@ -113,55 +111,3 @@ fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
}
*actual_sha3 == expected_sha3
}

/// Generates `keywords.rs` inside `out_dir`.
///
/// Builds a `phf_codegen::Map` whose keys are the reserved words (plus the
/// `...` literal) and whose values are the source text of the corresponding
/// `Tok` variant, then writes the rendered map to `<out_dir>/keywords.rs`.
///
/// # Panics
///
/// Panics if the output file cannot be created, written, or flushed. The
/// flush is performed explicitly because `BufWriter`'s `Drop` impl discards
/// I/O errors, which would otherwise leave a silently truncated file behind.
fn gen_phf(out_dir: &Path) {
    let mut kwds = phf_codegen::Map::new();
    let kwds = kwds
        // Literal-like tokens (`...` and the capitalised constants):
        .entry("...", "Tok::Ellipsis")
        .entry("False", "Tok::False")
        .entry("None", "Tok::None")
        .entry("True", "Tok::True")
        // Ordinary lowercase keywords, in alphabetical order:
        .entry("and", "Tok::And")
        .entry("as", "Tok::As")
        .entry("assert", "Tok::Assert")
        .entry("async", "Tok::Async")
        .entry("await", "Tok::Await")
        .entry("break", "Tok::Break")
        .entry("case", "Tok::Case")
        .entry("class", "Tok::Class")
        .entry("continue", "Tok::Continue")
        .entry("def", "Tok::Def")
        .entry("del", "Tok::Del")
        .entry("elif", "Tok::Elif")
        .entry("else", "Tok::Else")
        .entry("except", "Tok::Except")
        .entry("finally", "Tok::Finally")
        .entry("for", "Tok::For")
        .entry("from", "Tok::From")
        .entry("global", "Tok::Global")
        .entry("if", "Tok::If")
        .entry("import", "Tok::Import")
        .entry("in", "Tok::In")
        .entry("is", "Tok::Is")
        .entry("lambda", "Tok::Lambda")
        .entry("match", "Tok::Match")
        .entry("nonlocal", "Tok::Nonlocal")
        .entry("not", "Tok::Not")
        .entry("or", "Tok::Or")
        .entry("pass", "Tok::Pass")
        .entry("raise", "Tok::Raise")
        .entry("return", "Tok::Return")
        .entry("try", "Tok::Try")
        .entry("type", "Tok::Type")
        .entry("while", "Tok::While")
        .entry("with", "Tok::With")
        .entry("yield", "Tok::Yield")
        .build();

    let path = out_dir.join("keywords.rs");
    let file = File::create(&path)
        .unwrap_or_else(|err| panic!("failed to create {}: {err}", path.display()));
    let mut out = BufWriter::new(file);
    writeln!(out, "{kwds}")
        .unwrap_or_else(|err| panic!("failed to write {}: {err}", path.display()));
    // Flush explicitly: errors raised while flushing in `Drop` are ignored.
    out.flush()
        .unwrap_or_else(|err| panic!("failed to flush {}: {err}", path.display()));
}
4 changes: 2 additions & 2 deletions parser/src/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use rustc_hash::FxHashSet;
use rustpython_ast::Ranged;

pub(crate) struct ArgumentList {
pub args: Vec<ast::Expr>,
pub keywords: Vec<ast::Keyword>,
pub(crate) args: Vec<ast::Expr>,
pub(crate) keywords: Vec<ast::Keyword>,
}

// Perform validation of function/lambda arguments in a function definition.
Expand Down
Loading