From 271e5d5a778d58ed1d7e1815db6f4555cdc6a329 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 16 Aug 2024 12:47:36 +0200 Subject: [PATCH 01/10] Factor out crate translation in tests --- charon/tests/crate_data.rs | 64 +++++++++----------------------------- charon/tests/util/mod.rs | 46 +++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 50 deletions(-) diff --git a/charon/tests/crate_data.rs b/charon/tests/crate_data.rs index f37af49f..3868c187 100644 --- a/charon/tests/crate_data.rs +++ b/charon/tests/crate_data.rs @@ -1,49 +1,13 @@ -#![feature(rustc_private)] - -use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; use charon_lib::ast::{AnyTransItem, TranslatedCrate}; use itertools::Itertools; use std::collections::HashMap; -use std::{error::Error, fs::File, io::BufReader, process::Command}; - -use charon_lib::{ - export::CrateData, gast::*, logger, meta::*, names::*, types::*, values::ScalarValue, -}; - -fn translate(code: impl std::fmt::Display) -> Result> { - // Initialize the logger - logger::initialize_logger(); - - // Write the code to a temporary file. - use std::io::Write; - let tmp_dir = tempfile::TempDir::new()?; - let input_path = tmp_dir.path().join("test_crate.rs"); - { - let mut tmp_file = File::create(&input_path)?; - write!(tmp_file, "{}", code)?; - drop(tmp_file); - } - // Call charon - let output_path = tmp_dir.path().join("test_crate.llbc"); - Command::cargo_bin("charon")? - .arg("--no-cargo") - .arg("--rustc-flag=--edition=2021") - .arg("--input") - .arg(input_path) - .arg("--dest-file") - .arg(&output_path) - .assert() - .try_success()?; - - // Extract the computed crate data. - let crate_data: CrateData = { - let file = File::open(output_path)?; - let reader = BufReader::new(file); - serde_json::from_reader(reader)? - }; +use charon_lib::ast::*; + +mod util; - Ok(crate_data.translated) +fn translate(code: impl std::fmt::Display) -> anyhow::Result { + util::translate_rust_text(code.to_string()) } /// `Name` is a complex datastructure; to inspect it we serialize it a little bit. @@ -110,7 +74,7 @@ fn items_by_name<'c>(crate_data: &'c TranslatedCrate) -> HashMap Result<(), Box> { +fn type_decl() -> anyhow::Result<()> { let crate_data = translate( " struct Struct; @@ -125,7 +89,7 @@ fn type_decl() -> Result<(), Box> { } #[test] -fn file_name() -> Result<(), Box> { +fn file_name() -> anyhow::Result<()> { let crate_data = translate( " type Foo = Option<()>; @@ -149,7 +113,7 @@ fn file_name() -> Result<(), Box> { } #[test] -fn spans() -> Result<(), Box> { +fn spans() -> anyhow::Result<()> { let crate_data = translate( " pub fn sum(s: &[u32]) -> u32 { @@ -178,7 +142,7 @@ fn spans() -> Result<(), Box> { } #[test] -fn predicate_origins() -> Result<(), Box> { +fn predicate_origins() -> anyhow::Result<()> { use PredicateOrigin::*; let crate_data = translate( " @@ -277,7 +241,7 @@ fn predicate_origins() -> Result<(), Box> { } #[test] -fn attributes() -> Result<(), Box> { +fn attributes() -> anyhow::Result<()> { // Use the `clippy::` prefix because it's ignored by rustc. let unknown_attrs = |item_meta: &ItemMeta| { item_meta @@ -362,7 +326,7 @@ fn attributes() -> Result<(), Box> { } #[test] -fn visibility() -> Result<(), Box> { +fn visibility() -> anyhow::Result<()> { let crate_data = translate( r#" pub struct Pub; @@ -394,7 +358,7 @@ fn visibility() -> Result<(), Box> { } #[test] -fn discriminants() -> Result<(), Box> { +fn discriminants() -> anyhow::Result<()> { let crate_data = translate( r#" enum Foo { @@ -423,7 +387,7 @@ fn discriminants() -> Result<(), Box> { } #[test] -fn rename_attribute() -> Result<(), Box> { +fn rename_attribute() -> anyhow::Result<()> { let crate_data = translate( r#" #![feature(register_tool)] @@ -592,7 +556,7 @@ fn rename_attribute() -> Result<(), Box> { } #[test] -fn declaration_groups() -> Result<(), Box> { +fn declaration_groups() -> anyhow::Result<()> { let crate_data = translate( r#" fn foo() { diff --git a/charon/tests/util/mod.rs b/charon/tests/util/mod.rs index 5c6e6eab..0986c8c2 100644 --- a/charon/tests/util/mod.rs +++ b/charon/tests/util/mod.rs @@ -1,9 +1,18 @@ //! Shared utility functions for use in tests. //! //! This is in `util/mod.rs` instead of `util.rs` to avoid cargo treating it like a test file. + +// Needed because this is imported from various tests that each use different items from this +// module. +#![allow(dead_code)] +use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; use snapbox; use snapbox::filter::Filter; use std::path::Path; +use std::{error::Error, fs::File, io::BufReader, process::Command}; + +use charon_lib::ast::TranslatedCrate; +use charon_lib::{export::CrateData, logger}; #[derive(Clone, Copy)] pub enum Action { @@ -48,3 +57,40 @@ fn expect_file_contents(path: &Path, actual: snapbox::Data) -> snapbox::assert:: Ok(()) } } + +/// Given a string that contains rust code, this calls charon on it and returns the result. +pub fn translate_rust_text(code: String) -> anyhow::Result { + // Initialize the logger + logger::initialize_logger(); + + // Write the code to a temporary file. + use std::io::Write; + let tmp_dir = tempfile::TempDir::new()?; + let input_path = tmp_dir.path().join("test_crate.rs"); + { + let mut tmp_file = File::create(&input_path)?; + write!(tmp_file, "{}", code)?; + drop(tmp_file); + } + + // Call charon + let output_path = tmp_dir.path().join("test_crate.llbc"); + Command::cargo_bin("charon")? + .arg("--no-cargo") + .arg("--rustc-flag=--edition=2021") + .arg("--input") + .arg(input_path) + .arg("--dest-file") + .arg(&output_path) + .assert() + .try_success()?; + + // Extract the computed crate data. + let crate_data: CrateData = { + let file = File::open(output_path)?; + let reader = BufReader::new(file); + serde_json::from_reader(reader)? + }; + + Ok(crate_data.translated) +} From ebef65af1c842864cacb683654758e30aec31ab8 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 19 Aug 2024 16:46:20 +0200 Subject: [PATCH 02/10] Fix regression in version errors --- charon-ml/src/LlbcOfJson.ml | 75 ++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/charon-ml/src/LlbcOfJson.ml b/charon-ml/src/LlbcOfJson.ml index c7da089f..cf5a60f8 100644 --- a/charon-ml/src/LlbcOfJson.ml +++ b/charon-ml/src/LlbcOfJson.ml @@ -181,43 +181,42 @@ let split_global (gid_conv : global_id_converter) global : (global_decl, fun_decl) let crate_of_json (js : json) : (crate, string) result = - combine_error_msgs js __FUNCTION__ - begin - let* crate = gcrate_of_json expr_body_of_json js in - (* When deserializing the globals, we split the global declarations - * between the globals themselves and their bodies, which are simply - * functions with no arguments. We add the global bodies to the list - * of function declarations: the (fresh) ids we use for those bodies - * are simply given by: [num_functions + global_id] *) - let gid_conv = - { fun_count = List.length (FunDeclId.Map.bindings crate.fun_decls) } - in - let globals, global_bodies = - List.split - (List.map - (fun (_, g) -> split_global gid_conv g) - (GlobalDeclId.Map.bindings crate.global_decls)) - in + begin + let* crate = gcrate_of_json expr_body_of_json js in + (* When deserializing the globals, we split the global declarations + * between the globals themselves and their bodies, which are simply + * functions with no arguments. We add the global bodies to the list + * of function declarations: the (fresh) ids we use for those bodies + * are simply given by: [num_functions + global_id] *) + let gid_conv = + { fun_count = List.length (FunDeclId.Map.bindings crate.fun_decls) } + in + let globals, global_bodies = + List.split + (List.map + (fun (_, g) -> split_global gid_conv g) + (GlobalDeclId.Map.bindings crate.global_decls)) + in - (* Add the global bodies to the list of functions *) - let fun_decls = - List.fold_left - (fun m (d : fun_decl) -> FunDeclId.Map.add d.def_id d m) - crate.fun_decls global_bodies - in - let global_decls = - GlobalDeclId.Map.of_list - (List.map (fun (d : global_decl) -> (d.def_id, d)) globals) - in + (* Add the global bodies to the list of functions *) + let fun_decls = + List.fold_left + (fun m (d : fun_decl) -> FunDeclId.Map.add d.def_id d m) + crate.fun_decls global_bodies + in + let global_decls = + GlobalDeclId.Map.of_list + (List.map (fun (d : global_decl) -> (d.def_id, d)) globals) + in - Ok - { - name = crate.name; - declarations = crate.declarations; - type_decls = crate.type_decls; - fun_decls; - global_decls; - trait_decls = crate.trait_decls; - trait_impls = crate.trait_impls; - } - end + Ok + { + name = crate.name; + declarations = crate.declarations; + type_decls = crate.type_decls; + fun_decls; + global_decls; + trait_decls = crate.trait_decls; + trait_impls = crate.trait_impls; + } + end From 2ae4b7e19a3f11a1b0ec22a335f72d8fee78d696 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 13 Aug 2024 12:09:36 +0200 Subject: [PATCH 03/10] Reimplement the name matcher in rust --- charon-ml/tests/Test_NameMatcher.ml | 2 +- charon-ml/tests/Tests.ml | 3 +- charon/Cargo.lock | 60 +++++- charon/Cargo.toml | 2 + charon/src/ast/assumed.rs | 27 +-- charon/src/lib.rs | 1 + charon/src/name_matcher/mod.rs | 151 ++++++++++++++ charon/src/name_matcher/parser.rs | 193 ++++++++++++++++++ charon/src/pretty/fmt_with_ctx.rs | 2 +- charon/tests/crate_data.rs | 2 +- charon/tests/test-rust-name-matcher.rs | 116 +++++++++++ ...er-tests.out => ml-name-matcher-tests.out} | 0 ...cher-tests.rs => ml-name-matcher-tests.rs} | 0 charon/tests/util/mod.rs | 5 +- 14 files changed, 545 insertions(+), 19 deletions(-) create mode 100644 charon/src/name_matcher/mod.rs create mode 100644 charon/src/name_matcher/parser.rs create mode 100644 charon/tests/test-rust-name-matcher.rs rename charon/tests/ui/{name-matcher-tests.out => ml-name-matcher-tests.out} (100%) rename charon/tests/ui/{name-matcher-tests.rs => ml-name-matcher-tests.rs} (100%) diff --git a/charon-ml/tests/Test_NameMatcher.ml b/charon-ml/tests/Test_NameMatcher.ml index f3853d01..09874cbc 100644 --- a/charon-ml/tests/Test_NameMatcher.ml +++ b/charon-ml/tests/Test_NameMatcher.ml @@ -159,7 +159,7 @@ module PatternTest = struct else true end -(* This reads the output generated from the `ui/name-matcher-tests.rs` test +(* This reads the output generated from the `ui/ml-name-matcher-tests.rs` test file, and verifies that for each `#[pattern::...]` annotation, the item matches the pattern. See the `PatternTest` module for details of what annotations are available. *) diff --git a/charon-ml/tests/Tests.ml b/charon-ml/tests/Tests.ml index 4184bc44..9d85127e 100644 --- a/charon-ml/tests/Tests.ml +++ b/charon-ml/tests/Tests.ml @@ -15,4 +15,5 @@ let () = let () = Test_Deserialize.run_tests "../../../tests/serialized" let () = - Test_NameMatcher.run_tests "../../../tests/serialized/name-matcher-tests.llbc" + Test_NameMatcher.run_tests + "../../../tests/serialized/ml-name-matcher-tests.llbc" diff --git a/charon/Cargo.lock b/charon/Cargo.lock index 19e68bec..9506e15e 100644 --- a/charon/Cargo.lock +++ b/charon/Cargo.lock @@ -84,6 +84,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "assert_cmd" version = "2.0.14" @@ -133,6 +139,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "brownstone" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5839ee4f953e811bfdcf223f509cb2c6a3e1447959b0bff459405575bc17f22" +dependencies = [ + "arrayvec 0.7.4", +] + [[package]] name = "bstr" version = "1.9.1" @@ -179,6 +194,8 @@ dependencies = [ "libtest-mimic", "log", "macros", + "nom", + "nom-supreme", "petgraph", "pretty", "regex", @@ -567,6 +584,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "indent_write" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" + [[package]] name = "index_vec" version = "0.1.3" @@ -631,6 +654,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "joinery" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" + [[package]] name = "lazy_static" version = "1.5.0" @@ -691,6 +720,35 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom-supreme" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27" +dependencies = [ + "brownstone", + "indent_write", + "joinery", + "memchr", + "nom", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -793,7 +851,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b55c4d17d994b637e2f4daf6e5dc5d660d209d5642377d675d7a1c3ab69fa579" dependencies = [ - "arrayvec", + "arrayvec 0.5.2", "typed-arena", "unicode-width", ] diff --git a/charon/Cargo.toml b/charon/Cargo.toml index b5f7bd4d..e6ef56e9 100644 --- a/charon/Cargo.toml +++ b/charon/Cargo.toml @@ -53,6 +53,8 @@ index_vec = { version = "0.1.3", features = ["serde"] } itertools = "0.13" lazy_static = "1.4.0" log = "0.4.17" +nom = "7.1.3" +nom-supreme = "0.8.0" petgraph = "0.6.2" pretty = "0.12" regex = "1.7.1" diff --git a/charon/src/ast/assumed.rs b/charon/src/ast/assumed.rs index 06da4818..59dc14d6 100644 --- a/charon/src/ast/assumed.rs +++ b/charon/src/ast/assumed.rs @@ -5,8 +5,9 @@ //! we ignore the disambiguators (see [crate::names] and [crate::names_utils]). // TODO: rename to "primitive" +use crate::ast; +use crate::names::*; use crate::types::*; -use crate::ullbc_ast; use macros::EnumIsA; // Built-in functions @@ -35,10 +36,10 @@ pub struct FunInfo { impl BuiltinFun { /// Converts to the ullbc equivalent. Panics if `self` is `Panic` as this should be handled /// separately. - pub fn to_ullbc_builtin_fun(self) -> ullbc_ast::AssumedFunId { + pub fn to_ullbc_builtin_fun(self) -> ast::AssumedFunId { match self { - BuiltinFun::BoxNew => ullbc_ast::AssumedFunId::BoxNew, - BuiltinFun::BoxFree => ullbc_ast::AssumedFunId::BoxFree, + BuiltinFun::BoxNew => ast::AssumedFunId::BoxNew, + BuiltinFun::BoxFree => ast::AssumedFunId::BoxFree, BuiltinFun::Panic => panic!(), } } @@ -56,14 +57,16 @@ impl BuiltinFun { } } -pub fn get_name_from_type_id(id: AssumedTy) -> Vec { - let name: &[_] = match id { - AssumedTy::Box => &["alloc", "boxed", "Box"], - AssumedTy::Str => &["Str"], - AssumedTy::Array => &["Array"], - AssumedTy::Slice => &["Slice"], - }; - name.iter().map(|s| s.to_string()).collect() +impl AssumedTy { + pub fn get_name(self) -> Name { + let name: &[_] = match self { + AssumedTy::Box => &["alloc", "boxed", "Box"], + AssumedTy::Str => &["Str"], + AssumedTy::Array => &["Array"], + AssumedTy::Slice => &["Slice"], + }; + Name::from_path(name) + } } /// When translating from MIR to ULLBC, we ignore some type parameters for some diff --git a/charon/src/lib.rs b/charon/src/lib.rs index beb452cb..7a62693d 100644 --- a/charon/src/lib.rs +++ b/charon/src/lib.rs @@ -37,6 +37,7 @@ pub mod ast; pub mod common; pub mod errors; pub mod export; +pub mod name_matcher; pub mod options; pub mod pretty; pub mod transform; diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs new file mode 100644 index 00000000..0fd2d4eb --- /dev/null +++ b/charon/src/name_matcher/mod.rs @@ -0,0 +1,151 @@ +use itertools::{EitherOrBoth, Itertools}; + +use crate::ast::*; + +mod parser; + +#[derive(Clone)] +pub struct Pattern { + elems: Vec, +} + +#[derive(Clone)] +enum PatElem { + /// An identifier, optionally with generic arguments. E.g. `std` or `Box<_>`. + Ident { + name: String, + generics: Vec, + /// For pretty-printing only: whether this is the name of a trait. + is_trait: bool, + }, + /// An inherent or trait implementation block. For traits, the implemented type is the first + /// element of the pattern generics. + Impl(Box), + /// A `*` or `_`. + Glob, +} + +impl Pattern { + pub fn parse(i: &str) -> Result> { + nom_supreme::final_parser::final_parser(parser::parse_pattern)(i) + } + + pub fn matches(&self, ctx: &TranslatedCrate, name: &Name) -> bool { + self.matches_with_generics(ctx, name, &GenericArgs::empty()) + } + + pub fn matches_with_generics( + &self, + ctx: &TranslatedCrate, + name: &Name, + args: &GenericArgs, + ) -> bool { + let zipped = self.elems.iter().zip_longest(&name.name).collect_vec(); + let zipped_len = zipped.len(); + for (i, x) in zipped.into_iter().enumerate() { + let is_last = i + 1 == zipped_len; + match x { + EitherOrBoth::Both(pat, elem) => { + let args = if is_last { args } else { &GenericArgs::empty() }; + if !pat.matches_with_generics(ctx, elem, args) { + return false; + } + } + // The pattern is shorter than the scrutinee and the previous elements match: we + // count that as matching. + EitherOrBoth::Right(_) => return true, + // The pattern is longer than the scrutinee; they don't match. + EitherOrBoth::Left(_) => return false, + } + } + // Both had the same length and all the elements matched. + true + } + + pub fn matches_generics(ctx: &TranslatedCrate, pats: &[Self], generics: &GenericArgs) -> bool { + // We don't include regions in patterns. + if pats.len() != generics.types.len() + generics.const_generics.len() { + return false; + } + let (type_pats, const_pats) = pats.split_at(generics.types.len()); + let types_match = generics + .types + .iter() + .zip(type_pats) + .all(|(ty, pat)| pat.matches_ty(ctx, ty)); + let consts_match = generics + .const_generics + .iter() + .zip(const_pats) + .all(|(c, pat)| pat.matches_const(ctx, c)); + types_match && consts_match + } + + pub fn matches_ty(&self, ctx: &TranslatedCrate, ty: &Ty) -> bool { + if let [PatElem::Glob] = self.elems.as_slice() { + return true; + } + match ty { + Ty::Adt(TypeId::Adt(type_id), args) => { + let Some(decl) = ctx.type_decls.get(*type_id) else { + return false; + }; + self.matches_with_generics(ctx, &decl.item_meta.name, args) + } + Ty::Adt(TypeId::Assumed(assumed_ty), args) => { + let name = assumed_ty.get_name(); + self.matches_with_generics(ctx, &name, args) + } + Ty::Adt(TypeId::Tuple, _) + | Ty::TypeVar(_) + | Ty::Literal(_) + | Ty::Never + | Ty::Ref(_, _, _) + | Ty::RawPtr(_, _) + | Ty::TraitType(_, _) + | Ty::DynTrait(_) + | Ty::Arrow(_, _, _) => false, + } + } + + pub fn matches_const(&self, _ctx: &TranslatedCrate, _c: &ConstGeneric) -> bool { + if let [PatElem::Glob] = self.elems.as_slice() { + return true; + } + todo!("non-trivial const generics patterns aren't implemented") + } +} + +impl PatElem { + fn matches_with_generics( + &self, + ctx: &TranslatedCrate, + elem: &PathElem, + args: &GenericArgs, + ) -> bool { + match (self, elem) { + (PatElem::Glob, _) => true, + ( + PatElem::Ident { + name: pat_ident, + generics, + .. + }, + PathElem::Ident(ident, _), + ) => pat_ident == ident && Pattern::matches_generics(ctx, generics, args), + (PatElem::Impl(_pat), PathElem::Impl(ImplElem::Ty(_, _ty), _)) => { + todo!() + } + (PatElem::Impl(pat), PathElem::Impl(ImplElem::Trait(impl_id), _)) => { + let Some(timpl) = ctx.trait_impls.get(*impl_id) else { + return false; + }; + let Some(tdecl) = ctx.trait_decls.get(timpl.impl_trait.trait_id) else { + return false; + }; + pat.matches_with_generics(ctx, &tdecl.item_meta.name, &timpl.impl_trait.generics) + } + _ => false, + } + } +} diff --git a/charon/src/name_matcher/parser.rs b/charon/src/name_matcher/parser.rs new file mode 100644 index 00000000..c7cba083 --- /dev/null +++ b/charon/src/name_matcher/parser.rs @@ -0,0 +1,193 @@ +use std::fmt; + +use itertools::Itertools; +use nom::{ + bytes::complete::{tag, take_while}, + character::complete::{multispace0, multispace1}, + combinator::{map_res, success}, + error::ParseError, + multi::separated_list0, + sequence::{delimited, preceded}, + Parser, +}; +use nom_supreme::{error::ErrorTree, ParserExt}; + +use super::{PatElem, Pattern}; + +type ParseResult<'a, T> = nom::IResult<&'a str, T, ErrorTree<&'a str>>; + +/// Extra methods on parsers. +trait ParserExtExt: Parser + Sized +where + I: Clone, + E: ParseError, +{ + fn followed_by(self, suffix: F) -> impl Parser + where + F: Parser, + { + self.terminated(suffix) + } +} +impl ParserExtExt for P +where + I: Clone, + E: ParseError, + P: Parser, +{ +} + +pub(super) fn parse_pattern(i: &str) -> ParseResult<'_, Pattern> { + separated_list0(tag("::"), parse_pat_elem) + .map(|elems| Pattern { elems }) + .parse(i) +} + +impl fmt::Display for Pattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.elems.iter().format("::").fmt(f) + } +} + +fn parse_pat_elem(i: &str) -> ParseResult<'_, PatElem> { + let parse_glob = tag("*").map(|_| PatElem::Glob); + parse_glob + .or(parse_impl_elem) + .or(parse_simple_elem) + .parse(i) +} + +fn parse_simple_elem(i: &str) -> ParseResult<'_, PatElem> { + let ident = take_while(|c: char| c.is_alphanumeric() || c == '_'); + let (i, ident) = ident.followed_by(multispace0).parse(i)?; + if ident == "_" { + success(PatElem::Glob).parse(i) + } else { + let args = delimited( + tag("<").followed_by(multispace0), + separated_list0( + tag(",").followed_by(multispace0), + parse_pattern.followed_by(multispace0), + ), + tag(">"), + ); + args.opt() + .map(|args| PatElem::Ident { + name: ident.to_string(), + generics: args.unwrap_or_default(), + is_trait: false, + }) + .parse(i) + } +} + +fn parse_impl_elem(i: &str) -> ParseResult<'_, PatElem> { + let for_ty = preceded( + tag("for").followed_by(multispace1), + parse_pattern.followed_by(multispace0), + ); + let impl_contents = parse_pattern.followed_by(multispace0).and(for_ty.opt()); + let impl_expr = tag("{").followed_by(multispace0).precedes( + delimited( + tag("impl").followed_by(multispace1), + impl_contents, + tag("}"), + ) + .cut(), + ); + map_res(impl_expr, |(mut pat, for_ty)| { + if let Some(for_ty) = for_ty { + let last_elem = pat + .elems + .last_mut() + .ok_or_else(|| anyhow::anyhow!("trait path must be nonempty"))?; + let PatElem::Ident { + generics, is_trait, .. + } = last_elem + else { + return Err(anyhow::anyhow!("trait path must end in an ident")); + }; + // Set the type as the first generic arg. + generics.insert(0, for_ty); + *is_trait = true; + } + Ok(PatElem::Impl(pat.into())) + }) + .parse(i) +} + +impl fmt::Display for PatElem { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PatElem::Ident { + name, + generics, + is_trait, + } => { + write!(f, "{name}")?; + let generics = generics.as_slice(); + let (ty, generics) = if let [ty, generics @ ..] = generics + && *is_trait + { + (Some(ty), generics) + } else { + (None, generics) + }; + if !generics.is_empty() { + write!(f, "<{}>", generics.iter().format(", "))?; + } + if let Some(ty) = ty { + write!(f, " for {ty}")?; + } + Ok(()) + } + PatElem::Impl(pat) => write!(f, "{{impl {pat}}}"), + PatElem::Glob => write!(f, "_"), + } + } +} + +#[test] +fn test_roundtrip() { + let idempotent_test_strings = [ + "crate::foo::bar", + "blah::_", + "blah::_foo", + "a::b::Type", + "a::b::Type<_, _>", + "Clone", + "usize", + "foo::{impl Clone for usize}::clone", + "foo::{impl PartialEq<_> for Type<_, _>}::clone", + "foo::{impl PartialEq for Box}::clone", + "foo::{impl foo::Trait> for alloc::boxed::Box<_>}::method", + ]; + let other_test_strings = [ + ("blah::*", "blah::_"), + ("crate ::foo ::bar ", "crate::foo::bar"), + ("a::b::Type < _ , _ >", "a::b::Type<_, _>"), + ("{ impl Clone for usize }", "{impl Clone for usize}"), + ]; + let failures = [ + "{implClone for usize}", + "{impl Clone forusize}", + "foo::{impl for alloc::boxed::Box<_>}::method", + "foo::{impl foo::_ for alloc::boxed::Box<_>}::method", + ]; + + let test_strings = idempotent_test_strings + .into_iter() + .map(|s| (s, s)) + .chain(other_test_strings); + for (input, expected) in test_strings { + let pat = Pattern::parse(input).map_err(|e| e.to_string()).unwrap(); + assert_eq!(pat.to_string(), expected); + } + + for input in failures { + assert!( + Pattern::parse(input).is_err(), + "Pattern parsed correctly but shouldn't: `{input}`" + ); + } +} diff --git a/charon/src/pretty/fmt_with_ctx.rs b/charon/src/pretty/fmt_with_ctx.rs index 70cadc6e..cd5b1b7a 100644 --- a/charon/src/pretty/fmt_with_ctx.rs +++ b/charon/src/pretty/fmt_with_ctx.rs @@ -1355,7 +1355,7 @@ impl FmtWithCtx for TypeId { match self { TypeId::Tuple => "".to_string(), TypeId::Adt(def_id) => ctx.format_object(*def_id), - TypeId::Assumed(aty) => get_name_from_type_id(*aty).join("::"), + TypeId::Assumed(aty) => aty.get_name().fmt_with_ctx(ctx), } } } diff --git a/charon/tests/crate_data.rs b/charon/tests/crate_data.rs index 3868c187..3fb6b715 100644 --- a/charon/tests/crate_data.rs +++ b/charon/tests/crate_data.rs @@ -7,7 +7,7 @@ use charon_lib::ast::*; mod util; fn translate(code: impl std::fmt::Display) -> anyhow::Result { - util::translate_rust_text(code.to_string()) + util::translate_rust_text(code) } /// `Name` is a complex datastructure; to inspect it we serialize it a little bit. diff --git a/charon/tests/test-rust-name-matcher.rs b/charon/tests/test-rust-name-matcher.rs new file mode 100644 index 00000000..81003086 --- /dev/null +++ b/charon/tests/test-rust-name-matcher.rs @@ -0,0 +1,116 @@ +use charon_lib::formatter::IntoFormatter; +use charon_lib::pretty::FmtWithCtx; +use itertools::Itertools; + +use charon_lib::ast::*; +use charon_lib::name_matcher::Pattern; + +mod util; + +fn parse_pattern_attr(a: &Attribute) -> Option<(bool, Pattern)> { + let Attribute::Unknown(a) = a else { + return None; + }; + let (pass, a) = if let Some(a) = a.strip_prefix("pattern::pass") { + (true, a) + } else if let Some(a) = a.strip_prefix("pattern::fail") { + (false, a) + } else { + return None; + }; + let Some(a) = a.strip_prefix("(\"") else { + return None; + }; + let Some(a) = a.strip_suffix("\")") else { + return None; + }; + match Pattern::parse(a) { + Ok(pat) => Some((pass, pat)), + Err(e) => { + panic!("Failed to parse pattern `{a}` ({e})") + } + } +} + +#[test] +fn test_name_matcher() -> anyhow::Result<()> { + let crate_data = util::translate_rust_text( + r#" + #![feature(register_tool)] + #![register_tool(pattern)] + mod foo { + #[pattern::fail("test_crate::foo::bar::_")] + #[pattern::pass("test_crate::foo::bar")] + #[pattern::pass("test_crate::foo::_")] + #[pattern::pass("test_crate::foo")] + #[pattern::pass("test_crate")] + #[pattern::pass("test_crate::_")] + #[pattern::pass("test_crate::_::bar")] + #[pattern::fail("test_crate::_::lsdkfjs")] + #[pattern::fail("crate::foo::bar")] + #[pattern::fail("foo::bar")] + fn bar() {} + } + + #[pattern::pass("test_crate::Trait")] + #[pattern::fail("test_crate::Trait<_>")] + #[pattern::fail("test_crate::Trait<_, _>")] + trait Trait { + #[pattern::pass("test_crate::Trait::method")] + #[pattern::fail("test_crate::Trait<_>::method")] + fn method(); + } + + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for _}")] + #[pattern::pass("test_crate::{impl test_crate::Trait<_, _>}")] + #[pattern::fail("test_crate::{impl test_crate::Trait<_, _> for _}")] + #[pattern::pass("test_crate::{impl test_crate::Trait> for alloc::boxed::Box<_>}")] + #[pattern::pass("test_crate::{impl test_crate::Trait, core::option::Option<_>>}")] + #[pattern::pass("test_crate::{impl test_crate::Trait> for alloc::boxed::Box<_>}")] + #[pattern::fail("test_crate::{impl test_crate::Trait> for alloc::boxed::Box<_>}")] + #[pattern::fail("test_crate::{impl test_crate::Trait> for alloc::boxed::Box<_>}")] + #[pattern::fail("test_crate::{impl test_crate::Trait> for FooBar<_>}")] + #[pattern::fail("test_crate::{impl Trait<_> for _}")] + #[pattern::fail("test_crate::{impl test_crate::OtherTrait<_> for _}")] + #[pattern::fail("test_crate::Trait<_>")] + impl Trait> for Box { + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for _}::method")] + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for _}::_")] + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for _}")] + #[pattern::pass("test_crate::{impl test_crate::Trait<_, _>}"::method)] + #[pattern::fail("test_crate::Trait<_>::method")] + fn method() {} + } + "#, + )?; + let fmt_ctx = &crate_data.into_fmt(); + + for item in crate_data.all_items() { + let name = &item.item_meta().name; + let patterns = item + .item_meta() + .attr_info + .attributes + .iter() + .filter_map(|a| parse_pattern_attr(a)) + .collect_vec(); + for (pass, pat) in patterns { + let passes = pat.matches(&crate_data, name); + if passes != pass { + if passes { + panic!( + "Pattern `{pat}` passes on `{}` but shouldn't", + name.fmt_with_ctx(fmt_ctx) + ); + } else { + panic!( + "Pattern `{pat}` doesn't pass on `{}` but should", + name.fmt_with_ctx(fmt_ctx) + ); + } + } + } + } + + Ok(()) +} diff --git a/charon/tests/ui/name-matcher-tests.out b/charon/tests/ui/ml-name-matcher-tests.out similarity index 100% rename from charon/tests/ui/name-matcher-tests.out rename to charon/tests/ui/ml-name-matcher-tests.out diff --git a/charon/tests/ui/name-matcher-tests.rs b/charon/tests/ui/ml-name-matcher-tests.rs similarity index 100% rename from charon/tests/ui/name-matcher-tests.rs rename to charon/tests/ui/ml-name-matcher-tests.rs diff --git a/charon/tests/util/mod.rs b/charon/tests/util/mod.rs index 0986c8c2..62e49685 100644 --- a/charon/tests/util/mod.rs +++ b/charon/tests/util/mod.rs @@ -8,8 +8,9 @@ use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; use snapbox; use snapbox::filter::Filter; +use std::fmt::Display; use std::path::Path; -use std::{error::Error, fs::File, io::BufReader, process::Command}; +use std::{fs::File, io::BufReader, process::Command}; use charon_lib::ast::TranslatedCrate; use charon_lib::{export::CrateData, logger}; @@ -59,7 +60,7 @@ fn expect_file_contents(path: &Path, actual: snapbox::Data) -> snapbox::assert:: } /// Given a string that contains rust code, this calls charon on it and returns the result. -pub fn translate_rust_text(code: String) -> anyhow::Result { +pub fn translate_rust_text(code: impl Display) -> anyhow::Result { // Initialize the logger logger::initialize_logger(); From 7f2843c1176c793ce79221d9cdff891ea5757a2b Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 16 Aug 2024 17:19:38 +0200 Subject: [PATCH 04/10] Support reference types in patterns --- charon/src/name_matcher/mod.rs | 69 ++++++++++++++++++-------- charon/src/name_matcher/parser.rs | 39 +++++++++++++-- charon/tests/test-rust-name-matcher.rs | 10 ++++ 3 files changed, 92 insertions(+), 26 deletions(-) diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs index 0fd2d4eb..92126ef4 100644 --- a/charon/src/name_matcher/mod.rs +++ b/charon/src/name_matcher/mod.rs @@ -14,7 +14,7 @@ enum PatElem { /// An identifier, optionally with generic arguments. E.g. `std` or `Box<_>`. Ident { name: String, - generics: Vec, + generics: Vec, /// For pretty-printing only: whether this is the name of a trait. is_trait: bool, }, @@ -25,6 +25,14 @@ enum PatElem { Glob, } +#[derive(Clone)] +enum PatTy { + /// A path, like `my_crate::foo::Type<_, usize>` + Pat(Pattern), + /// `&T`, `&mut T` + Ref(RefKind, Box), +} + impl Pattern { pub fn parse(i: &str) -> Result> { nom_supreme::final_parser::final_parser(parser::parse_pattern)(i) @@ -62,25 +70,6 @@ impl Pattern { true } - pub fn matches_generics(ctx: &TranslatedCrate, pats: &[Self], generics: &GenericArgs) -> bool { - // We don't include regions in patterns. - if pats.len() != generics.types.len() + generics.const_generics.len() { - return false; - } - let (type_pats, const_pats) = pats.split_at(generics.types.len()); - let types_match = generics - .types - .iter() - .zip(type_pats) - .all(|(ty, pat)| pat.matches_ty(ctx, ty)); - let consts_match = generics - .const_generics - .iter() - .zip(const_pats) - .all(|(c, pat)| pat.matches_const(ctx, c)); - types_match && consts_match - } - pub fn matches_ty(&self, ctx: &TranslatedCrate, ty: &Ty) -> bool { if let [PatElem::Glob] = self.elems.as_slice() { return true; @@ -132,7 +121,7 @@ impl PatElem { .. }, PathElem::Ident(ident, _), - ) => pat_ident == ident && Pattern::matches_generics(ctx, generics, args), + ) => pat_ident == ident && PatTy::matches_generics(ctx, generics, args), (PatElem::Impl(_pat), PathElem::Impl(ImplElem::Ty(_, _ty), _)) => { todo!() } @@ -149,3 +138,41 @@ impl PatElem { } } } + +impl PatTy { + pub fn matches_generics(ctx: &TranslatedCrate, pats: &[Self], generics: &GenericArgs) -> bool { + // We don't include regions in patterns. + if pats.len() != generics.types.len() + generics.const_generics.len() { + return false; + } + let (type_pats, const_pats) = pats.split_at(generics.types.len()); + let types_match = generics + .types + .iter() + .zip(type_pats) + .all(|(ty, pat)| pat.matches_ty(ctx, ty)); + let consts_match = generics + .const_generics + .iter() + .zip(const_pats) + .all(|(c, pat)| pat.matches_const(ctx, c)); + types_match && consts_match + } + + pub fn matches_ty(&self, ctx: &TranslatedCrate, ty: &Ty) -> bool { + match (self, ty) { + (PatTy::Pat(p), _) => p.matches_ty(ctx, ty), + (PatTy::Ref(pat_mtbl, p_ty), Ty::Ref(_, ty, ty_mtbl)) => { + pat_mtbl == ty_mtbl && p_ty.matches_ty(ctx, ty) + } + _ => false, + } + } + + pub fn matches_const(&self, ctx: &TranslatedCrate, c: &ConstGeneric) -> bool { + match self { + PatTy::Pat(p) => p.matches_const(ctx, c), + PatTy::Ref(..) => false, + } + } +} diff --git a/charon/src/name_matcher/parser.rs b/charon/src/name_matcher/parser.rs index c7cba083..5e340d07 100644 --- a/charon/src/name_matcher/parser.rs +++ b/charon/src/name_matcher/parser.rs @@ -12,7 +12,8 @@ use nom::{ }; use nom_supreme::{error::ErrorTree, ParserExt}; -use super::{PatElem, Pattern}; +use super::{PatElem, PatTy, Pattern}; +use crate::ast::RefKind; type ParseResult<'a, T> = nom::IResult<&'a str, T, ErrorTree<&'a str>>; @@ -67,7 +68,7 @@ fn parse_simple_elem(i: &str) -> ParseResult<'_, PatElem> { tag("<").followed_by(multispace0), separated_list0( tag(",").followed_by(multispace0), - parse_pattern.followed_by(multispace0), + parse_pat_ty.followed_by(multispace0), ), tag(">"), ); @@ -84,7 +85,7 @@ fn parse_simple_elem(i: &str) -> ParseResult<'_, PatElem> { fn parse_impl_elem(i: &str) -> ParseResult<'_, PatElem> { let for_ty = preceded( tag("for").followed_by(multispace1), - parse_pattern.followed_by(multispace0), + parse_pat_ty.followed_by(multispace0), ); let impl_contents = parse_pattern.followed_by(multispace0).and(for_ty.opt()); let impl_expr = tag("{").followed_by(multispace0).precedes( @@ -147,6 +148,32 @@ impl fmt::Display for PatElem { } } +fn parse_pat_ty(i: &str) -> ParseResult<'_, PatTy> { + let mutability = tag("mut").followed_by(multispace0).opt().map(|mtbl| { + if mtbl.is_some() { + RefKind::Mut + } else { + RefKind::Shared + } + }); + tag("&") + .followed_by(multispace0) + .precedes(mutability.and(parse_pat_ty)) + .map(|(mtbl, ty)| PatTy::Ref(mtbl, ty.into())) + .or(parse_pattern.map(PatTy::Pat)) + .parse(i) +} + +impl fmt::Display for PatTy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PatTy::Pat(p) => write!(f, "{p}"), + PatTy::Ref(RefKind::Shared, ty) => write!(f, "&{ty}"), + PatTy::Ref(RefKind::Mut, ty) => write!(f, "&mut {ty}"), + } + } +} + #[test] fn test_roundtrip() { let idempotent_test_strings = [ @@ -158,8 +185,9 @@ fn test_roundtrip() { "Clone", "usize", "foo::{impl Clone for usize}::clone", - "foo::{impl PartialEq<_> for Type<_, _>}::clone", - "foo::{impl PartialEq for Box}::clone", + "foo::{impl Clone for &&usize}", + "foo::{impl PartialEq<_> for Type<_, _>}", + "foo::{impl PartialEq for Box}", "foo::{impl foo::Trait> for alloc::boxed::Box<_>}::method", ]; let other_test_strings = [ @@ -173,6 +201,7 @@ fn test_roundtrip() { "{impl Clone forusize}", "foo::{impl for alloc::boxed::Box<_>}::method", "foo::{impl foo::_ for alloc::boxed::Box<_>}::method", + "foo::{impl &Clone for usize}", ]; let test_strings = idempotent_test_strings diff --git a/charon/tests/test-rust-name-matcher.rs b/charon/tests/test-rust-name-matcher.rs index 81003086..2f974569 100644 --- a/charon/tests/test-rust-name-matcher.rs +++ b/charon/tests/test-rust-name-matcher.rs @@ -81,6 +81,16 @@ fn test_name_matcher() -> anyhow::Result<()> { #[pattern::fail("test_crate::Trait<_>::method")] fn method() {} } + + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for Slice<_>}")] + impl Trait for [T] { + fn method() {} + } + + #[pattern::pass("test_crate::{impl test_crate::Trait<_> for &Slice<_>}")] + impl Trait for &[T] { + fn method() {} + } "#, )?; let fmt_ctx = &crate_data.into_fmt(); From 090dd4d09cf4c5c250a9f469d7c09281ff2b2953 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 16 Aug 2024 18:07:53 +0200 Subject: [PATCH 05/10] Implement `FromStr` for `NamePattern` --- charon/src/name_matcher/mod.rs | 14 +++++++++----- charon/src/name_matcher/parser.rs | 23 +++++++++++++++++++++-- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs index 92126ef4..15535878 100644 --- a/charon/src/name_matcher/mod.rs +++ b/charon/src/name_matcher/mod.rs @@ -1,15 +1,18 @@ use itertools::{EitherOrBoth, Itertools}; +use serde::{Deserialize, Serialize}; use crate::ast::*; mod parser; -#[derive(Clone)] +pub use Pattern as NamePattern; + +#[derive(Clone, Serialize, Deserialize)] pub struct Pattern { elems: Vec, } -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] enum PatElem { /// An identifier, optionally with generic arguments. E.g. `std` or `Box<_>`. Ident { @@ -25,7 +28,7 @@ enum PatElem { Glob, } -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] enum PatTy { /// A path, like `my_crate::foo::Type<_, usize>` Pat(Pattern), @@ -34,8 +37,9 @@ enum PatTy { } impl Pattern { - pub fn parse(i: &str) -> Result> { - nom_supreme::final_parser::final_parser(parser::parse_pattern)(i) + pub fn parse(i: &str) -> Result> { + use std::str::FromStr; + Self::from_str(i) } pub fn matches(&self, ctx: &TranslatedCrate, name: &Name) -> bool { diff --git a/charon/src/name_matcher/parser.rs b/charon/src/name_matcher/parser.rs index 5e340d07..79c2b8da 100644 --- a/charon/src/name_matcher/parser.rs +++ b/charon/src/name_matcher/parser.rs @@ -1,4 +1,4 @@ -use std::fmt; +use std::{fmt, str::FromStr}; use itertools::Itertools; use nom::{ @@ -38,7 +38,20 @@ where { } -pub(super) fn parse_pattern(i: &str) -> ParseResult<'_, Pattern> { +/// The entry point for this module: parses a string into a `Pattern`. +impl FromStr for Pattern { + type Err = ErrorTree; + fn from_str(s: &str) -> Result { + parse_pattern_complete(s) + } +} + +fn parse_pattern_complete(i: &str) -> Result> { + nom_supreme::final_parser::final_parser(parse_pattern)(i) + .map_err(|e: ErrorTree<_>| e.map_locations(|s: &str| s.to_string())) +} + +fn parse_pattern(i: &str) -> ParseResult<'_, Pattern> { separated_list0(tag("::"), parse_pat_elem) .map(|elems| Pattern { elems }) .parse(i) @@ -50,6 +63,12 @@ impl fmt::Display for Pattern { } } +impl fmt::Debug for Pattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self}") + } +} + fn parse_pat_elem(i: &str) -> ParseResult<'_, PatElem> { let parse_glob = tag("*").map(|_| PatElem::Glob); parse_glob From 5057e6803c9eefa9eb74040704f2c297fcbe9091 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 16 Aug 2024 18:07:18 +0200 Subject: [PATCH 06/10] Split `TransOptions` into "translate" and "transform" versions --- charon/src/bin/charon-driver/driver.rs | 2 +- .../bin/charon-driver/translate/get_mir.rs | 4 +- .../translate/translate_crate_to_ullbc.rs | 42 ++++++++++--------- .../charon-driver/translate/translate_ctx.rs | 27 +++++++++++- charon/src/bin/charon/main.rs | 5 ++- charon/src/options.rs | 29 +------------ charon/src/transform/ctx.rs | 13 ++++-- 7 files changed, 65 insertions(+), 57 deletions(-) diff --git a/charon/src/bin/charon-driver/driver.rs b/charon/src/bin/charon-driver/driver.rs index d12a044d..b39701ac 100644 --- a/charon/src/bin/charon-driver/driver.rs +++ b/charon/src/bin/charon-driver/driver.rs @@ -1,7 +1,7 @@ use crate::translate::translate_crate_to_ullbc; +use crate::translate::translate_ctx::MirLevel; use charon_lib::export; use charon_lib::options; -use charon_lib::options::MirLevel; use charon_lib::reorder_decls::compute_reordered_decls; use charon_lib::transform::{LLBC_PASSES, ULLBC_PASSES}; use charon_lib::ullbc_to_llbc; diff --git a/charon/src/bin/charon-driver/translate/get_mir.rs b/charon/src/bin/charon-driver/translate/get_mir.rs index d52bdd6d..f3fa7782 100644 --- a/charon/src/bin/charon-driver/translate/get_mir.rs +++ b/charon/src/bin/charon-driver/translate/get_mir.rs @@ -1,11 +1,11 @@ //! Various utilities to load MIR. //! Allow to easily load the MIR code generated by a specific pass. - -use crate::options::MirLevel; use rustc_hir::def_id::DefId; use rustc_middle::mir::Body; use rustc_middle::ty::TyCtxt; +use crate::translate::translate_ctx::MirLevel; + /// Indicates if the constants should be extracted in their own identifier, /// or if they must be evaluated to a constant value, depending on the /// MIR level which we extract. diff --git a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs index 49c74e44..660ab2ab 100644 --- a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs +++ b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs @@ -2,9 +2,9 @@ use super::get_mir::extract_constants_at_top_level; use super::translate_ctx::*; use charon_lib::ast::krate::*; use charon_lib::common::*; -use charon_lib::options::{CliOpts, MirLevel, TransOptions}; +use charon_lib::options::CliOpts; +use charon_lib::transform::ctx::TransformOptions; use charon_lib::transform::TransformCtx; - use hax_frontend_exporter as hax; use hax_frontend_exporter::SInto; use rustc_hir::def_id::DefId; @@ -364,25 +364,29 @@ pub fn translate<'tcx, 'ctx>( // downgrade_errors: options.errors_as_warnings, }, ); + let error_ctx = ErrorCtx { + continue_on_failure: !options.abort_on_error, + errors_as_warnings: options.errors_as_warnings, + dcx: tcx.dcx(), + decls_with_errors: HashSet::new(), + ignored_failed_decls: HashSet::new(), + dep_sources: HashMap::new(), + def_id: None, + error_count: 0, + }; + let transform_options = TransformOptions { + no_code_duplication: options.no_code_duplication, + }; + let translate_options = TranslateOptions { + mir_level, + extract_opaque_bodies: options.extract_opaque_bodies, + opaque_mods: HashSet::from_iter(options.opaque_modules.iter().cloned()), + }; let mut ctx = TranslateCtx { tcx, hax_state, - options: TransOptions { - mir_level, - no_code_duplication: options.no_code_duplication, - extract_opaque_bodies: options.extract_opaque_bodies, - opaque_mods: HashSet::from_iter(options.opaque_modules.iter().cloned()), - }, - errors: ErrorCtx { - continue_on_failure: !options.abort_on_error, - errors_as_warnings: options.errors_as_warnings, - dcx: tcx.dcx(), - decls_with_errors: HashSet::new(), - ignored_failed_decls: HashSet::new(), - dep_sources: HashMap::new(), - def_id: None, - error_count: 0, - }, + options: translate_options, + errors: error_ctx, translated: TranslatedCrate { crate_name, ..TranslatedCrate::default() @@ -438,7 +442,7 @@ pub fn translate<'tcx, 'ctx>( // Return the context, dropping the hax state and rustc `tcx`. TransformCtx { - options: ctx.options, + options: transform_options, translated: ctx.translated, errors: ctx.errors, } diff --git a/charon/src/bin/charon-driver/translate/translate_ctx.rs b/charon/src/bin/charon-driver/translate/translate_ctx.rs index 8537b71a..cee81fe9 100644 --- a/charon/src/bin/charon-driver/translate/translate_ctx.rs +++ b/charon/src/bin/charon-driver/translate/translate_ctx.rs @@ -4,7 +4,6 @@ use charon_lib::ast::*; use charon_lib::common::*; use charon_lib::formatter::{FmtCtx, IntoFormatter}; use charon_lib::ids::{MapGenerator, Vector}; -use charon_lib::options::TransOptions; use charon_lib::ullbc_ast as ast; use hax_frontend_exporter as hax; use hax_frontend_exporter::SInto; @@ -17,6 +16,7 @@ use rustc_hir::Node as HirNode; use rustc_middle::ty::TyCtxt; use std::cmp::{Ord, PartialOrd}; use std::collections::HashMap; +use std::collections::HashSet; use std::collections::{BTreeMap, VecDeque}; use std::fmt; use std::path::Component; @@ -30,6 +30,29 @@ pub(crate) use charon_lib::errors::{ error_assert, error_or_panic, register_error_or_panic, DepSource, ErrorCtx, }; +/// TODO: maybe we should always target MIR Built, this would make things +/// simpler. In particular, the MIR optimized is very low level and +/// reveals too many types and data-structures that we don't want to manipulate. +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum MirLevel { + /// Original MIR, directly translated from HIR. + Built, + /// Not sure what this is. Not well tested. + Promoted, + /// MIR after optimization passes. The last one before codegen. + Optimized, +} + +/// The options that control translation. +pub struct TranslateOptions { + /// The level at which to extract the MIR + pub mir_level: MirLevel, + /// Whether to extract the bodies of foreign methods and structs with private fields. + pub extract_opaque_bodies: bool, + /// Modules to consider opaque. + pub opaque_mods: HashSet, +} + /// We use a special type to store the Rust identifiers in the stack, to /// make sure we translate them in a specific order (top-level constants /// before constant functions before functions...). This allows us to @@ -86,7 +109,7 @@ pub struct TranslateCtx<'tcx, 'ctx> { pub hax_state: hax::State, (), (), ()>, /// The options that control translation. - pub options: TransOptions, + pub options: TranslateOptions, /// The translated data. pub translated: TranslatedCrate, diff --git a/charon/src/bin/charon/main.rs b/charon/src/bin/charon/main.rs index dde5953c..53fce0c5 100644 --- a/charon/src/bin/charon/main.rs +++ b/charon/src/bin/charon/main.rs @@ -30,8 +30,9 @@ //! deserialize them later and use them to guide the extraction in the //! callbacks. -// Don't link with the `charon_lib` crate so that the `charon` binary doesn't have to dynamically -// link to `librustc_driver.so` etc. +// We must not link with the `charon_lib` crate because that would make `charon` need to +// dynamically link to `librustc_driver.so` etc. The `charon` binary must be runnable _before_ +// setting up the correct toolchain paths. #[path = "../../logger.rs"] mod logger; #[path = "../../options.rs"] diff --git a/charon/src/options.rs b/charon/src/options.rs index 5de86c69..79887da2 100644 --- a/charon/src/options.rs +++ b/charon/src/options.rs @@ -2,7 +2,7 @@ use clap::Parser; /// The options received as input by cargo-charon use serde::{Deserialize, Serialize}; -use std::{collections::HashSet, path::PathBuf}; +use std::path::PathBuf; /// The name of the environment variable we use to save the serialized Cli options /// when calling charon-driver from cargo-charon. @@ -199,30 +199,3 @@ impl CliOpts { ); } } - -/// TODO: maybe we should always target MIR Built, this would make things -/// simpler. In particular, the MIR optimized is very low level and -/// reveals too many types and data-structures that we don't want to manipulate. -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum MirLevel { - /// Original MIR, directly translated from HIR. - Built, - /// Not sure what this is. Not well tested. - Promoted, - /// MIR after optimization passes. The last one before codegen. - Optimized, -} - -/// The options that control translation. -pub struct TransOptions { - /// The level at which to extract the MIR - pub mir_level: MirLevel, - /// Error out if some code ends up being duplicated by the control-flow - /// reconstruction (note that because several patterns in a match may lead - /// to the same branch, it is node always possible not to duplicate code). - pub no_code_duplication: bool, - /// Whether to extract the bodies of foreign methods and structs with private fields. - pub extract_opaque_bodies: bool, - /// Modules to consider opaque. - pub opaque_mods: HashSet, -} diff --git a/charon/src/transform/ctx.rs b/charon/src/transform/ctx.rs index 2b08221f..b3e1761c 100644 --- a/charon/src/transform/ctx.rs +++ b/charon/src/transform/ctx.rs @@ -3,18 +3,25 @@ use crate::errors::ErrorCtx; use crate::formatter::{FmtCtx, IntoFormatter}; use crate::ids::Vector; use crate::llbc_ast; -use crate::options::TransOptions; use crate::pretty::FmtWithCtx; use crate::ullbc_ast; use rustc_error_messages::MultiSpan; use rustc_span::def_id::DefId; use std::fmt; +/// The options that control transformation. +pub struct TransformOptions { + /// Error out if some code ends up being duplicated by the control-flow + /// reconstruction (note that because several patterns in a match may lead + /// to the same branch, it is node always possible not to duplicate code). + pub no_code_duplication: bool, +} + /// Simpler context used for rustc-independent code transformation. This only depends on rustc for /// its error reporting machinery. pub struct TransformCtx<'ctx> { - /// The options that control translation. - pub options: TransOptions, + /// The options that control transformation. + pub options: TransformOptions, /// The translated data. pub translated: TranslatedCrate, /// Context for tracking and reporting errors. From 12d7040c2f2817cc04c3450ebe0d484f758e192d Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 19 Aug 2024 16:31:00 +0200 Subject: [PATCH 07/10] Rework the existing opacity system in terms of patterns --- charon/src/ast/krate.rs | 3 +- charon/src/ast/names_utils.rs | 26 ------ charon/src/bin/charon-driver/driver.rs | 23 +---- .../translate/translate_crate_to_ullbc.rs | 33 ++++--- .../charon-driver/translate/translate_ctx.rs | 92 +++++++++++++++---- .../translate/translate_traits.rs | 4 +- charon/src/name_matcher/mod.rs | 60 +++++++++++- 7 files changed, 157 insertions(+), 84 deletions(-) diff --git a/charon/src/ast/krate.rs b/charon/src/ast/krate.rs index c9a0eb21..7bde89c9 100644 --- a/charon/src/ast/krate.rs +++ b/charon/src/ast/krate.rs @@ -81,7 +81,8 @@ pub enum AnyTransItem<'ctx> { /// The data of a translated crate. #[derive(Default, Clone, Drive, DriveMut, Serialize, Deserialize)] pub struct TranslatedCrate { - /// The name of the crate. + /// The name that the user requested for the crate. This may differ from what rustc reports as + /// the name of the crate. pub crate_name: String, /// File names to ids and vice-versa diff --git a/charon/src/ast/names_utils.rs b/charon/src/ast/names_utils.rs index d78f0cc8..d3ebe406 100644 --- a/charon/src/ast/names_utils.rs +++ b/charon/src/ast/names_utils.rs @@ -3,7 +3,6 @@ //! For now, we have one function per object kind (type, trait, function, //! module): many of them could be factorized (will do). use crate::names::*; -use std::collections::HashSet; impl PathElem { fn equals_ident(&self, id: &str) -> bool { @@ -56,29 +55,4 @@ impl Name { pub fn equals_ref_name(&self, ref_name: &[&str]) -> bool { self.compare_with_ref_name(true, ref_name) } - - /// Compare the name to a constant array. - /// This ignores disambiguators. - pub fn prefix_is_same(&self, ref_name: &[&str]) -> bool { - self.compare_with_ref_name(false, ref_name) - } - - /// Return `true` if the name identifies an item inside the module: `krate::module` - pub fn is_in_module(&self, krate: &String, module: &String) -> bool { - self.prefix_is_same(&[krate, module]) - } - - /// Similar to [Name::is_in_module] - pub fn is_in_modules(&self, krate: &String, modules: &HashSet) -> bool { - if self.len() >= 2 { - match (&self.name[0], &self.name[1]) { - (PathElem::Ident(s0, _), PathElem::Ident(s1, _)) => { - s0 == krate && modules.contains(s1) - } - _ => false, - } - } else { - false - } - } } diff --git a/charon/src/bin/charon-driver/driver.rs b/charon/src/bin/charon-driver/driver.rs index b39701ac..b02a95e1 100644 --- a/charon/src/bin/charon-driver/driver.rs +++ b/charon/src/bin/charon-driver/driver.rs @@ -1,5 +1,4 @@ use crate::translate::translate_crate_to_ullbc; -use crate::translate::translate_ctx::MirLevel; use charon_lib::export; use charon_lib::options; use charon_lib::reorder_decls::compute_reordered_decls; @@ -203,26 +202,6 @@ pub fn translate(tcx: TyCtxt, internal: &mut CharonCallbacks) -> export::CrateDa trace!(); let options = &internal.options; - // Retrieve the crate name: if the user specified a custom name, use - // it, otherwise retrieve it from Rustc. - let crate_name: String = options.crate_name.as_deref().map_or_else( - || { - tcx.crate_name(rustc_span::def_id::LOCAL_CRATE) - .to_ident_string() - }, - |x: &str| x.to_string(), - ); - trace!("# Crate: {}", crate_name); - - // Adjust the level of MIR we extract, depending on the options - let mir_level = if options.mir_optimized { - MirLevel::Optimized - } else if options.mir_promoted { - MirLevel::Promoted - } else { - MirLevel::Built - }; - // Some important notes about crates and how to interact with rustc: // - when calling rustc, we should give it the root of the crate, for // instance the "main.rs" file. From there, rustc will load all the @@ -233,7 +212,7 @@ pub fn translate(tcx: TyCtxt, internal: &mut CharonCallbacks) -> export::CrateDa // # Translate the declarations in the crate. // We translate the declarations in an ad-hoc order, and do not group // the mutually recursive groups - we do this in the next step. - let mut ctx = translate_crate_to_ullbc::translate(crate_name, options, tcx, mir_level); + let mut ctx = translate_crate_to_ullbc::translate(options, tcx); if options.print_ullbc { info!("# ULLBC after translation from MIR:\n\n{ctx}\n"); diff --git a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs index 660ab2ab..32979073 100644 --- a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs +++ b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs @@ -58,7 +58,7 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx, 'ctx> { | ItemKind::Macro(..) | ItemKind::Trait(..) => { let name = self.def_id_to_name(def_id)?; - if self.is_opaque_name(&name) { + if self.opacity_for_name(&name).is_opaque() { trace!("Ignoring {:?} (marked as opaque)", item.item_id()); return Ok(()); } @@ -350,13 +350,7 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx, 'ctx> { } } -/// Translate all the declarations in the crate. -pub fn translate<'tcx, 'ctx>( - crate_name: String, - options: &CliOpts, - tcx: TyCtxt<'tcx>, - mir_level: MirLevel, -) -> TransformCtx<'tcx> { +pub fn translate<'tcx, 'ctx>(options: &CliOpts, tcx: TyCtxt<'tcx>) -> TransformCtx<'tcx> { let hax_state = hax::state::State::new( tcx, hax::options::Options { @@ -364,7 +358,20 @@ pub fn translate<'tcx, 'ctx>( // downgrade_errors: options.errors_as_warnings, }, ); - let error_ctx = ErrorCtx { + + // Retrieve the crate name: if the user specified a custom name, use + // it, otherwise retrieve it from rustc. + let real_crate_name = tcx + .crate_name(rustc_span::def_id::LOCAL_CRATE) + .to_ident_string(); + let requested_crate_name: String = options + .crate_name + .as_ref() + .unwrap_or(&real_crate_name) + .clone(); + trace!("# Crate: {}", requested_crate_name); + + let mut error_ctx = ErrorCtx { continue_on_failure: !options.abort_on_error, errors_as_warnings: options.errors_as_warnings, dcx: tcx.dcx(), @@ -377,18 +384,14 @@ pub fn translate<'tcx, 'ctx>( let transform_options = TransformOptions { no_code_duplication: options.no_code_duplication, }; - let translate_options = TranslateOptions { - mir_level, - extract_opaque_bodies: options.extract_opaque_bodies, - opaque_mods: HashSet::from_iter(options.opaque_modules.iter().cloned()), - }; + let translate_options = TranslateOptions::new(&mut error_ctx, &real_crate_name, options); let mut ctx = TranslateCtx { tcx, hax_state, options: translate_options, errors: error_ctx, translated: TranslatedCrate { - crate_name, + crate_name: requested_crate_name, ..TranslatedCrate::default() }, priority_queue: Default::default(), diff --git a/charon/src/bin/charon-driver/translate/translate_ctx.rs b/charon/src/bin/charon-driver/translate/translate_ctx.rs index cee81fe9..0440bb3a 100644 --- a/charon/src/bin/charon-driver/translate/translate_ctx.rs +++ b/charon/src/bin/charon-driver/translate/translate_ctx.rs @@ -4,6 +4,8 @@ use charon_lib::ast::*; use charon_lib::common::*; use charon_lib::formatter::{FmtCtx, IntoFormatter}; use charon_lib::ids::{MapGenerator, Vector}; +use charon_lib::name_matcher::NamePattern; +use charon_lib::options::CliOpts; use charon_lib::ullbc_ast as ast; use hax_frontend_exporter as hax; use hax_frontend_exporter::SInto; @@ -16,7 +18,6 @@ use rustc_hir::Node as HirNode; use rustc_middle::ty::TyCtxt; use std::cmp::{Ord, PartialOrd}; use std::collections::HashMap; -use std::collections::HashSet; use std::collections::{BTreeMap, VecDeque}; use std::fmt; use std::path::Component; @@ -49,8 +50,61 @@ pub struct TranslateOptions { pub mir_level: MirLevel, /// Whether to extract the bodies of foreign methods and structs with private fields. pub extract_opaque_bodies: bool, - /// Modules to consider opaque. - pub opaque_mods: HashSet, + // List of patterns to assign a given opacity to. For each name, the most specific pattern that + // matches determines the opacity of the item. When no options are provided this is initialized + // to treat items in the crate as transparent and items in other crates as foreign. + pub item_opacities: Vec<(NamePattern, ItemOpacity)>, +} + +impl TranslateOptions { + pub(crate) fn new(error_ctx: &mut ErrorCtx<'_>, crate_name: &str, options: &CliOpts) -> Self { + let mut parse_pattern = |s: &str| match NamePattern::parse(s) { + Ok(p) => Ok(p), + Err(e) => { + let msg = format!("failed to parse pattern `{s}` ({e})"); + error_or_panic!(error_ctx, rustc_span::DUMMY_SP, msg) + } + }; + + let mir_level = if options.mir_optimized { + MirLevel::Optimized + } else if options.mir_promoted { + MirLevel::Promoted + } else { + MirLevel::Built + }; + + let item_opacities = { + use ItemOpacity::*; + let mut opacities = vec![]; + + // This is how to treat items that don't match any other pattern. + if options.extract_opaque_bodies { + opacities.push(("_".to_string(), Transparent)); + } else { + opacities.push(("_".to_string(), Foreign)); + } + + // We always include the items from the crate. + opacities.push((crate_name.to_owned(), Transparent)); + + for module in options.opaque_modules.iter() { + opacities.push((format!("{crate_name}::{module}"), Opaque)); + } + + opacities + .into_iter() + .filter_map(|(s, opacity)| parse_pattern(&s).ok().map(|pat| (pat, opacity))) + .collect() + }; + + TranslateOptions { + mir_level, + // TODO: remove option + extract_opaque_bodies: options.extract_opaque_bodies, + item_opacities, + } + } } /// We use a special type to store the Rust identifiers in the stack, to @@ -494,17 +548,13 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx, 'ctx> { let attr_info = self.translate_attr_info_from_rid(def_id, span); let is_local = def_id.is_local(); - let opacity = { - let is_opaque = self.is_opaque_name(&name) - || self.id_is_extern_item(def_id) - || attr_info.attributes.iter().any(|attr| attr.is_opaque()); - if is_opaque { - ItemOpacity::Opaque - } else if !is_local && !self.options.extract_opaque_bodies { - ItemOpacity::Foreign - } else { - ItemOpacity::Transparent - } + let opacity = if self.id_is_extern_item(def_id) + || attr_info.attributes.iter().any(|attr| attr.is_opaque()) + { + // Force opaque in these cases. + ItemOpacity::Opaque + } else { + self.opacity_for_name(&name) }; Ok(ItemMeta { @@ -780,8 +830,18 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx, 'ctx> { .is_some_and(|node| matches!(node, HirNode::ForeignItem(_))) } - pub(crate) fn is_opaque_name(&self, name: &Name) -> bool { - name.is_in_modules(&self.translated.crate_name, &self.options.opaque_mods) + pub(crate) fn opacity_for_name(&self, name: &Name) -> ItemOpacity { + // Find the most precise pattern that matches this name. There is always one since + // the list contains the `_` pattern. If there are conflicting settings for this item, we + // err on the side of being more transparent. + let (_, opacity) = self + .options + .item_opacities + .iter() + .filter(|(pat, _)| pat.matches(&self.translated, name)) + .max() + .unwrap(); + *opacity } /// Register the fact that `id` is a dependency of `src` (if `src` is not `None`). diff --git a/charon/src/bin/charon-driver/translate/translate_traits.rs b/charon/src/bin/charon-driver/translate/translate_traits.rs index a6298fdc..c5618a50 100644 --- a/charon/src/bin/charon-driver/translate/translate_traits.rs +++ b/charon/src/bin/charon-driver/translate/translate_traits.rs @@ -171,7 +171,9 @@ impl<'tcx, 'ctx> TranslateCtx<'tcx, 'ctx> { // but still remember their name (unless `extract_opaque_bodies` is set). // TODO: translate function signatures unconditionally. if hax_item.has_value { - // This is a *provided* method + // This is a *provided* method, + // TODO: this branch is a hack: we should always give an id to methods and + // skip translating their body. However today we run into `for<'a>` crashes. if rust_id.is_local() || bt_ctx.t_ctx.options.extract_opaque_bodies { let fun_id = bt_ctx.register_fun_decl_id(item_span, rust_item_id); provided_methods.push((item_name.clone(), Some(fun_id))); diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs index 15535878..ce965293 100644 --- a/charon/src/name_matcher/mod.rs +++ b/charon/src/name_matcher/mod.rs @@ -1,3 +1,5 @@ +use std::cmp::Ordering; + use itertools::{EitherOrBoth, Itertools}; use serde::{Deserialize, Serialize}; @@ -7,12 +9,12 @@ mod parser; pub use Pattern as NamePattern; -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Pattern { elems: Vec, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] enum PatElem { /// An identifier, optionally with generic arguments. E.g. `std` or `Box<_>`. Ident { @@ -28,7 +30,7 @@ enum PatElem { Glob, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] enum PatTy { /// A path, like `my_crate::foo::Type<_, usize>` Pat(Pattern), @@ -42,6 +44,10 @@ impl Pattern { Self::from_str(i) } + fn len(&self) -> usize { + self.elems.len() + } + pub fn matches(&self, ctx: &TranslatedCrate, name: &Name) -> bool { self.matches_with_generics(ctx, name, &GenericArgs::empty()) } @@ -107,6 +113,39 @@ impl Pattern { } todo!("non-trivial const generics patterns aren't implemented") } + + /// Compares two patterns that match the same name, in terms of precision. A pattern that is + /// fully included in another (i.e. matches a subset of values) is considered "less precise". + /// Returns nonsense if the patterns don't match the same name. + pub fn compare(&self, other: &Self) -> Ordering { + use Ordering::*; + use PatElem::*; + match self.len().cmp(&other.len()) { + o @ (Less | Greater) => return o, + _ if self.len() == 0 => return Equal, + Equal => {} + } + match (self.elems.last().unwrap(), other.elems.last().unwrap()) { + (Glob, Glob) => Equal, + (Glob, _) => Less, + (_, Glob) => Greater, + // TODO: compare precision of the generics. + _ => Equal, + } + } +} + +/// Orders patterns by precision: the maximal pattern is the most precise. COmparing patterns only +/// makes sense if they match the same name. +impl Ord for Pattern { + fn cmp(&self, other: &Self) -> Ordering { + self.compare(other) + } +} +impl PartialOrd for Pattern { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.compare(other)) + } } impl PatElem { @@ -180,3 +219,18 @@ impl PatTy { } } } + +#[test] +fn test_compare() { + use Ordering::*; + let tests = [ + ("_", Less, "crate"), + ("crate::_", Less, "crate::foo"), + ("crate::foo", Less, "crate::foo::_"), + ]; + for (x, o, y) in tests { + let x = Pattern::parse(x).unwrap(); + let y = Pattern::parse(y).unwrap(); + assert_eq!(x.compare(&y), o); + } +} From 168401e71ef6ad32ac3e837592b7edfc85dd59ba Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 19 Aug 2024 16:38:38 +0200 Subject: [PATCH 08/10] Support using `crate` as the crate identifier in patterns --- charon-ml/src/CharonVersion.ml | 2 +- charon-ml/src/GAstOfJson.ml | 1 + charon/Cargo.lock | 2 +- charon/Cargo.toml | 2 +- charon/src/ast/krate.rs | 2 ++ .../charon-driver/translate/translate_crate_to_ullbc.rs | 3 ++- charon/src/bin/charon-driver/translate/translate_ctx.rs | 6 +++--- charon/src/bin/generate-ml/templates/GAstOfJson.ml | 1 + charon/src/name_matcher/mod.rs | 7 ++++++- charon/tests/test-rust-name-matcher.rs | 2 +- 10 files changed, 19 insertions(+), 9 deletions(-) diff --git a/charon-ml/src/CharonVersion.ml b/charon-ml/src/CharonVersion.ml index 4942a3d3..58134049 100644 --- a/charon-ml/src/CharonVersion.ml +++ b/charon-ml/src/CharonVersion.ml @@ -1,3 +1,3 @@ (* This is an automatically generated file, generated from `charon/Cargo.toml`. *) (* To re-generate this file, rune `make` in the root directory *) -let supported_charon_version = "0.1.26" +let supported_charon_version = "0.1.27" diff --git a/charon-ml/src/GAstOfJson.ml b/charon-ml/src/GAstOfJson.ml index 0e27b449..d6f159a3 100644 --- a/charon-ml/src/GAstOfJson.ml +++ b/charon-ml/src/GAstOfJson.ml @@ -1401,6 +1401,7 @@ and gtranslated_crate_of_json | `Assoc [ ("crate_name", name); + ("real_crate_name", _); ("id_to_file", id_to_file); ("all_ids", _); ("type_decls", types); diff --git a/charon/Cargo.lock b/charon/Cargo.lock index 9506e15e..55abf5b7 100644 --- a/charon/Cargo.lock +++ b/charon/Cargo.lock @@ -173,7 +173,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "charon" -version = "0.1.26" +version = "0.1.27" dependencies = [ "anyhow", "assert_cmd", diff --git a/charon/Cargo.toml b/charon/Cargo.toml index e6ef56e9..50c16c6c 100644 --- a/charon/Cargo.toml +++ b/charon/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "charon" -version = "0.1.26" +version = "0.1.27" authors = ["Son Ho "] edition = "2021" diff --git a/charon/src/ast/krate.rs b/charon/src/ast/krate.rs index 7bde89c9..eead3144 100644 --- a/charon/src/ast/krate.rs +++ b/charon/src/ast/krate.rs @@ -84,6 +84,8 @@ pub struct TranslatedCrate { /// The name that the user requested for the crate. This may differ from what rustc reports as /// the name of the crate. pub crate_name: String, + /// The name of the crate according to rustc. + pub real_crate_name: String, /// File names to ids and vice-versa #[drive(skip)] diff --git a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs index 32979073..6a78b9f5 100644 --- a/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs +++ b/charon/src/bin/charon-driver/translate/translate_crate_to_ullbc.rs @@ -384,7 +384,7 @@ pub fn translate<'tcx, 'ctx>(options: &CliOpts, tcx: TyCtxt<'tcx>) -> TransformC let transform_options = TransformOptions { no_code_duplication: options.no_code_duplication, }; - let translate_options = TranslateOptions::new(&mut error_ctx, &real_crate_name, options); + let translate_options = TranslateOptions::new(&mut error_ctx, options); let mut ctx = TranslateCtx { tcx, hax_state, @@ -392,6 +392,7 @@ pub fn translate<'tcx, 'ctx>(options: &CliOpts, tcx: TyCtxt<'tcx>) -> TransformC errors: error_ctx, translated: TranslatedCrate { crate_name: requested_crate_name, + real_crate_name, ..TranslatedCrate::default() }, priority_queue: Default::default(), diff --git a/charon/src/bin/charon-driver/translate/translate_ctx.rs b/charon/src/bin/charon-driver/translate/translate_ctx.rs index 0440bb3a..090264e6 100644 --- a/charon/src/bin/charon-driver/translate/translate_ctx.rs +++ b/charon/src/bin/charon-driver/translate/translate_ctx.rs @@ -57,7 +57,7 @@ pub struct TranslateOptions { } impl TranslateOptions { - pub(crate) fn new(error_ctx: &mut ErrorCtx<'_>, crate_name: &str, options: &CliOpts) -> Self { + pub(crate) fn new(error_ctx: &mut ErrorCtx<'_>, options: &CliOpts) -> Self { let mut parse_pattern = |s: &str| match NamePattern::parse(s) { Ok(p) => Ok(p), Err(e) => { @@ -86,10 +86,10 @@ impl TranslateOptions { } // We always include the items from the crate. - opacities.push((crate_name.to_owned(), Transparent)); + opacities.push(("crate".to_owned(), Transparent)); for module in options.opaque_modules.iter() { - opacities.push((format!("{crate_name}::{module}"), Opaque)); + opacities.push((format!("crate::{module}"), Opaque)); } opacities diff --git a/charon/src/bin/generate-ml/templates/GAstOfJson.ml b/charon/src/bin/generate-ml/templates/GAstOfJson.ml index 73605a8e..bb87eb2c 100644 --- a/charon/src/bin/generate-ml/templates/GAstOfJson.ml +++ b/charon/src/bin/generate-ml/templates/GAstOfJson.ml @@ -258,6 +258,7 @@ and gtranslated_crate_of_json | `Assoc [ ("crate_name", name); + ("real_crate_name", _); ("id_to_file", id_to_file); ("all_ids", _); ("type_decls", types); diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs index ce965293..86bed55f 100644 --- a/charon/src/name_matcher/mod.rs +++ b/charon/src/name_matcher/mod.rs @@ -164,7 +164,12 @@ impl PatElem { .. }, PathElem::Ident(ident, _), - ) => pat_ident == ident && PatTy::matches_generics(ctx, generics, args), + ) => { + // `crate` is a special keyword that referes to the current crate. + let same_ident = + pat_ident == ident || (pat_ident == "crate" && ident == &ctx.real_crate_name); + same_ident && PatTy::matches_generics(ctx, generics, args) + } (PatElem::Impl(_pat), PathElem::Impl(ImplElem::Ty(_, _ty), _)) => { todo!() } diff --git a/charon/tests/test-rust-name-matcher.rs b/charon/tests/test-rust-name-matcher.rs index 2f974569..d3211d14 100644 --- a/charon/tests/test-rust-name-matcher.rs +++ b/charon/tests/test-rust-name-matcher.rs @@ -47,7 +47,7 @@ fn test_name_matcher() -> anyhow::Result<()> { #[pattern::pass("test_crate::_")] #[pattern::pass("test_crate::_::bar")] #[pattern::fail("test_crate::_::lsdkfjs")] - #[pattern::fail("crate::foo::bar")] + #[pattern::pass("crate::foo::bar")] #[pattern::fail("foo::bar")] fn bar() {} } From 5a75ef7c71c4642f062a40628b2cdc21589bcd32 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 19 Aug 2024 17:40:27 +0200 Subject: [PATCH 09/10] Add `--include` and `--exclude` options to control translation finely --- charon/Cargo.toml | 1 + .../charon-driver/translate/translate_ctx.rs | 7 ++ charon/src/bin/charon/toml_config.rs | 6 ++ charon/src/name_matcher/mod.rs | 3 +- charon/src/name_matcher/parser.rs | 7 +- charon/src/options.rs | 34 ++++++- charon/tests/ui/opacity.out | 99 +++++++++++++++++++ charon/tests/ui/opacity.rs | 25 +++++ 8 files changed, 177 insertions(+), 5 deletions(-) create mode 100644 charon/tests/ui/opacity.out create mode 100644 charon/tests/ui/opacity.rs diff --git a/charon/Cargo.toml b/charon/Cargo.toml index 50c16c6c..73d03b6f 100644 --- a/charon/Cargo.toml +++ b/charon/Cargo.toml @@ -50,6 +50,7 @@ env_logger = { version = "0.11", features = ["color"] } hashlink = { version = "0.9", features = ["serde_impl"] } im = "15.1.0" index_vec = { version = "0.1.3", features = ["serde"] } +indoc = "2" itertools = "0.13" lazy_static = "1.4.0" log = "0.4.17" diff --git a/charon/src/bin/charon-driver/translate/translate_ctx.rs b/charon/src/bin/charon-driver/translate/translate_ctx.rs index 090264e6..f036948a 100644 --- a/charon/src/bin/charon-driver/translate/translate_ctx.rs +++ b/charon/src/bin/charon-driver/translate/translate_ctx.rs @@ -92,6 +92,13 @@ impl TranslateOptions { opacities.push((format!("crate::{module}"), Opaque)); } + for pat in options.include.iter() { + opacities.push((pat.to_string(), Transparent)); + } + for pat in options.exclude.iter() { + opacities.push((pat.to_string(), Opaque)); + } + opacities .into_iter() .filter_map(|(s, opacity)| parse_pattern(&s).ok().map(|pat| (pat, opacity))) diff --git a/charon/src/bin/charon/toml_config.rs b/charon/src/bin/charon/toml_config.rs index 64b02556..c83cc1c6 100644 --- a/charon/src/bin/charon/toml_config.rs +++ b/charon/src/bin/charon/toml_config.rs @@ -33,6 +33,10 @@ pub struct CharonTomlConfig { pub opaque_modules: Vec, #[serde(default)] pub extract_opaque_bodies: bool, + #[serde(default)] + pub include: Vec, + #[serde(default)] + pub exclude: Vec, } #[derive(Debug, Default, Deserialize)] @@ -53,6 +57,8 @@ impl TomlConfig { config.no_code_duplication |= self.charon.no_code_duplication; config.opaque_modules.extend(self.charon.opaque_modules); config.extract_opaque_bodies |= self.charon.extract_opaque_bodies; + config.include.extend(self.charon.include); + config.exclude.extend(self.charon.exclude); config.rustc_args.extend(self.rustc.flags); config } diff --git a/charon/src/name_matcher/mod.rs b/charon/src/name_matcher/mod.rs index 86bed55f..fca68efb 100644 --- a/charon/src/name_matcher/mod.rs +++ b/charon/src/name_matcher/mod.rs @@ -171,7 +171,8 @@ impl PatElem { same_ident && PatTy::matches_generics(ctx, generics, args) } (PatElem::Impl(_pat), PathElem::Impl(ImplElem::Ty(_, _ty), _)) => { - todo!() + // TODO + false } (PatElem::Impl(pat), PathElem::Impl(ImplElem::Trait(impl_id), _)) => { let Some(timpl) = ctx.trait_impls.get(*impl_id) else { diff --git a/charon/src/name_matcher/parser.rs b/charon/src/name_matcher/parser.rs index 79c2b8da..07ce823a 100644 --- a/charon/src/name_matcher/parser.rs +++ b/charon/src/name_matcher/parser.rs @@ -52,7 +52,7 @@ fn parse_pattern_complete(i: &str) -> Result> { } fn parse_pattern(i: &str) -> ParseResult<'_, Pattern> { - separated_list0(tag("::"), parse_pat_elem) + separated_list0(tag("::").followed_by(multispace0), parse_pat_elem) .map(|elems| Pattern { elems }) .parse(i) } @@ -109,7 +109,7 @@ fn parse_impl_elem(i: &str) -> ParseResult<'_, PatElem> { let impl_contents = parse_pattern.followed_by(multispace0).and(for_ty.opt()); let impl_expr = tag("{").followed_by(multispace0).precedes( delimited( - tag("impl").followed_by(multispace1), + tag("impl").followed_by(multispace1.cut()).opt(), impl_contents, tag("}"), ) @@ -211,9 +211,10 @@ fn test_roundtrip() { ]; let other_test_strings = [ ("blah::*", "blah::_"), - ("crate ::foo ::bar ", "crate::foo::bar"), + ("crate :: foo ::bar ", "crate::foo::bar"), ("a::b::Type < _ , _ >", "a::b::Type<_, _>"), ("{ impl Clone for usize }", "{impl Clone for usize}"), + ("{Clone for usize}", "{impl Clone for usize}"), ]; let failures = [ "{implClone for usize}", diff --git a/charon/src/options.rs b/charon/src/options.rs index 79887da2..895a3a26 100644 --- a/charon/src/options.rs +++ b/charon/src/options.rs @@ -1,6 +1,7 @@ +//! The options received as input by cargo-charon #![allow(dead_code)] use clap::Parser; -/// The options received as input by cargo-charon +use indoc::indoc; use serde::{Deserialize, Serialize}; use std::path::PathBuf; @@ -120,6 +121,37 @@ performs: `y := (x as E2).1`). Producing a better reconstruction is non-trivial. #[clap(long = "extract-opaque-bodies")] #[serde(default)] pub extract_opaque_bodies: bool, + /// Whitelist of items to translate. These use the name-matcher syntax. + #[clap( + long = "include", + help = indoc!(" + Whitelist of items to translate. These use the name-matcher syntax (note: this differs + a bit from the ocaml NameMatcher). + + Note: This is very rough at the moment. E.g. this parses `u64` as a path instead of the + built-in type. Some parts just don't work. Please report bugs or missing features. + + Examples: + - `crate::module1::module2::item`: refers to this item and all its subitems (e.g. + submodules or trait methods); + - `crate::module1::module2::item::_`: refers only to the subitems of this item; + - `core::convert::{impl core::convert::Into<_> for _}`: retrieve the body of this + very useful impl; + + When multiple patterns in the `--include` and `--exclude` options match the same item, + the most precise pattern wins. E.g.: `charon --exclude crate::module --include + crate::module::_` makes the `module` opaque (we won't explore its contents), but the + items in it transparent (we will translate them if we encounter them.) + "))] + #[serde(default)] + pub include: Vec, + /// Blacklist of items to keep opaque. These use the name-matcher syntax. + #[clap( + long = "exclude", + help = "Blacklist of items to keep opaque. Works just like `--include`, see the doc there." + )] + #[serde(default)] + pub exclude: Vec, /// Do not run cargo; instead, run the driver directly. // FIXME: use a subcommand instead, when we update clap to support flattening. #[clap(long = "no-cargo")] diff --git a/charon/tests/ui/opacity.out b/charon/tests/ui/opacity.out new file mode 100644 index 00000000..e840544e --- /dev/null +++ b/charon/tests/ui/opacity.out @@ -0,0 +1,99 @@ +# Final LLBC before serialization: + +enum core::option::Option = +| None() +| Some(T) + + +fn core::option::{core::option::Option}::is_some<'_0, T>(@1: &'_0 (core::option::Option)) -> bool +{ + let @0: bool; // return + let self@1: &'_ (core::option::Option); // arg #1 + + match *(self@1) { + 1 => { + @0 := const (true) + }, + 0 => { + @0 := const (false) + } + } + return +} + +trait core::convert::Into +{ + fn into : core::convert::Into::into +} + +trait core::convert::From +{ + fn from : core::convert::From::from +} + +fn core::convert::From::from(@1: T) -> Self + +fn core::convert::{impl core::convert::Into for T}#3::into(@1: T) -> U +where + // Inherited clauses: + [@TraitClause0]: core::convert::From, +{ + let @0: U; // return + let self@1: T; // arg #1 + + @0 := @TraitClause0::from(move (self@1)) + return +} + +impl core::convert::{impl core::convert::Into for T}#3 : core::convert::Into +where + [@TraitClause0]: core::convert::From, +{ + fn into = core::convert::{impl core::convert::Into for T}#3::into +} + +fn core::convert::num::{impl core::convert::From for u64}#72::from(@1: u32) -> u64 +{ + let @0: u64; // return + let small@1: u32; // arg #1 + + @0 := cast(copy (small@1)) + return +} + +impl core::convert::num::{impl core::convert::From for u64}#72 : core::convert::From +{ + fn from = core::convert::num::{impl core::convert::From for u64}#72::from +} + +fn core::convert::Into::into(@1: Self) -> T + +fn test_crate::foo() +{ + let @0: (); // return + let @1: bool; // anonymous local + let @2: &'_ (core::option::Option); // anonymous local + let @3: core::option::Option; // anonymous local + let @4: u64; // anonymous local + let @5: (); // anonymous local + + @3 := core::option::Option::Some { 0: const (0 : i32) } + @2 := &@3 + @1 := core::option::{core::option::Option}::is_some(move (@2)) + drop @2 + @fake_read(@1) + drop @3 + drop @1 + @4 := core::convert::{impl core::convert::Into for T}#3[core::convert::num::{impl core::convert::From for u64}#72]::into(const (42 : u32)) + @fake_read(@4) + drop @4 + @5 := () + @0 := move (@5) + @0 := () + return +} + +fn test_crate::module::dont_translate_body() + + + diff --git a/charon/tests/ui/opacity.rs b/charon/tests/ui/opacity.rs new file mode 100644 index 00000000..516ee366 --- /dev/null +++ b/charon/tests/ui/opacity.rs @@ -0,0 +1,25 @@ +//@ charon-args=--include core::option +//@ charon-args=--exclude test_crate::module::dont_translate_body +//@ charon-args=--exclude crate::module::other_mod +//@ charon-args=--include crate::module::other_mod::_ +//@ charon-args=--include core::convert::{core::convert::Into<_,_>}::into +//@ charon-args=--include core::convert::num::{core::convert::From<_,_>} +//@ charon-args=--exclude _::exclude_me +// Note: we don't use the `impl Trait for T` syntax above because we can't have spaces in these +// options. + +fn foo() { + let _ = Some(0).is_some(); + let _: u64 = 42u32.into(); +} + +mod module { + fn dont_translate_body() { + println!("aaa") + } + mod other_mod { + fn dont_even_see_me() {} + } +} + +fn exclude_me() {} From 33539f6fd5aed13b094995e67fe0f37b60f1e5cf Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 19 Aug 2024 17:42:29 +0200 Subject: [PATCH 10/10] Improve indentation in `options.rs` --- charon/src/options.rs | 91 +++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/charon/src/options.rs b/charon/src/options.rs index 895a3a26..cbb5c9f6 100644 --- a/charon/src/options.rs +++ b/charon/src/options.rs @@ -68,46 +68,46 @@ pub struct CliOpts { pub use_polonius: bool, #[clap( long = "no-code-duplication", - help = "Check that no code duplication happens during control-flow reconstruction -of the MIR code. + help = indoc!(" + Check that no code duplication happens during control-flow reconstruction + of the MIR code. -This is only used to make sure the reconstructed code is of good quality. -For instance, if we have the following CFG in MIR: - ``` - b0: switch x [true -> goto b1; false -> goto b2] - b1: y := 0; goto b3 - b2: y := 1; goto b3 - b3: return y - ``` + This is only used to make sure the reconstructed code is of good quality. + For instance, if we have the following CFG in MIR: + ``` + b0: switch x [true -> goto b1; false -> goto b2] + b1: y := 0; goto b3 + b2: y := 1; goto b3 + b3: return y + ``` -We want to reconstruct the control-flow as: - ``` - if x then { y := 0; } else { y := 1 }; - return y; - ``` + We want to reconstruct the control-flow as: + ``` + if x then { y := 0; } else { y := 1 }; + return y; + ``` -But if we don't do this reconstruction correctly, we might duplicate -the code starting at b3: - ``` - if x then { y := 0; return y; } else { y := 1; return y; } - ``` + But if we don't do this reconstruction correctly, we might duplicate + the code starting at b3: + ``` + if x then { y := 0; return y; } else { y := 1; return y; } + ``` -When activating this flag, we check that no such things happen. + When activating this flag, we check that no such things happen. -Also note that it is sometimes not possible to prevent code duplication, -if the original Rust looks like this for instance: - ``` - match x with - | E1(y,_) | E2(_,y) => { ... } // Some branches are \"fused\" - | E3 => { ... } - ``` + Also note that it is sometimes not possible to prevent code duplication, + if the original Rust looks like this for instance: + ``` + match x with + | E1(y,_) | E2(_,y) => { ... } // Some branches are \"fused\" + | E3 => { ... } + ``` -The reason is that assignments are introduced when desugaring the pattern -matching, and those assignments are specific to the variant on which we pattern -match (the `E1` branch performs: `y := (x as E1).0`, while the `E2` branch -performs: `y := (x as E2).1`). Producing a better reconstruction is non-trivial. -" - )] + The reason is that assignments are introduced when desugaring the pattern + matching, and those assignments are specific to the variant on which we pattern + match (the `E1` branch performs: `y := (x as E1).0`, while the `E2` branch + performs: `y := (x as E2).1`). Producing a better reconstruction is non-trivial. + "))] #[serde(default)] pub no_code_duplication: bool, /// A list of modules of the extracted crate that we consider as opaque: we @@ -170,43 +170,30 @@ performs: `y := (x as E2).1`). Producing a better reconstruction is non-trivial. #[serde(default)] pub abort_on_error: bool, /// Print the errors as warnings - #[clap( - long = "errors-as-warnings", - help = " -Report the errors as warnings -" - )] + #[clap(long = "errors-as-warnings", help = "Report the errors as warnings")] #[serde(default)] pub errors_as_warnings: bool, #[clap( long = "no-serialize", - help = " -Don't serialize the final (U)LLBC to a file. -" + help = "Don't serialize the final (U)LLBC to a file." )] #[serde(default)] pub no_serialize: bool, #[clap( long = "print-ullbc", - help = " -Print the ULLBC immediately after extraction from MIR. -" + help = "Print the ULLBC immediately after extraction from MIR." )] #[serde(default)] pub print_ullbc: bool, #[clap( long = "print-built-llbc", - help = " -Print the LLBC just after we built it (i.e., immediately after loop reconstruction). -" + help = "Print the LLBC just after we built it (i.e., immediately after loop reconstruction)." )] #[serde(default)] pub print_built_llbc: bool, #[clap( long = "print-llbc", - help = " -Print the final LLBC (after all the cleaning micro-passes). -" + help = "Print the final LLBC (after all the cleaning micro-passes)." )] #[serde(default)] pub print_llbc: bool,