From 618561998482a75f11dc61db98749e6d537258cd Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Tue, 4 Feb 2025 22:31:48 +0000 Subject: [PATCH] refactor(ast_tools): improve correctness and performance of idents (#8895) Refactor. Codegen produces lots of identifiers in the generated code. Ensure they're always prepended with `r#` if they're reserved words. Speed up checking if a name is a reserved word by using a perfect hash set, rather than iterating through an array. --- Cargo.lock | 1 + tasks/ast_tools/Cargo.toml | 1 + tasks/ast_tools/src/derives/clone_in.rs | 5 +- tasks/ast_tools/src/derives/content_eq.rs | 21 +++---- tasks/ast_tools/src/derives/get_span.rs | 37 ++++++------ tasks/ast_tools/src/generators/ast_builder.rs | 9 +-- tasks/ast_tools/src/generators/visit.rs | 58 +++++++++---------- .../ast_tools/src/schema/extensions/visit.rs | 44 +++++++++++--- tasks/ast_tools/src/utils.rs | 19 ++++-- 9 files changed, 114 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0e8ae50dfdfae..700f5273ed57f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1614,6 +1614,7 @@ dependencies = [ "itertools", "lazy_static", "oxc_index", + "phf", "prettyplease", "proc-macro2", "quote", diff --git a/tasks/ast_tools/Cargo.toml b/tasks/ast_tools/Cargo.toml index e0d3dbaaedaf4..cf762b2202d4b 100644 --- a/tasks/ast_tools/Cargo.toml +++ b/tasks/ast_tools/Cargo.toml @@ -22,6 +22,7 @@ indexmap = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } oxc_index = { workspace = true } +phf = { workspace = true, features = ["macros"] } prettyplease = { workspace = true } proc-macro2 = { workspace = true } quote = { workspace = true } diff --git a/tasks/ast_tools/src/derives/clone_in.rs b/tasks/ast_tools/src/derives/clone_in.rs index 5c6ee281aa412..bb0916ca6d049 100644 --- a/tasks/ast_tools/src/derives/clone_in.rs +++ b/tasks/ast_tools/src/derives/clone_in.rs @@ -1,11 +1,12 @@ //! Derive for `CloneIn` trait. use proc_macro2::TokenStream; -use quote::{format_ident, quote}; +use quote::quote; use syn::Ident; use crate::{ schema::{Def, EnumDef, FieldDef, Schema, StructDef, TypeDef}, + utils::create_safe_ident, Result, }; @@ -151,7 +152,7 @@ fn generate_impl( has_lifetime: bool, uses_allocator: bool, ) -> TokenStream { - let alloc_ident = format_ident!("{}", if uses_allocator { "allocator" } else { "_" }); + let alloc_ident = create_safe_ident(if uses_allocator { "allocator" } else { "_" }); if has_lifetime { quote! { diff --git a/tasks/ast_tools/src/derives/content_eq.rs b/tasks/ast_tools/src/derives/content_eq.rs index 03b4a773a85f4..aded378305e7b 100644 --- a/tasks/ast_tools/src/derives/content_eq.rs +++ b/tasks/ast_tools/src/derives/content_eq.rs @@ -1,10 +1,11 @@ //! Derive for `ContentEq` trait. use proc_macro2::TokenStream; -use quote::{format_ident, quote}; +use quote::quote; use crate::{ schema::{Def, EnumDef, Schema, StructDef, TypeDef}, + utils::create_safe_ident, Result, }; @@ -69,11 +70,11 @@ impl Derive for DeriveContentEq { } fn derive_struct(struct_def: &StructDef, schema: &Schema) -> TokenStream { - let mut other_name = "other"; + let mut uses_other = true; let body = if struct_def.content_eq.skip { // Struct has `#[content_eq(skip)]` attr. So `content_eq` always returns true. - other_name = "_"; + uses_other = false; quote!(true) } else { let fields = struct_def @@ -96,20 +97,20 @@ fn derive_struct(struct_def: &StructDef, schema: &Schema) -> TokenStream { let mut body = quote!( #(#fields)&&* ); if body.is_empty() { body = quote!(true); - other_name = "_"; + uses_other = false; }; body }; - generate_impl(&struct_def.ty_anon(schema), other_name, &body) + generate_impl(&struct_def.ty_anon(schema), &body, uses_other) } fn derive_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { - let mut other_name = "other"; + let mut uses_other = true; let body = if enum_def.content_eq.skip { // Enum has `#[content_eq(skip)]` attr. So `content_eq` always returns true. - other_name = "_"; + uses_other = false; quote!(true) } else if enum_def.is_fieldless() { // We assume fieldless enums implement `PartialEq` @@ -132,11 +133,11 @@ fn derive_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { } }; - generate_impl(&enum_def.ty_anon(schema), other_name, &body) + generate_impl(&enum_def.ty_anon(schema), &body, uses_other) } -fn generate_impl(ty: &TokenStream, other_name: &str, body: &TokenStream) -> TokenStream { - let other_ident = format_ident!("{other_name}"); +fn generate_impl(ty: &TokenStream, body: &TokenStream, uses_other: bool) -> TokenStream { + let other_ident = create_safe_ident(if uses_other { "other" } else { "_" }); quote! { impl ContentEq for #ty { fn content_eq(&self, #other_ident: &Self) -> bool { diff --git a/tasks/ast_tools/src/derives/get_span.rs b/tasks/ast_tools/src/derives/get_span.rs index cf4ccdcad0204..46f50ad76d214 100644 --- a/tasks/ast_tools/src/derives/get_span.rs +++ b/tasks/ast_tools/src/derives/get_span.rs @@ -1,11 +1,12 @@ //! Derive for `GetSpan` trait. use proc_macro2::TokenStream; -use quote::{format_ident, quote}; +use quote::quote; use syn::Ident; use crate::{ schema::{Def, EnumDef, Schema, StructDef}, + utils::create_safe_ident, Result, }; @@ -54,6 +55,8 @@ impl Derive for DeriveGetSpan { } fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + let trait_ident = create_safe_ident("GetSpan"); + let method_ident = create_safe_ident("span"); let self_ty = quote!(&self); let result_ty = quote!(Span); let result_expr = quote!(self.span); @@ -61,8 +64,8 @@ impl Derive for DeriveGetSpan { derive_type( type_def, - "GetSpan", - "span", + &trait_ident, + &method_ident, &self_ty, &result_ty, &result_expr, @@ -96,6 +99,8 @@ impl Derive for DeriveGetSpanMut { } fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + let trait_ident = create_safe_ident("GetSpanMut"); + let method_ident = create_safe_ident("span_mut"); let self_ty = quote!(&mut self); let result_ty = quote!(&mut Span); let result_expr = quote!(&mut self.span); @@ -103,8 +108,8 @@ impl Derive for DeriveGetSpanMut { derive_type( type_def, - "GetSpanMut", - "span_mut", + &trait_ident, + &method_ident, &self_ty, &result_ty, &result_expr, @@ -118,36 +123,28 @@ impl Derive for DeriveGetSpanMut { #[expect(clippy::too_many_arguments)] fn derive_type( type_def: StructOrEnum, - trait_name: &str, - method_name: &str, + trait_ident: &Ident, + method_ident: &Ident, self_ty: &TokenStream, result_ty: &TokenStream, result_expr: &TokenStream, reference: &TokenStream, schema: &Schema, ) -> TokenStream { - let trait_ident = format_ident!("{trait_name}"); - let method_ident = format_ident!("{method_name}"); match type_def { StructOrEnum::Struct(struct_def) => derive_struct( struct_def, - &trait_ident, - &method_ident, + trait_ident, + method_ident, self_ty, result_ty, result_expr, reference, schema, ), - StructOrEnum::Enum(enum_def) => derive_enum( - enum_def, - &trait_ident, - &method_ident, - self_ty, - result_ty, - reference, - schema, - ), + StructOrEnum::Enum(enum_def) => { + derive_enum(enum_def, trait_ident, method_ident, self_ty, result_ty, reference, schema) + } } } diff --git a/tasks/ast_tools/src/generators/ast_builder.rs b/tasks/ast_tools/src/generators/ast_builder.rs index 448017ca43419..4045d824b792e 100644 --- a/tasks/ast_tools/src/generators/ast_builder.rs +++ b/tasks/ast_tools/src/generators/ast_builder.rs @@ -8,7 +8,7 @@ use syn::Ident; use crate::{ output::{output_path, Output}, schema::{Def, EnumDef, FieldDef, Schema, StructDef, TypeDef, VariantDef}, - utils::is_reserved_name, + utils::{create_safe_ident, is_reserved_name}, Codegen, Generator, AST_CRATE_PATH, }; @@ -276,11 +276,11 @@ fn get_struct_params<'s>( TypeDef::Primitive(primitive_def) => match primitive_def.name() { "Atom" if !has_atom_generic => { has_atom_generic = true; - Some(format_ident!("A")) + Some(create_safe_ident("A")) } "&str" if !has_str_generic => { has_str_generic = true; - Some(format_ident!("S")) + Some(create_safe_ident("S")) } _ => None, }, @@ -450,7 +450,8 @@ fn struct_builder_name(snake_name: &str, does_alloc: bool) -> Ident { } else if is_reserved_name(snake_name) { format_ident!("{snake_name}_") } else { - format_ident!("{snake_name}") + // We just checked name is not a reserved word + create_safe_ident(snake_name) } } diff --git a/tasks/ast_tools/src/generators/visit.rs b/tasks/ast_tools/src/generators/visit.rs index 36f0463790089..109fb75c9ed89 100644 --- a/tasks/ast_tools/src/generators/visit.rs +++ b/tasks/ast_tools/src/generators/visit.rs @@ -11,7 +11,7 @@ use crate::{ extensions::visit::{Scope, VisitorNames}, Def, EnumDef, FieldDef, OptionDef, Schema, StructDef, TypeDef, VecDef, }, - utils::{create_ident, create_ident_tokens}, + utils::{create_ident, create_ident_tokens, create_safe_ident}, Codegen, Generator, Result, AST_CRATE_PATH, }; @@ -200,23 +200,23 @@ fn generate_outputs(schema: &Schema) -> (/* Visit */ TokenStream, /* VisitMut */ } }; let visit_output = generate_output( - &format_ident!("Visit"), + &create_safe_ident("Visit"), &visit_methods, &walk_fns, - &format_ident!("walk"), + &create_safe_ident("walk"), &alloc_fn, - &format_ident!("AstKind"), + &create_safe_ident("AstKind"), "e!(AstKind<'a>), ); // Generate `VisitMut` trait let visit_mut_output = generate_output( - &format_ident!("VisitMut"), + &create_safe_ident("VisitMut"), &visit_mut_methods, &walk_mut_fns, - &format_ident!("walk_mut"), + &create_safe_ident("walk_mut"), "e!(), - &format_ident!("AstType"), + &create_safe_ident("AstType"), "e!(AstType), ); @@ -338,8 +338,8 @@ impl VisitBuilder<'_> { // Generate visit methods let struct_ty = struct_def.ty(self.schema); - let visit_fn_ident = create_ident(&visitor_names.visit); - let walk_fn_ident = create_ident(&visitor_names.walk); + let visit_fn_ident = visitor_names.visitor_ident(); + let walk_fn_ident = visitor_names.walk_ident(); // Get additional params let (extra_params, extra_args) = if let Some(visit_args) = &struct_def.visit.visit_args { @@ -513,8 +513,8 @@ impl VisitBuilder<'_> { // Generate visit methods let enum_ty = enum_def.ty(self.schema); - let visit_fn_ident = create_ident(&visitor_names.visit); - let walk_fn_ident = create_ident(&visitor_names.walk); + let visit_fn_ident = visitor_names.visitor_ident(); + let walk_fn_ident = visitor_names.walk_ident(); let gen_visit = |reference| { quote! { @@ -563,8 +563,8 @@ impl VisitBuilder<'_> { .inherits_types(self.schema) .map(|inherits_type| { let inherits_type = inherits_type.as_enum().unwrap(); - let inner_visit_fn_name = inherits_type.visit.visitor_name(); - let Some(inner_visit_fn_name) = inner_visit_fn_name else { + let inner_visit_fn_ident = inherits_type.visit.visitor_ident(); + let Some(inner_visit_fn_ident) = inner_visit_fn_ident else { panic!( "When an enum inherits variants from another enum and the inheritor has a visitor, \ the inherited enum must also have a visitor: `{}` inheriting from `{}`", @@ -575,8 +575,6 @@ impl VisitBuilder<'_> { match_arm_count += 1; - let inner_visit_fn_ident = create_ident(inner_visit_fn_name); - let inherits_snake_name = inherits_type.snake_name(); let match_ident = format_ident!("match_{inherits_snake_name}"); @@ -643,8 +641,9 @@ impl VisitBuilder<'_> { // Generate visit methods let vec_ty = vec_def.ty(self.schema); - let visit_fn_ident = create_ident(&visitor_names.visit); - let walk_fn_ident = create_ident(&visitor_names.walk); + + let visit_fn_ident = visitor_names.visitor_ident(); + let walk_fn_ident = visitor_names.walk_ident(); let gen_visit = |reference| { quote! { @@ -660,15 +659,14 @@ impl VisitBuilder<'_> { // Generate walk functions let inner_type = vec_def.inner_type(self.schema); - let inner_visit_fn_name = match inner_type { - TypeDef::Struct(struct_def) => struct_def.visit.visitor_name().unwrap(), - TypeDef::Enum(enum_def) => enum_def.visit.visitor_name().unwrap(), + let inner_visit_fn_ident = match inner_type { + TypeDef::Struct(struct_def) => struct_def.visit.visitor_ident().unwrap(), + TypeDef::Enum(enum_def) => enum_def.visit.visitor_ident().unwrap(), _ => unreachable!(), }; - let inner_visit_fn_ident = create_ident(inner_visit_fn_name); let gen_walk = |visit_trait_name, reference| { - let visit_trait_ident = format_ident!("{visit_trait_name}"); + let visit_trait_ident = create_safe_ident(visit_trait_name); quote! { ///@@line_break #[inline] @@ -756,13 +754,12 @@ impl VisitBuilder<'_> { visit_args: Option<&Vec<(String, String)>>, trailing_semicolon: bool, ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { - let visit_fn_name = match type_def { - TypeDef::Struct(struct_def) => struct_def.visit.visitor_name()?, - TypeDef::Enum(enum_def) => enum_def.visit.visitor_name()?, + let visit_fn_ident = match type_def { + TypeDef::Struct(struct_def) => struct_def.visit.visitor_ident()?, + TypeDef::Enum(enum_def) => enum_def.visit.visitor_ident()?, _ => None?, }; - let visit_fn_ident = create_ident(visit_fn_name); Some(Self::generate_visit_with_visit_args( &visit_fn_ident, target, @@ -796,7 +793,7 @@ impl VisitBuilder<'_> { // e.g. if attr on struct field/enum variant is `#[visit(args(x = something, y = something_else))]`, // `extra_params` is `, x, y`. let extra_params = if let Some(args) = visit_args { - let arg_params = args.iter().map(|(arg_name, _)| format_ident!("{arg_name}")); + let arg_params = args.iter().map(|(arg_name, _)| create_ident(arg_name)); quote!( , #(#arg_params),* ) } else { quote!() @@ -814,7 +811,7 @@ impl VisitBuilder<'_> { // e.g. if attr on struct field/enum variant is `#[visit(args(x = something, y = something_else))]`, // then output `{ let x = something; let y = something_else; visitor.visit_thing(it, x, y) }`. let let_args = visit_args.iter().map(|(arg_name, arg_value)| { - let arg_ident = format_ident!("{arg_name}"); + let arg_ident = create_ident(arg_name); let arg_value = parse_str::(&arg_value.cow_replace("self", "it")).unwrap(); quote!( let #arg_ident = #arg_value; ) }); @@ -896,9 +893,8 @@ impl VisitBuilder<'_> { visit_args: Option<&Vec<(String, String)>>, trailing_semicolon: bool, ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { - if let Some(visit_fn_name) = vec_def.visit.visitor_name() { + if let Some(visit_fn_ident) = vec_def.visit.visitor_ident() { // Inner type is a struct or enum which has a visitor. This `Vec` has its own visitor. - let visit_fn_ident = create_ident(visit_fn_name); return Some(Self::generate_visit_with_visit_args( &visit_fn_ident, target, @@ -937,7 +933,7 @@ impl VisitBuilder<'_> { let target = target.into_tokens(); let gen_visit = |inner_visit, iter_method| { - let iter_method_ident = format_ident!("{iter_method}"); + let iter_method_ident = create_safe_ident(iter_method); quote! { for el in #target.#iter_method_ident() #maybe_flatten { #inner_visit diff --git a/tasks/ast_tools/src/schema/extensions/visit.rs b/tasks/ast_tools/src/schema/extensions/visit.rs index d175f40f4e9df..1fcbd8209bae2 100644 --- a/tasks/ast_tools/src/schema/extensions/visit.rs +++ b/tasks/ast_tools/src/schema/extensions/visit.rs @@ -1,3 +1,7 @@ +use syn::Ident; + +use crate::utils::create_safe_ident; + /// Details of visiting on a struct. #[derive(Default, Debug)] pub struct VisitStruct { @@ -14,9 +18,11 @@ impl VisitStruct { self.visitor_names.is_some() } - /// Get name of visitor method for this struct, if it has a visitor. - pub fn visitor_name(&self) -> Option<&str> { - self.visitor_names.as_ref().map(|names| names.visit.as_str()) + /// Get visitor method name for this struct as an [`Ident`], if it has a visitor. + /// + /// [`Ident`]: struct@Ident + pub fn visitor_ident(&self) -> Option { + self.visitor_names.as_ref().map(VisitorNames::visitor_ident) } } @@ -34,9 +40,11 @@ impl VisitEnum { self.visitor_names.is_some() } - /// Get name of visitor method for this enum, if it has a visitor. - pub fn visitor_name(&self) -> Option<&str> { - self.visitor_names.as_ref().map(|names| names.visit.as_str()) + /// Get visitor method name for this enum as an [`Ident`], if it has a visitor. + /// + /// [`Ident`]: struct@Ident + pub fn visitor_ident(&self) -> Option { + self.visitor_names.as_ref().map(VisitorNames::visitor_ident) } } @@ -55,9 +63,11 @@ impl VisitVec { self.visitor_names.is_some() } - /// Get name of visitor method for this `Vec`, if it has a visitor. - pub fn visitor_name(&self) -> Option<&str> { - self.visitor_names.as_ref().map(|names| names.visit.as_str()) + /// Get visitor method name for this `Vec` as an [`Ident`], if it has a visitor. + /// + /// [`Ident`]: struct@Ident + pub fn visitor_ident(&self) -> Option { + self.visitor_names.as_ref().map(VisitorNames::visitor_ident) } } @@ -78,6 +88,22 @@ impl VisitorNames { pub fn from_snake_name(snake_name: &str) -> Self { Self { visit: format!("visit_{snake_name}"), walk: format!("walk_{snake_name}") } } + + /// Get name of visitor method as an [`Ident`]. + /// + /// [`Ident`]: struct@Ident + pub fn visitor_ident(&self) -> Ident { + // Visitor method names cannot be reserved words, as they begin with `visit_` + create_safe_ident(&self.visit) + } + + /// Get name of walk function as an [`Ident`]. + /// + /// [`Ident`]: struct@Ident + pub fn walk_ident(&self) -> Ident { + // Walk function names cannot be reserved words, as they begin with `walk_` + create_safe_ident(&self.walk) + } } /// Details of scope on a struct. diff --git a/tasks/ast_tools/src/utils.rs b/tasks/ast_tools/src/utils.rs index cd4e850b32b22..71932607ae130 100644 --- a/tasks/ast_tools/src/utils.rs +++ b/tasks/ast_tools/src/utils.rs @@ -1,11 +1,11 @@ +use phf::{phf_set, Set as PhfSet}; use proc_macro2::{Span, TokenStream}; use quote::{format_ident, quote}; use syn::{Ident, LitInt}; /// Reserved word in Rust. /// From . -#[rustfmt::skip] -static RESERVED_NAMES: &[&str] = &[ +static RESERVED_NAMES: PhfSet<&'static str> = phf_set! { // Strict keywords "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self", @@ -16,11 +16,11 @@ static RESERVED_NAMES: &[&str] = &[ "virtual", "yield", "try", // Weak keywords "macro_rules", "union", // "dyn" also listed as a weak keyword, but is already on strict list -]; +}; /// Returns `true` if `name` is a reserved word in Rust. pub fn is_reserved_name(name: &str) -> bool { - RESERVED_NAMES.contains(&name) + RESERVED_NAMES.contains(name) } /// Create an [`Ident`] from a string. @@ -33,10 +33,19 @@ pub fn create_ident(name: &str) -> Ident { if is_reserved_name(name) { format_ident!("r#{name}") } else { - format_ident!("{name}") + create_safe_ident(name) } } +/// Create an [`Ident`] from a string, without checking it's not a reserved word. +/// +/// The provided `name` for the ident must not be a reserved word. +/// +/// [`Ident`]: struct@Ident +pub fn create_safe_ident(name: &str) -> Ident { + Ident::new(name, Span::call_site()) +} + /// Create an identifier from a string. /// /// If the name is a reserved word, it's prepended with `r#`.