Skip to content

Commit

Permalink
Auto merge of rust-lang#100996 - m-ou-se:format-args-2, r=estebank
Browse files Browse the repository at this point in the history
Rewrite and refactor format_args!() builtin macro.

This is a near complete rewrite of `compiler/rustc_builtin_macros/src/format.rs`.

This gets rid of the massive unmaintanable [`Context` struct](https://github.com/rust-lang/rust/blob/76531befc4b0352247ada67bd225e8cf71ee5686/compiler/rustc_builtin_macros/src/format.rs#L176-L263), and splits the macro expansion into three parts:

1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax, but doesn't parse the template (the literal) itself.
2. Second, `make_format_args` will parse the template, the format options, resolve argument references, produce diagnostics, and turn the whole thing into a `FormatArgs` structure.
3. Finally, `expand_parsed_format_args` will turn that `FormatArgs` structure into the expression that the macro expands to.

In other words, the `format_args` builtin macro used to be a hard-to-maintain 'single pass compiler', which I've split into a three phase compiler with a parser/tokenizer (step 1), semantic analysis (step 2), and backend (step 3). (It's compilers all the way down. ^^)

This can serve as a great starting point for rust-lang#99012, which will only need to change the implementation of 3, while leaving step 1 and 2 unchanged.

It also makes rust-lang/compiler-team#541 easier, which could then upgrade the new `FormatArgs` struct to an `ast` node and remove step 3, moving that step to later in the compilation process.

It also fixes a few diagnostics bugs.

This also [significantly reduces](https://gist.github.com/m-ou-se/b67b2d54172c4837a5ab1b26fa3e5284) the amount of generated code for cases with arguments in non-default order without formatting options, like `"{1} {0}"` or `"{a} {}"`, etc.
  • Loading branch information
bors committed Sep 28, 2022
2 parents 90c34fa + 20bb600 commit d6734be
Show file tree
Hide file tree
Showing 16 changed files with 1,449 additions and 1,540 deletions.
1,949 changes: 638 additions & 1,311 deletions compiler/rustc_builtin_macros/src/format.rs

Large diffs are not rendered by default.

240 changes: 240 additions & 0 deletions compiler/rustc_builtin_macros/src/format/ast.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
use rustc_ast::ptr::P;
use rustc_ast::Expr;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::Span;

// Definitions:
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └──────────────────────────────────────────────┘
// FormatArgs
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─────────┘
// argument
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └───────────────────┘
// template
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └────┘└─────────┘└┘
// pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └────┘ └┘
// literal pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─────────┘
// placeholder
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─┘ └─┘
// positions (could be names, numbers, empty, or `*`)

/// (Parsed) format args.
///
/// Basically the "AST" for a complete `format_args!()`.
///
/// E.g., `format_args!("hello {name}");`.
#[derive(Clone, Debug)]
pub struct FormatArgs {
pub span: Span,
pub template: Vec<FormatArgsPiece>,
pub arguments: FormatArguments,
}

/// A piece of a format template string.
///
/// E.g. "hello" or "{name}".
#[derive(Clone, Debug)]
pub enum FormatArgsPiece {
Literal(Symbol),
Placeholder(FormatPlaceholder),
}

/// The arguments to format_args!().
///
/// E.g. `1, 2, name="ferris", n=3`,
/// but also implicit captured arguments like `x` in `format_args!("{x}")`.
#[derive(Clone, Debug)]
pub struct FormatArguments {
arguments: Vec<FormatArgument>,
num_unnamed_args: usize,
num_explicit_args: usize,
names: FxHashMap<Symbol, usize>,
}

impl FormatArguments {
pub fn new() -> Self {
Self {
arguments: Vec::new(),
names: FxHashMap::default(),
num_unnamed_args: 0,
num_explicit_args: 0,
}
}

pub fn add(&mut self, arg: FormatArgument) -> usize {
let index = self.arguments.len();
if let Some(name) = arg.kind.ident() {
self.names.insert(name.name, index);
} else if self.names.is_empty() {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
self.num_unnamed_args += 1;
}
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) {
// This is an explicit argument.
// Make sure that all arguments so far are explcit.
assert_eq!(
self.num_explicit_args,
self.arguments.len(),
"captured arguments must be added last"
);
self.num_explicit_args += 1;
}
self.arguments.push(arg);
index
}

pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.names.get(&name)?;
Some((i, &self.arguments[i]))
}

pub fn by_index(&self, i: usize) -> Option<&FormatArgument> {
(i < self.num_explicit_args).then(|| &self.arguments[i])
}

pub fn unnamed_args(&self) -> &[FormatArgument] {
&self.arguments[..self.num_unnamed_args]
}

pub fn named_args(&self) -> &[FormatArgument] {
&self.arguments[self.num_unnamed_args..self.num_explicit_args]
}

pub fn explicit_args(&self) -> &[FormatArgument] {
&self.arguments[..self.num_explicit_args]
}

pub fn into_vec(self) -> Vec<FormatArgument> {
self.arguments
}
}

#[derive(Clone, Debug)]
pub struct FormatArgument {
pub kind: FormatArgumentKind,
pub expr: P<Expr>,
}

#[derive(Clone, Debug)]
pub enum FormatArgumentKind {
/// `format_args(…, arg)`
Normal,
/// `format_args(…, arg = 1)`
Named(Ident),
/// `format_args("… {arg} …")`
Captured(Ident),
}

impl FormatArgumentKind {
pub fn ident(&self) -> Option<Ident> {
match self {
&Self::Normal => None,
&Self::Named(id) => Some(id),
&Self::Captured(id) => Some(id),
}
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FormatPlaceholder {
/// Index into [`FormatArgs::arguments`].
pub argument: FormatArgPosition,
/// The span inside the format string for the full `{…}` placeholder.
pub span: Option<Span>,
/// `{}`, `{:?}`, or `{:x}`, etc.
pub format_trait: FormatTrait,
/// `{}` or `{:.5}` or `{:-^20}`, etc.
pub format_options: FormatOptions,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FormatArgPosition {
/// Which argument this position refers to (Ok),
/// or would've referred to if it existed (Err).
pub index: Result<usize, usize>,
/// What kind of position this is. See [`FormatArgPositionKind`].
pub kind: FormatArgPositionKind,
/// The span of the name or number.
pub span: Option<Span>,
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatArgPositionKind {
/// `{}` or `{:.*}`
Implicit,
/// `{1}` or `{:1$}` or `{:.1$}`
Number,
/// `{a}` or `{:a$}` or `{:.a$}`
Named,
}

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum FormatTrait {
/// `{}`
Display,
/// `{:?}`
Debug,
/// `{:e}`
LowerExp,
/// `{:E}`
UpperExp,
/// `{:o}`
Octal,
/// `{:p}`
Pointer,
/// `{:b}`
Binary,
/// `{:x}`
LowerHex,
/// `{:X}`
UpperHex,
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FormatOptions {
/// The width. E.g. `{:5}` or `{:width$}`.
pub width: Option<FormatCount>,
/// The precision. E.g. `{:.5}` or `{:.precision$}`.
pub precision: Option<FormatCount>,
/// The alignment. E.g. `{:>}` or `{:<}` or `{:^}`.
pub alignment: Option<FormatAlignment>,
/// The fill character. E.g. the `.` in `{:.>10}`.
pub fill: Option<char>,
/// The `+`, `-`, `0`, `#`, `x?` and `X?` flags.
pub flags: u32,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FormatAlignment {
/// `{:<}`
Left,
/// `{:>}`
Right,
/// `{:^}`
Center,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FormatCount {
/// `{:5}` or `{:.5}`
Literal(usize),
/// `{:.*}`, `{:.5$}`, or `{:a$}`, etc.
Argument(FormatArgPosition),
}
Loading

0 comments on commit d6734be

Please sign in to comment.