From 3a95291e6f405a536cd0018514cbfef2a3a0fbc2 Mon Sep 17 00:00:00 2001
From: Eduard-Mihai Burtescu
Date: Tue, 10 Aug 2021 21:06:24 +0300
Subject: [PATCH] [RFC2603] Extend `<const-data>` to include `str` and structural constants.

---
 text/2603-rust-symbol-name-mangling-v0.md | 68 ++++++++++++++++-------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/text/2603-rust-symbol-name-mangling-v0.md b/text/2603-rust-symbol-name-mangling-v0.md
index 8090cd45448..a68f1a74983 100644
--- a/text/2603-rust-symbol-name-mangling-v0.md
+++ b/text/2603-rust-symbol-name-mangling-v0.md
@@ -691,28 +691,30 @@ Mangled names conform to the following grammar:
        | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
        | <backref>
 
-<basic-type> = "a"      // i8
+<basic-type> = <int-type>
              | "b"      // bool
              | "c"      // char
              | "d"      // f64
              | "e"      // str
              | "f"      // f32
-             | "h"      // u8
-             | "i"      // isize
-             | "j"      // usize
-             | "l"      // i32
-             | "m"      // u32
-             | "n"      // i128
-             | "o"      // u128
-             | "s"      // i16
-             | "t"      // u16
              | "u"      // ()
              | "v"      // ...
-             | "x"      // i64
-             | "y"      // u64
              | "z"      // !
              | "p"      // placeholder (e.g. for generic params), shown as _
 
+<int-type> = "a"      // i8
+           | "h"      // u8
+           | "i"      // isize
+           | "j"      // usize
+           | "l"      // i32
+           | "m"      // u32
+           | "n"      // i128
+           | "o"      // u128
+           | "s"      // i16
+           | "t"      // u16
+           | "x"      // i64
+           | "y"      // u64
+
 // If the "U" is present then the function is `unsafe`.
 // The return type is always present, but demanglers can
 // choose to omit the ` -> ()` by special-casing "u".
@@ -724,16 +726,40 @@ Mangled names conform to the following grammar:
 <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
 <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
 <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
-<const> = <type> <const-data>
-        | "p" // placeholder, shown as _
+
+// Constants are encoded structurally, as a tree of array/tuple/ADT constructors,
+// with integer(-like) leaves, not using the constant's memory representation.
+// See the comments on <const-int> & <const-str> for more details on leaf encoding.
+<const> = <int-type> <const-int>
+        | "b" <const-int>               // false, true
+        | "c" <const-int>               // '...'
+        | "e" <const-str>               // "..."
+        | "R" <const>                   // &value
+        | "Q" <const>                   // &mut value
+        | "A" {<const>} "E"             // [a, b, c, ...]
+        | "T" {<const>} "E"             // (a, b, c, ...)
+        | "V" <path> <const-fields>     // named struct/variant
+        | "p"                           // placeholder, shown as _
         | <backref>
 
-// The encoding of a constant depends on its type. Integers use their value,
-// in base 16 (0-9a-f), not their memory representation. Negative integer
-// values are preceded with "n". The bool value false is encoded as `0_`, true
-// value as `1_`. The char constants are encoded using their Unicode scalar
-// value.
-<const-data> = ["n"] {<hex-digit>} "_"
+<const-fields> = "U"                                // X
+               | "T" {<const>} "E"                  // X(a, b, c, ...)
+               | "S" {<identifier> <const>} "E"     // X { field: value, ... }
+
+// An integer(-like) constant's numeric value is encoded in base 16 (0-9a-f),
+// with negative integer values being preceded with "n".
+// For other types, the numeric value is the same one used for `as` casts, i.e.:
+// * `bool`: 0 for `false` (encoded as `0_`), 1 for `true` (encoded as `1_`)
+// * `char`: the Unicode scalar value
+<const-int> = ["n"] {<hex-digit>} "_"
+
+// `str` constants are encoded as their (UTF-8) byte sequence, where each byte
+// always uses two hex nibbles.
+// Because the constant has `str` type, and not `&str`, demangling should make
+// that clear by e.g. demangling `616263_` as `*"abc"` (instead of `"abc"`).
+// In order to have constants of type `&str` demangle as a plain string literal
+// (i.e. without `&*`), demanglers can special-case `Re...` constants.
+<const-str> = {<hex-digit> <hex-digit>} "_"
 
 // <base-62-number> uses 0-9-a-z-A-Z as digits, i.e. 'a' is decimal 10 and
 // 'Z' is decimal 61.
@@ -1158,3 +1184,5 @@ pub static QUUX: u32 = {
 - Make `<disambiguator>` optional in `<identifier>` and `<lifetime>` productions.
 - Extend `<const-data>` to include `bool` values, `char` values, and negative integer values.
 - Remove type from constant placeholders.
+- In amendment PR [#3161](https://github.com/rust-lang/rfcs/pull/3161):
+  - Extend `<const-data>` to include `str` and structural constants