Skip to content

Commit

Permalink
rust-demangle.c (unescape): Remove.
Browse files Browse the repository at this point in the history
	* rust-demangle.c (unescape): Remove.
	(parse_lower_hex_nibble): New function.
	(parse_legacy_escape): New function.
	(is_prefixed_hash): Use parse_lower_hex_nibble.
	(looks_like_rust): Use parse_legacy_escape.
	(rust_demangle_sym): Use parse_legacy_escape.
	* testsuite/rust-demangle-expected: Add 'llv$u6d$' test.

From-SVN: r275353
  • Loading branch information
eddyb authored and Jeff Law committed Sep 3, 2019
1 parent 5f76ab1 commit 42bf58b
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 147 deletions.
10 changes: 10 additions & 0 deletions libiberty/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
2019-09-03 Eduard-Mihai Burtescu <eddyb@lyken.rs>

* rust-demangle.c (unescape): Remove.
(parse_lower_hex_nibble): New function.
(parse_legacy_escape): New function.
(is_prefixed_hash): Use parse_lower_hex_nibble.
(looks_like_rust): Use parse_legacy_escape.
(rust_demangle_sym): Use parse_legacy_escape.
* testsuite/rust-demangle-expected: Add 'llv$u6d$' test.

2019-08-27 Martin Liska <mliska@suse.cz>

PR lto/91478
Expand Down
281 changes: 134 additions & 147 deletions libiberty/rust-demangle.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n);
#include "rust-demangle.h"


/* Mangled Rust symbols look like this:
/* Mangled (legacy) Rust symbols look like this:
_$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
The original symbol is:
Expand All @@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n);
">" => $GT$
"(" => $LP$
")" => $RP$
" " => $u20$
"\"" => $u22$
"'" => $u27$
"+" => $u2b$
";" => $u3b$
"[" => $u5b$
"]" => $u5d$
"{" => $u7b$
"}" => $u7d$
"~" => $u7e$
"\u{XY}" => $uXY$
A double ".." means "::" and a single "." means "-".
Expand All @@ -95,7 +86,8 @@ static const size_t hash_len = 16;

static int is_prefixed_hash (const char *start);
static int looks_like_rust (const char *sym, size_t len);
static int unescape (const char **in, char **out, const char *seq, char value);
static int parse_lower_hex_nibble (char nibble);
static char parse_legacy_escape (const char **in);

/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
Expand Down Expand Up @@ -149,20 +141,20 @@ is_prefixed_hash (const char *str)
const char *end;
char seen[16];
size_t i;
int count;
int count, nibble;

if (strncmp (str, hash_prefix, hash_prefix_len))
return 0;
str += hash_prefix_len;

memset (seen, 0, sizeof(seen));
for (end = str + hash_len; str < end; str++)
if (*str >= '0' && *str <= '9')
seen[*str - '0'] = 1;
else if (*str >= 'a' && *str <= 'f')
seen[*str - 'a' + 10] = 1;
else
return 0;
{
nibble = parse_lower_hex_nibble (*str);
if (nibble < 0)
return 0;
seen[nibble] = 1;
}

/* Count how many distinct hex digits were seen.  */
count = 0;
Expand All @@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len)
const char *end = str + len;

while (str < end)
switch (*str)
{
case '$':
if (!strncmp (str, "$C$", 3))
str += 3;
else if (!strncmp (str, "$SP$", 4)
|| !strncmp (str, "$BP$", 4)
|| !strncmp (str, "$RF$", 4)
|| !strncmp (str, "$LT$", 4)
|| !strncmp (str, "$GT$", 4)
|| !strncmp (str, "$LP$", 4)
|| !strncmp (str, "$RP$", 4))
str += 4;
else if (!strncmp (str, "$u20$", 5)
|| !strncmp (str, "$u22$", 5)
|| !strncmp (str, "$u27$", 5)
|| !strncmp (str, "$u2b$", 5)
|| !strncmp (str, "$u3b$", 5)
|| !strncmp (str, "$u5b$", 5)
|| !strncmp (str, "$u5d$", 5)
|| !strncmp (str, "$u7b$", 5)
|| !strncmp (str, "$u7d$", 5)
|| !strncmp (str, "$u7e$", 5))
str += 5;
else
return 0;
break;
case '.':
/* Do not allow three or more consecutive dots */
if (!strncmp (str, "...", 3))
return 0;
/* Fall through */
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
case '_':
case ':':
str++;
break;
default:
return 0;
}
{
if (*str == '$')
{
if (!parse_legacy_escape (&str))
return 0;
}
else if (*str == '.' || *str == '_' || *str == ':' || ISALNUM (*str))
str++;
else
return 0;
}

return 1;
}
Expand All @@ -246,6 +198,7 @@ rust_demangle_sym (char *sym)
const char *in;
char *out;
const char *end;
char unescaped;

if (!sym)
return;
Expand All @@ -255,75 +208,49 @@ rust_demangle_sym (char *sym)
end = sym + strlen (sym) - (hash_prefix_len + hash_len);

while (in < end)
switch (*in)
{
case '$':
if (!(unescape (&in, &out, "$C$", ',')
|| unescape (&in, &out, "$SP$", '@')
|| unescape (&in, &out, "$BP$", '*')
|| unescape (&in, &out, "$RF$", '&')
|| unescape (&in, &out, "$LT$", '<')
|| unescape (&in, &out, "$GT$", '>')
|| unescape (&in, &out, "$LP$", '(')
|| unescape (&in, &out, "$RP$", ')')
|| unescape (&in, &out, "$u20$", ' ')
|| unescape (&in, &out, "$u22$", '\"')
|| unescape (&in, &out, "$u27$", '\'')
|| unescape (&in, &out, "$u2b$", '+')
|| unescape (&in, &out, "$u3b$", ';')
|| unescape (&in, &out, "$u5b$", '[')
|| unescape (&in, &out, "$u5d$", ']')
|| unescape (&in, &out, "$u7b$", '{')
|| unescape (&in, &out, "$u7d$", '}')
|| unescape (&in, &out, "$u7e$", '~'))) {
/* unexpected escape sequence, not looks_like_rust. */
goto fail;
}
break;
case '_':
/* If this is the start of a path component and the next
character is an escape sequence, ignore the underscore. The
mangler inserts an underscore to make sure the path
component begins with a XID_Start character. */
if ((in == sym || in[-1] == ':') && in[1] == '$')
in++;
else
*out++ = *in++;
break;
case '.':
if (in[1] == '.')
{
/* ".." becomes "::" */
*out++ = ':';
*out++ = ':';
in += 2;
}
else
{
/* "." becomes "-" */
*out++ = '-';
in++;
}
break;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
case ':':
*out++ = *in++;
break;
default:
/* unexpected character in symbol, not looks_like_rust. */
goto fail;
}
{
if (*in == '$')
{
unescaped = parse_legacy_escape (&in);
if (unescaped)
*out++ = unescaped;
else
/* unexpected escape sequence, not looks_like_rust. */
goto fail;
}
else if (*in == '_')
{
/* If this is the start of a path component and the next
character is an escape sequence, ignore the underscore. The
mangler inserts an underscore to make sure the path
component begins with a XID_Start character. */
if ((in == sym || in[-1] == ':') && in[1] == '$')
in++;
else
*out++ = *in++;
}
else if (*in == '.')
{
if (in[1] == '.')
{
/* ".." becomes "::" */
*out++ = ':';
*out++ = ':';
in += 2;
}
else
{
/* "." becomes "-" */
*out++ = '-';
in++;
}
}
else if (*in == ':' || ISALNUM (*in))
*out++ = *in++;
else
/* unexpected character in symbol, not looks_like_rust. */
goto fail;
}
goto done;

fail:
Expand All @@ -332,18 +259,78 @@ rust_demangle_sym (char *sym)
*out = '\0';
}

/* Decode a single lowercase hexadecimal digit.

   NIBBLE is the character to decode.  Return its numeric value, in the
   range 0x0-0xf, when NIBBLE is one of '0'-'9' or 'a'-'f'.  Return -1
   for every other character; uppercase 'A'-'F' are deliberately not
   accepted.  */
static int
parse_lower_hex_nibble (char nibble)
{
  if ('0' <= nibble && nibble <= '9')
    return nibble - '0';
  if ('a' <= nibble && nibble <= 'f')
    return 0xa + (nibble - 'a');
  return -1;
}

if (strncmp (*in, seq, len))
return 0;
/* Parse one legacy "$...$" escape sequence.

   *IN points at the candidate escape.  The recognized forms are:

     "$C$"  => ','      "$SP$" => '@'      "$BP$" => '*'
     "$RF$" => '&'      "$LT$" => '<'      "$GT$" => '>'
     "$LP$" => '('      "$RP$" => ')'
     "$uXY$" => the ASCII character with code 0xXY, where X and Y are
		lowercase hexadecimal digits and the result is a
		printable (non-control) character.

   On success, return the unescaped character and advance *IN past the
   closing '$'.  On failure, return 0 and leave *IN unchanged.

   Each character of the escape is only examined after the previous one
   matched something other than NUL, so a NUL terminator is never read
   past.  */
static char
parse_legacy_escape (const char **in)
{
  char c = 0;
  const char *e;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  if ((*in)[0] != '$')
    return 0;

  /* E points at the escape name, just past the opening '$'.  */
  e = *in + 1;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
	c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
	c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
	c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
	c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
	c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
	c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
	c = ')';
      else if (e[0] == 'u')
	{
	  escape_len = 3;

	  hi_nibble = parse_lower_hex_nibble (e[1]);
	  if (hi_nibble < 0)
	    return 0;
	  lo_nibble = parse_lower_hex_nibble (e[2]);
	  if (lo_nibble < 0)
	    return 0;

	  /* Only allow non-control ASCII characters: reject anything
	     above 0x7f, the C0 controls below 0x20, and DEL (0x7f).  */
	  if (hi_nibble > 7)
	    return 0;
	  c = (hi_nibble << 4) | lo_nibble;
	  if (c < 0x20 || c == 0x7f)
	    return 0;
	}
    }

  /* C is still 0 when no escape name matched; otherwise the name must be
     followed immediately by the closing '$'.  */
  if (!c || e[escape_len] != '$')
    return 0;

  /* Skip the opening '$', the escape name and the closing '$'.  */
  *in += 2 + escape_len;
  return c;
}
4 changes: 4 additions & 0 deletions libiberty/testsuite/rust-demangle-expected
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,7 @@ _ZN68_$LT$core..nonzero..NonZero$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref1
--format=rust
_ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f2ad4920655e85E
<core::ptr::Unique<T> as core::ops::Deref>::deref
#
--format=rust
_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE
issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo

0 comments on commit 42bf58b

Please sign in to comment.