/// Returns the suffix of `s` that remains after skipping everything the Rust
/// lexer treats as insignificant at this position: whitespace, non-doc line
/// comments, and non-doc (possibly nested) block comments.
///
/// Doc comments (`///`, `//!`, `/** */`, `/*! */`) are *not* skipped because
/// they are tokens. `////…` and `/***…` are ordinary comments and are skipped,
/// as is the empty block comment `/**/`.
///
/// Edge cases:
/// * a line comment that runs to the end of the input yields `""`;
/// * an unterminated block comment is left in place, i.e. the returned slice
///   starts at that comment's `/*`.
pub fn skip(mut s: &str) -> &str {
    'skip: while !s.is_empty() {
        let byte = s.as_bytes()[0];
        if byte == b'/' {
            if s.starts_with("//")
                && (!s.starts_with("///") || s.starts_with("////"))
                && !s.starts_with("//!")
            {
                // Ordinary line comment: drop it together with its newline.
                if let Some(i) = s.find('\n') {
                    s = &s[i + 1..];
                    continue;
                } else {
                    return "";
                }
            } else if s.starts_with("/**/") {
                // Would otherwise look like the start of a `/**` doc comment.
                s = &s[4..];
                continue;
            } else if s.starts_with("/*")
                && (!s.starts_with("/**") || s.starts_with("/***"))
                && !s.starts_with("/*!")
            {
                // Ordinary block comment. Block comments nest, so track the
                // depth and stop at the `*/` that closes the outermost one.
                let mut depth = 0;
                let bytes = s.as_bytes();
                let mut i = 0;
                // `s` starts with "/*", so `bytes.len() >= 2` and this cannot
                // underflow; it also keeps `bytes[i + 1]` in bounds below.
                let upper = bytes.len() - 1;
                while i < upper {
                    if bytes[i] == b'/' && bytes[i + 1] == b'*' {
                        depth += 1;
                        i += 1; // eat '*'
                    } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
                        depth -= 1;
                        if depth == 0 {
                            s = &s[i + 2..];
                            continue 'skip;
                        }
                        i += 1; // eat '/'
                    }
                    i += 1;
                }
                // Unterminated block comment: nothing more can be skipped.
                return s;
            }
        }
        match byte {
            // ASCII whitespace: space, \t, \n, vertical tab, form feed, \r.
            b' ' | 0x09..=0x0d => {
                s = &s[1..];
                continue;
            }
            // Any other ASCII byte starts a real token.
            b if b <= 0x7f => {}
            // Lead byte of a multi-byte character: decode it and test it
            // against the non-ASCII whitespace characters.
            _ => {
                let ch = s.chars().next().unwrap();
                if is_whitespace(ch) {
                    s = &s[ch.len_utf8()..];
                    continue;
                }
            }
        }
        return s;
    }
    s
}

/// Reports whether `ch` is whitespace to the Rust lexer.
///
/// Rust defines whitespace as the Unicode `Pattern_White_Space` set, which is
/// narrower than `char::is_whitespace` (the `White_Space` property): it
/// includes the left-to-right and right-to-left marks but excludes characters
/// such as U+00A0 NO-BREAK SPACE and U+3000 IDEOGRAPHIC SPACE. Using the wider
/// set would skip characters here that rustc does not skip.
fn is_whitespace(ch: char) -> bool {
    match ch {
        '\u{0009}'..='\u{000d}' // \t, \n, vertical tab, form feed, \r
        | '\u{0020}' // space
        | '\u{0085}' // next line
        | '\u{200e}' // left-to-right mark
        | '\u{200f}' // right-to-left mark
        | '\u{2028}' // line separator
        | '\u{2029}' => true, // paragraph separator
        _ => false,
    }
}
+ PathSegment { + ident: "allow", + arguments: None, + }, + ], + }, + tokens: TokenStream(`( dead_code )`), + }, + ], + items: [ + Item::Fn { + vis: Inherited, + sig: Signature { + ident: "main", + generics: Generics, + output: Default, + }, + block: Block, + }, + ], + } + "###); +}