diff --git a/crates/swc_fast_ts_strip/src/lib.rs b/crates/swc_fast_ts_strip/src/lib.rs index c74812dc959e..5a9e277306e4 100644 --- a/crates/swc_fast_ts_strip/src/lib.rs +++ b/crates/swc_fast_ts_strip/src/lib.rs @@ -113,14 +113,52 @@ pub fn operate( return Ok(fm.src.to_string()); } + let source = fm.src.clone(); let mut code = fm.src.to_string().into_bytes(); for r in replacements { - for c in &mut code[(r.0 .0 - 1) as usize..(r.1 .0 - 1) as usize] { - if *c == b'\n' || *c == b'\r' { - continue; + let (start, end) = (r.0 .0 as usize - 1, r.1 .0 as usize - 1); + + for (i, c) in source[start..end].char_indices() { + let i = start + i; + match c { + // https://262.ecma-international.org/#sec-white-space + '\u{0009}' | '\u{0000B}' | '\u{000C}' | '\u{FEFF}' => continue, + // Space_Separator + '\u{0020}' | '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' + | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' + | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => continue, + // https://262.ecma-international.org/#sec-line-terminators + '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}' => continue, + _ => match c.len_utf8() { + 1 => { + // Space 0020 + code[i] = 0x20; + } + 2 => { + // No-Break Space 00A0 + code[i] = 0xc2; + code[i + 1] = 0xa0; + } + 3 => { + // En Space 2002 + code[i] = 0xe2; + code[i + 1] = 0x80; + code[i + 2] = 0x82; + } + 4 => { + // We do not have a 4-byte space character in the Unicode standard. + + // Space 0020 + code[i] = 0x20; + // ZWNBSP FEFF + code[i + 1] = 0xef; + code[i + 2] = 0xbb; + code[i + 3] = 0xbf; + } + _ => unreachable!(), + }, } - *c = b' '; } } @@ -128,7 +166,13 @@ pub fn operate( code[i.0 as usize - 1] = v; } - String::from_utf8(code).map_err(|_| anyhow::anyhow!("failed to convert to utf-8")) + if cfg!(debug_assertions) { + String::from_utf8(code).map_err(|_| anyhow::anyhow!("failed to convert to utf-8")) + } else { + // SAFETY: We've already validated that the source is valid utf-8 + // and our operations are limited to character-level string replacements. + unsafe { Ok(String::from_utf8_unchecked(code)) } + } } struct TsStrip { @@ -197,15 +241,18 @@ impl Visit for TsStrip { if let Some(ret) = &n.return_type { self.add_replacement(ret.span); - let l_paren = self.get_prev_token(ret.span_lo() - BytePos(1)); - debug_assert_eq!(l_paren.token, Token::RParen); + let r_paren = self.get_prev_token(ret.span_lo() - BytePos(1)); + debug_assert_eq!(r_paren.token, Token::RParen); let arrow = self.get_next_token(ret.span_hi()); debug_assert_eq!(arrow.token, Token::Arrow); - let span = span(l_paren.span.lo, arrow.span.hi); + let span = span(r_paren.span.lo, arrow.span.lo); - let slice = self.get_src_slice(span).as_bytes(); - if slice.contains(&b'\n') { - self.add_replacement(l_paren.span); + let slice = self.get_src_slice(span); + if slice + .chars() + .any(|c| matches!(c, '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}')) + { + self.add_replacement(r_paren.span); // Instead of moving the arrow mark, we shift the right parenthesis to the next // line. This is because there might be a line break after the right @@ -219,11 +266,17 @@ impl Visit for TsStrip { // // ```TypeScript // ( - // )=> + // ) => // 1; // ``` - self.add_overwrite(ret.span_hi() - BytePos(1), b')'); + let mut pos = ret.span_hi() - BytePos(1); + while !self.src.as_bytes()[pos.0 as usize - 1].is_utf8_char_boundary() { + self.add_overwrite(pos, b' '); + pos = pos - BytePos(1); + } + + self.add_overwrite(pos, b')'); } } @@ -611,6 +664,19 @@ impl Visit for TsStrip { } } +trait U8Helper { + fn is_utf8_char_boundary(&self) -> bool; +} + +impl U8Helper for u8 { + // Copy from std::core::num::u8 + #[inline] + fn is_utf8_char_boundary(&self) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (*self as i8) >= -0x40 + } +} + fn span(lo: BytePos, hi: BytePos) -> Span { Span::new(lo, hi, Default::default()) } diff --git a/crates/swc_fast_ts_strip/tests/fixture/test-case-1.js b/crates/swc_fast_ts_strip/tests/fixture/test-case-1.js index 1873dde31794..e52037e8b242 100644 --- a/crates/swc_fast_ts_strip/tests/fixture/test-case-1.js +++ b/crates/swc_fast_ts_strip/tests/fixture/test-case-1.js @@ -180,3 +180,19 @@ void 0; ( )=> 1; + +{ + (a, b, c = [] /*comment-1*/ /*comment-2*/ + )/*comment-4*/=> + 1 +}; + +      + +( +   ) => +1; + +( /*comment-1*/ +    ) /*comment-4*/=> +1; \ No newline at end of file diff --git a/crates/swc_fast_ts_strip/tests/fixture/test-case-1.ts b/crates/swc_fast_ts_strip/tests/fixture/test-case-1.ts index 9b1f5dc60a22..dfff5559b718 100644 --- a/crates/swc_fast_ts_strip/tests/fixture/test-case-1.ts +++ b/crates/swc_fast_ts_strip/tests/fixture/test-case-1.ts @@ -180,3 +180,19 @@ void 0; (): any=> 1; + +{ + (a, b, c: D = [] as any/*comment-1*/)/*comment-2*/: + /*comment-3*/any/*comment-4*/=> + 1 +}; + +type 任意の型 = any; + +(): +任意の型=> +1; + +()/*comment-1*/:/*comment-2*/ +/*comment-3*/任意の型/*comment-4*/=> +1; \ No newline at end of file diff --git a/crates/swc_fast_ts_strip/tests/fixture/unicode.js b/crates/swc_fast_ts_strip/tests/fixture/unicode.js new file mode 100644 index 000000000000..f5299dfd6153 --- /dev/null +++ b/crates/swc_fast_ts_strip/tests/fixture/unicode.js @@ -0,0 +1,7 @@ +    + +function foo() { +    (void 1); throw new Error('foo'); +} + +foo(); \ No newline at end of file diff --git a/crates/swc_fast_ts_strip/tests/fixture/unicode.ts b/crates/swc_fast_ts_strip/tests/fixture/unicode.ts new file mode 100644 index 000000000000..0a766c0e0e2e --- /dev/null +++ b/crates/swc_fast_ts_strip/tests/fixture/unicode.ts @@ -0,0 +1,7 @@ +type 任意 = any; + +function foo() { + <任意>(void 1); throw new Error('foo'); +} + +foo(); \ No newline at end of file