diff --git a/Cargo.toml b/Cargo.toml index b955431..a8e4884 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ scroll = { version = "0.10.1", features = ["derive"], optional = true } data-encoding = "2.3.3" debugid = {version = "0.8.0", features = ["serde"] } base64-simd = { version = "0.7" } +bitvec = "1.0.1" rustc-hash = "1.1.0" [build-dependencies] diff --git a/src/builder.rs b/src/builder.rs index bf0e005..d6e6584 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -181,6 +181,7 @@ impl SourceMapBuilder { } /// Adds a new mapping to the builder. + #[allow(clippy::too_many_arguments)] pub fn add( &mut self, dst_line: u32, @@ -189,8 +190,11 @@ impl SourceMapBuilder { src_col: u32, source: Option<&str>, name: Option<&str>, + is_range: bool, ) -> RawToken { - self.add_with_id(dst_line, dst_col, src_line, src_col, source, !0, name) + self.add_with_id( + dst_line, dst_col, src_line, src_col, source, !0, name, is_range, + ) } #[allow(clippy::too_many_arguments)] @@ -203,6 +207,7 @@ impl SourceMapBuilder { source: Option<&str>, source_id: u32, name: Option<&str>, + is_range: bool, ) -> RawToken { let src_id = match source { Some(source) => self.add_source_with_id(source, source_id), @@ -219,12 +224,14 @@ impl SourceMapBuilder { src_col, src_id, name_id, + is_range, }; self.tokens.push(raw); raw } /// Adds a new mapping to the builder. 
+ #[allow(clippy::too_many_arguments)] pub fn add_raw( &mut self, dst_line: u32, @@ -233,6 +240,7 @@ impl SourceMapBuilder { src_col: u32, source: Option, name: Option, + is_range: bool, ) -> RawToken { let src_id = source.unwrap_or(!0); let name_id = name.unwrap_or(!0); @@ -243,6 +251,7 @@ impl SourceMapBuilder { src_col, src_id, name_id, + is_range, }; self.tokens.push(raw); raw @@ -260,6 +269,7 @@ impl SourceMapBuilder { token.get_source(), token.get_src_id(), name, + token.is_range(), ) } diff --git a/src/decoder.rs b/src/decoder.rs index e701c47..55278b0 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,6 +1,9 @@ use std::io; use std::io::{BufReader, Read}; +use bitvec::field::BitField; +use bitvec::order::Lsb0; +use bitvec::vec::BitVec; use serde_json::Value; use crate::errors::{Error, Result}; @@ -120,6 +123,29 @@ pub fn strip_junk_header(slice: &[u8]) -> io::Result<&[u8]> { Ok(&slice[slice.len()..]) } +/// Decodes range mapping bitfield string into index +fn decode_rmi(rmi_str: &str, val: &mut BitVec) -> Result<()> { + val.clear(); + val.resize(rmi_str.len() * 6, false); + + for (idx, &byte) in rmi_str.as_bytes().iter().enumerate() { + let byte = match byte { + b'A'..=b'Z' => byte - b'A', + b'a'..=b'z' => byte - b'a' + 26, + b'0'..=b'9' => byte - b'0' + 52, + b'+' => 62, + b'/' => 63, + _ => { + fail!(Error::InvalidBase64(byte as char)); + } + }; + + val[6 * idx..6 * (idx + 1)].store_le::(byte); + } + + Ok(()) +} + pub fn decode_regular(rsm: RawSourceMap) -> Result { let mut dst_col; let mut src_id = 0; @@ -129,20 +155,28 @@ pub fn decode_regular(rsm: RawSourceMap) -> Result { let names = rsm.names.unwrap_or_default(); let sources = rsm.sources.unwrap_or_default(); + let range_mappings = rsm.range_mappings.unwrap_or_default(); let mappings = rsm.mappings.unwrap_or_default(); let allocation_size = mappings.matches(&[',', ';'][..]).count() + 10; let mut tokens = Vec::with_capacity(allocation_size); let mut nums = Vec::with_capacity(6); + let mut rmi = 
BitVec::new(); - for (dst_line, line) in mappings.split(';').enumerate() { + for (dst_line, (line, rmi_str)) in mappings + .split(';') + .zip(range_mappings.split(';').chain(std::iter::repeat(""))) + .enumerate() + { if line.is_empty() { continue; } dst_col = 0; - for segment in line.split(',') { + decode_rmi(rmi_str, &mut rmi)?; + + for (line_index, segment) in line.split(',').enumerate() { if segment.is_empty() { continue; } @@ -176,6 +210,8 @@ pub fn decode_regular(rsm: RawSourceMap) -> Result { } } + let is_range = rmi.get(line_index).map(|v| *v).unwrap_or_default(); + tokens.push(RawToken { dst_line: dst_line as u32, dst_col, @@ -183,6 +219,7 @@ pub fn decode_regular(rsm: RawSourceMap) -> Result { src_col, src_id: src, name_id: name, + is_range, }); } } @@ -311,3 +348,24 @@ fn test_bad_newline() { } } } + +#[test] +fn test_decode_rmi() { + fn decode(rmi_str: &str) -> Vec { + let mut out = bitvec::bitvec![u8, Lsb0; 0; 0]; + decode_rmi(rmi_str, &mut out).expect("failed to decode"); + + let mut res = vec![]; + for (idx, bit) in out.iter().enumerate() { + if *bit { + res.push(idx); + } + } + res + } + + // This is 0-based index of the bits + assert_eq!(decode("AAB"), vec![12]); + assert_eq!(decode("g"), vec![5]); + assert_eq!(decode("Bg"), vec![0, 11]); +} diff --git a/src/encoder.rs b/src/encoder.rs index af080be..9a7fea0 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -1,5 +1,8 @@ use std::io::Write; +use bitvec::field::BitField; +use bitvec::order::Lsb0; +use bitvec::view::BitView; use serde_json::Value; use crate::errors::Result; @@ -21,6 +24,78 @@ fn encode_vlq_diff(out: &mut String, a: u32, b: u32) { encode_vlq(out, i64::from(a) - i64::from(b)) } +fn encode_rmi(out: &mut Vec, data: &mut Vec) { + fn encode_byte(b: u8) -> u8 { + match b { + 0..=25 => b + b'A', + 26..=51 => b + b'a' - 26, + 52..=61 => b + b'0' - 52, + 62 => b'+', + 63 => b'/', + _ => panic!("invalid byte"), + } + } + + let bits = data.view_bits_mut::(); + + // trim zero at the end + let mut 
last = 0; + for (idx, bit) in bits.iter().enumerate() { + if *bit { + last = idx; + } + } + let bits = &mut bits[..last + 1]; + + for byte in bits.chunks(6) { + let byte = byte.load::(); + + let encoded = encode_byte(byte); + + out.push(encoded); + } +} + +fn serialize_range_mappings(sm: &SourceMap) -> Option { + let mut buf = Vec::new(); + let mut prev_line = 0; + let mut had_rmi = false; + + let mut idx_of_first_in_line = 0; + + let mut rmi_data = Vec::::new(); + + for (idx, token) in sm.tokens().enumerate() { + if token.is_range() { + had_rmi = true; + + let num = idx - idx_of_first_in_line; + + rmi_data.resize(rmi_data.len() + 2, 0); + + let rmi_bits = rmi_data.view_bits_mut::(); + rmi_bits.set(num, true); + } + + while token.get_dst_line() != prev_line { + if had_rmi { + encode_rmi(&mut buf, &mut rmi_data); + rmi_data.clear(); + } + + buf.push(b';'); + prev_line += 1; + had_rmi = false; + idx_of_first_in_line = idx; + } + } + if had_rmi { + encode_rmi(&mut buf, &mut rmi_data); + } + + Some(String::from_utf8(buf).expect("invalid utf8")) +} + fn serialize_mappings(sm: &SourceMap) -> String { let mut rv = String::new(); // dst == minified == generated @@ -89,6 +164,7 @@ impl Encodable for SourceMap { sources_content: if have_contents { Some(contents) } else { None }, sections: None, names: Some(self.names().map(|x| Value::String(x.to_string())).collect()), + range_mappings: serialize_range_mappings(self), mappings: Some(serialize_mappings(self)), x_facebook_offsets: None, x_metro_module_paths: None, @@ -121,6 +197,7 @@ impl Encodable for SourceMapIndex { .collect(), ), names: None, + range_mappings: None, mappings: None, x_facebook_offsets: None, x_metro_module_paths: None, @@ -139,3 +216,26 @@ impl Encodable for DecodedMap { } } } + +#[test] +fn test_encode_rmi() { + fn encode(indices: &[usize]) -> String { + let mut out = vec![]; + + // Fill with zeros while testing + let mut data = vec![0; 256]; + + let bits = data.view_bits_mut::(); + for &i in indices { + 
bits.set(i, true); + } + + encode_rmi(&mut out, &mut data); + String::from_utf8(out).unwrap() + } + + // This is 0-based index + assert_eq!(encode(&[12]), "AAB"); + assert_eq!(encode(&[5]), "g"); + assert_eq!(encode(&[0, 11]), "Bg"); +} diff --git a/src/errors.rs b/src/errors.rs index 5c4fbe1..d833dc3 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -45,6 +45,10 @@ pub enum Error { InvalidRamBundleEntry, /// Tried to operate on a non RAM bundle file NotARamBundle, + /// Range mapping index is invalid + InvalidRangeMappingIndex(data_encoding::DecodeError), + + InvalidBase64(char), } impl From for Error { @@ -78,6 +82,12 @@ impl From for Error { } } +impl From for Error { + fn from(err: data_encoding::DecodeError) -> Error { + Error::InvalidRangeMappingIndex(err) + } +} + impl error::Error for Error { fn cause(&self) -> Option<&dyn error::Error> { match *self { @@ -114,6 +124,8 @@ impl fmt::Display for Error { Error::InvalidRamBundleIndex => write!(f, "invalid module index in ram bundle"), Error::InvalidRamBundleEntry => write!(f, "invalid ram bundle module entry"), Error::NotARamBundle => write!(f, "not a ram bundle"), + Error::InvalidRangeMappingIndex(err) => write!(f, "invalid range mapping index: {err}"), + Error::InvalidBase64(c) => write!(f, "invalid base64 character: {}", c), } } } diff --git a/src/jsontypes.rs b/src/jsontypes.rs index 00630c0..59d893b 100644 --- a/src/jsontypes.rs +++ b/src/jsontypes.rs @@ -42,6 +42,8 @@ pub struct RawSourceMap { pub sections: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub names: Option>, + #[serde(rename = "rangeMappings", skip_serializing_if = "Option::is_none")] + pub range_mappings: Option, #[serde(skip_serializing_if = "Option::is_none")] pub mappings: Option, #[serde(skip_serializing_if = "Option::is_none")] diff --git a/src/ram_bundle.rs b/src/ram_bundle.rs index 23f4401..fd3f63c 100644 --- a/src/ram_bundle.rs +++ b/src/ram_bundle.rs @@ -402,6 +402,7 @@ impl<'a> SplitRamBundleModuleIter<'a> { 
token.get_src_col(), token.get_source(), token.get_name(), + false, ); if token.get_source().is_some() && !builder.has_source_contents(raw.src_id) { builder.set_source_contents( diff --git a/src/types.rs b/src/types.rs index ce7d22f..4093a5a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -145,6 +145,11 @@ pub struct RawToken { pub src_id: u32, /// name identifier (`!0` in case there is no associated name) pub name_id: u32, + + /// If true, this token is a range token. + /// + /// See the source map range mappings proposal (`rangeMappings` field). + pub is_range: bool, } /// Represents a token from a sourcemap @@ -152,6 +157,7 @@ pub struct RawToken { pub struct Token<'a> { raw: &'a RawToken, i: &'a SourceMap, + offset: u32, idx: u32, } @@ -187,6 +193,8 @@ impl<'a> Ord for Token<'a> { try_cmp!(self.get_src_line(), other.get_src_line()); try_cmp!(self.get_src_col(), other.get_src_col()); try_cmp!(self.get_name(), other.get_name()); + try_cmp!(self.is_range(), other.is_range()); + Ordering::Equal } } @@ -214,7 +222,7 @@ impl<'a> Token<'a> { /// get the source column number pub fn get_src_col(&self) -> u32 { - self.raw.src_col + self.raw.src_col.saturating_add(self.offset) } /// get the source line and column @@ -280,6 +288,13 @@ impl<'a> Token<'a> { pub fn get_source_view(&self) -> Option<&SourceView<'_>> { self.i.get_source_view(self.get_src_id()) } + + /// If true, this token is a range token. + /// + /// See the source map range mappings proposal (`rangeMappings` field). + pub fn is_range(&self) -> bool { + self.raw.is_range + } } pub fn idx_from_token(token: &Token<'_>) -> u32 { @@ -410,7 +425,13 @@ impl<'a> fmt::Display for Token<'a> { .unwrap_or_default() )?; if f.alternate() { - write!(f, " ({}:{})", self.get_dst_line(), self.get_dst_col())?; + write!( + f, + " ({}:{}){}", + self.get_dst_line(), + self.get_dst_col(), + if self.is_range() { " (range)" } else { "" } + )?; } Ok(()) } @@ -660,9 +681,12 @@ impl SourceMap { /// Looks up a token by its index. 
pub fn get_token(&self, idx: u32) -> Option> { - self.tokens - .get(idx as usize) - .map(|raw| Token { raw, i: self, idx }) + self.tokens.get(idx as usize).map(|raw| Token { + raw, + i: self, + idx, + offset: 0, + }) } /// Returns the number of tokens in the sourcemap. @@ -681,7 +705,14 @@ impl SourceMap { /// Looks up the closest token to a given 0-indexed line and column. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { let ii = greatest_lower_bound(&self.index, &(line, col), |ii| (ii.0, ii.1))?; - self.get_token(ii.2) + + let mut token = self.get_token(ii.2)?; + + if token.is_range() { + token.offset = col - token.get_dst_col(); + } + + Some(token) } /// Given a location, name and minified source file resolve a minified @@ -1203,6 +1234,7 @@ impl SourceMapIndex { token.get_src_col(), token.get_source(), token.get_name(), + token.is_range(), ); if token.get_source().is_some() && !builder.has_source_contents(raw.src_id) { builder.set_source_contents( diff --git a/tests/test_builder.rs b/tests/test_builder.rs index 8d34a30..ad0716b 100644 --- a/tests/test_builder.rs +++ b/tests/test_builder.rs @@ -12,7 +12,7 @@ fn test_builder_into_sourcemap() { assert_eq!(sm.get_source(0), Some("/foo/bar/baz.js")); assert_eq!(sm.get_name(0), Some("x")); - let expected = br#"{"version":3,"sources":["baz.js"],"sourceRoot":"/foo/bar","names":["x"],"mappings":""}"#; + let expected = br#"{"version":3,"sources":["baz.js"],"sourceRoot":"/foo/bar","names":["x"],"rangeMappings":"","mappings":""}"#; let mut output: Vec = vec![]; sm.to_writer(&mut output).unwrap(); assert_eq!(output, expected); diff --git a/tests/test_decoder.rs b/tests/test_decoder.rs index 103d194..9f89802 100644 --- a/tests/test_decoder.rs +++ b/tests/test_decoder.rs @@ -158,3 +158,24 @@ fn test_sourcemap_nofiles() { assert_eq!(iter.next().unwrap().to_tuple(), ("", 2, 2, Some("alert"))); assert!(iter.next().is_none()); } + +#[test] +fn test_sourcemap_range_mappings() { + let input: &[_] = br#"{ + "version": 3, 
+ "sources": [null], + "names": ["console","log","ab"], + "mappings": "AACAA,QAAQC,GAAG,CAAC,OAAM,OAAM,QACxBD,QAAQC,GAAG,CAAC,QAEZD,QAAQC,GAAG,CAJD;IAACC,IAAI;AAAI,IAKnBF,QAAQC,GAAG,CAAC,YACZD,QAAQC,GAAG,CAAC", + "rangeMappings": "AAB;;g" + }"#; + let sm = SourceMap::from_reader(input).unwrap(); + + let mut iter = sm.tokens().filter(Token::is_range); + + assert_eq!(sm.tokens().filter(Token::is_range).count(), 2); + + assert_eq!(iter.next().unwrap().to_tuple(), ("", 4, 11, None)); + + assert_eq!(iter.next().unwrap().to_tuple(), ("", 6, 0, Some("console"))); + assert!(iter.next().is_none()); +} diff --git a/tests/test_encoder.rs b/tests/test_encoder.rs index d38273c..23adc7f 100644 --- a/tests/test_encoder.rs +++ b/tests/test_encoder.rs @@ -23,5 +23,25 @@ fn test_basic_sourcemap() { fn test_sourcemap_data_url() { let input: &[_] = br#"{"version":3,"file":"build/foo.min.js","sources":["src/foo.js"],"names":[],"mappings":"AAAA","sourceRoot":"/"}"#; let sm = SourceMap::from_reader(input).unwrap(); - assert_eq!(sm.to_data_url().unwrap(), "data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiYnVpbGQvZm9vLm1pbi5qcyIsInNvdXJjZXMiOlsic3JjL2Zvby5qcyJdLCJzb3VyY2VSb290IjoiLyIsIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBIn0="); + assert_eq!(sm.to_data_url().unwrap(), "data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiYnVpbGQvZm9vLm1pbi5qcyIsInNvdXJjZXMiOlsic3JjL2Zvby5qcyJdLCJzb3VyY2VSb290IjoiLyIsIm5hbWVzIjpbXSwicmFuZ2VNYXBwaW5ncyI6IiIsIm1hcHBpbmdzIjoiQUFBQSJ9"); +} + +#[test] +fn test_basic_range() { + let input: &[_] = br#"{ + "version": 3, + "sources": [null], + "names": ["console","log","ab"], + "mappings": "AACAA,QAAQC,GAAG,CAAC,OAAM,OAAM,QACxBD,QAAQC,GAAG,CAAC,QAEZD,QAAQC,GAAG,CAJD;IAACC,IAAI;AAAI,IAKnBF,QAAQC,GAAG,CAAC,YACZD,QAAQC,GAAG,CAAC", + "rangeMappings": "AAB;;g" + }"#; + let sm = SourceMap::from_reader(input).unwrap(); + let mut out: Vec = vec![]; + sm.to_writer(&mut out).unwrap(); + + let sm2 = 
SourceMap::from_reader(&out[..]).unwrap(); + + for (tok1, tok2) in sm.tokens().zip(sm2.tokens()) { + assert_eq!(tok1, tok2); + } } diff --git a/tests/test_regular.rs b/tests/test_regular.rs index 141c3b5..93db0d4 100644 --- a/tests/test_regular.rs +++ b/tests/test_regular.rs @@ -1,4 +1,4 @@ -use sourcemap::SourceMap; +use sourcemap::{SourceMap, SourceMapBuilder}; #[test] fn test_basic_sourcemap() { @@ -35,3 +35,24 @@ fn test_basic_sourcemap() { ("coolstuff.js", 2, 8, None) ); } + +#[test] +fn test_basic_range() { + let mut b = SourceMapBuilder::new(None); + let id = b.add_source("input.js"); + b.add_raw(1, 0, 2, 2, Some(id), None, true); + let sm = b.into_sourcemap(); + + assert_eq!( + sm.lookup_token(1, 0).unwrap().to_tuple(), + ("input.js", 2, 2, None) + ); + assert_eq!( + sm.lookup_token(1, 8).unwrap().to_tuple(), + ("input.js", 2, 10, None) + ); + assert_eq!( + sm.lookup_token(1, 12).unwrap().to_tuple(), + ("input.js", 2, 14, None) + ); +}