Skip to content

Commit

Permalink
Rollup merge of rust-lang#65074 - Rantanen:json-byte-pos, r=matklad
Browse files Browse the repository at this point in the history
Fix the start/end byte positions in the compiler JSON output

Track the changes made during normalization in the `SourceFile` and use this information to correct the `start_byte` and `end_byte` fields in the JSON output.

This should ensure the start/end byte fields can be used to index the original file, even if Rust normalized the source code for parsing purposes. Both CRLF to LF and BOM removal are handled with this one.

The rough plan was discussed with @matklad in rust-lang/rustfix#176 - although I ended up going with `u32` offset tracking so I wouldn't need to deal with `u32 + i32` arithmetics when applying the offset to the span byte positions.

Fixes rust-lang#65029
  • Loading branch information
Centril committed Oct 8, 2019
2 parents 71b6f77 + bbf262d commit 3e390d5
Show file tree
Hide file tree
Showing 11 changed files with 403 additions and 22 deletions.
19 changes: 19 additions & 0 deletions src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
ref lines,
ref multibyte_chars,
ref non_narrow_chars,
ref normalized_pos,
} = *self;

(name_hash as u64).hash_stable(hcx, hasher);
Expand Down Expand Up @@ -452,6 +453,12 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}

normalized_pos.len().hash_stable(hcx, hasher);
for &char_pos in normalized_pos.iter() {
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
}

}
}

Expand Down Expand Up @@ -481,6 +488,18 @@ fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
(pos.0 - source_file_start.0, width as u32)
}

fn stable_normalized_pos(np: ::syntax_pos::NormalizedPos,
source_file_start: ::syntax_pos::BytePos)
-> (u32, u32) {
let ::syntax_pos::NormalizedPos {
pos,
diff
} = np;

(pos.0 - source_file_start.0, diff)
}


impl<'tcx> HashStable<StableHashingContext<'tcx>> for feature_gate::Features {
fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, hasher: &mut StableHasher) {
// Unfortunately we cannot exhaustively list fields here, since the
Expand Down
7 changes: 6 additions & 1 deletion src/librustc_metadata/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,7 @@ impl<'a, 'tcx> CrateMetadata {
mut lines,
mut multibyte_chars,
mut non_narrow_chars,
mut normalized_pos,
name_hash,
.. } = source_file_to_import;

Expand All @@ -1336,6 +1337,9 @@ impl<'a, 'tcx> CrateMetadata {
for swc in &mut non_narrow_chars {
*swc = *swc - start_pos;
}
for np in &mut normalized_pos {
np.pos = np.pos - start_pos;
}

let local_version = local_source_map.new_imported_source_file(name,
name_was_remapped,
Expand All @@ -1345,7 +1349,8 @@ impl<'a, 'tcx> CrateMetadata {
source_length,
lines,
multibyte_chars,
non_narrow_chars);
non_narrow_chars,
normalized_pos);
debug!("CrateMetaData::imported_source_files alloc \
source_file {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",
Expand Down
7 changes: 5 additions & 2 deletions src/libsyntax/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ use std::sync::{Arc, Mutex};

use rustc_serialize::json::{as_json, as_pretty_json};

#[cfg(test)]
mod tests;

pub struct JsonEmitter {
dst: Box<dyn Write + Send>,
registry: Option<Registry>,
Expand Down Expand Up @@ -332,8 +335,8 @@ impl DiagnosticSpan {

DiagnosticSpan {
file_name: start.file.name.to_string(),
byte_start: span.lo().0 - start.file.start_pos.0,
byte_end: span.hi().0 - start.file.start_pos.0,
byte_start: start.file.original_relative_byte_pos(span.lo()).0,
byte_end: start.file.original_relative_byte_pos(span.hi()).0,
line_start: start.line,
line_end: end.line,
column_start: start.col.0 + 1,
Expand Down
186 changes: 186 additions & 0 deletions src/libsyntax/json/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
use super::*;

use crate::json::JsonEmitter;
use crate::source_map::{FilePathMapping, SourceMap};
use crate::tests::Shared;
use crate::with_default_globals;

use errors::emitter::{ColorConfig, HumanReadableErrorType};
use errors::Handler;
use rustc_serialize::json::decode;
use syntax_pos::{BytePos, Span};

use std::str;

#[derive(RustcDecodable, Debug, PartialEq, Eq)]
struct TestData {
spans: Vec<SpanTestData>,
}

#[derive(RustcDecodable, Debug, PartialEq, Eq)]
struct SpanTestData {
pub byte_start: u32,
pub byte_end: u32,
pub line_start: u32,
pub column_start: u32,
pub line_end: u32,
pub column_end: u32,
}

/// Test the span yields correct positions in JSON.
fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
let expected_output = TestData { spans: vec![expected_output] };

with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned());

let output = Arc::new(Mutex::new(Vec::new()));
let je = JsonEmitter::new(
Box::new(Shared { data: output.clone() }),
None,
sm,
true,
HumanReadableErrorType::Short(ColorConfig::Never),
false,
);

let span = Span::with_root_ctxt(BytePos(span.0), BytePos(span.1));
let handler = Handler::with_emitter(true, None, Box::new(je));
handler.span_err(span, "foo");

let bytes = output.lock().unwrap();
let actual_output = str::from_utf8(&bytes).unwrap();
let actual_output: TestData = decode(actual_output).unwrap();

assert_eq!(expected_output, actual_output)
})
}

#[test]
fn empty() {
test_positions(
" ",
(0, 1),
SpanTestData {
byte_start: 0,
byte_end: 1,
line_start: 1,
column_start: 1,
line_end: 1,
column_end: 2,
},
)
}

#[test]
fn bom() {
test_positions(
"\u{feff} ",
(0, 1),
SpanTestData {
byte_start: 3,
byte_end: 4,
line_start: 1,
column_start: 1,
line_end: 1,
column_end: 2,
},
)
}

#[test]
fn lf_newlines() {
test_positions(
"\nmod foo;\nmod bar;\n",
(5, 12),
SpanTestData {
byte_start: 5,
byte_end: 12,
line_start: 2,
column_start: 5,
line_end: 3,
column_end: 3,
},
)
}

#[test]
fn crlf_newlines() {
test_positions(
"\r\nmod foo;\r\nmod bar;\r\n",
(5, 12),
SpanTestData {
byte_start: 6,
byte_end: 14,
line_start: 2,
column_start: 5,
line_end: 3,
column_end: 3,
},
)
}

#[test]
fn crlf_newlines_with_bom() {
test_positions(
"\u{feff}\r\nmod foo;\r\nmod bar;\r\n",
(5, 12),
SpanTestData {
byte_start: 9,
byte_end: 17,
line_start: 2,
column_start: 5,
line_end: 3,
column_end: 3,
},
)
}

#[test]
fn span_before_crlf() {
test_positions(
"foo\r\nbar",
(2, 3),
SpanTestData {
byte_start: 2,
byte_end: 3,
line_start: 1,
column_start: 3,
line_end: 1,
column_end: 4,
},
)
}

#[test]
fn span_on_crlf() {
test_positions(
"foo\r\nbar",
(3, 4),
SpanTestData {
byte_start: 3,
byte_end: 5,
line_start: 1,
column_start: 4,
line_end: 2,
column_end: 1,
},
)
}

#[test]
fn span_after_crlf() {
test_positions(
"foo\r\nbar",
(4, 5),
SpanTestData {
byte_start: 5,
byte_end: 6,
line_start: 2,
column_start: 1,
line_end: 2,
column_end: 2,
},
)
}
6 changes: 6 additions & 0 deletions src/libsyntax/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ impl SourceMap {
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>,
mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
mut file_local_normalized_pos: Vec<NormalizedPos>,
) -> Lrc<SourceFile> {
let start_pos = self.next_start_pos();

Expand All @@ -301,6 +302,10 @@ impl SourceMap {
*swc = *swc + start_pos;
}

for nc in &mut file_local_normalized_pos {
nc.pos = nc.pos + start_pos;
}

let source_file = Lrc::new(SourceFile {
name: filename,
name_was_remapped,
Expand All @@ -314,6 +319,7 @@ impl SourceMap {
lines: file_local_lines,
multibyte_chars: file_local_multibyte_chars,
non_narrow_chars: file_local_non_narrow_chars,
normalized_pos: file_local_normalized_pos,
name_hash,
});

Expand Down
4 changes: 2 additions & 2 deletions src/libsyntax/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ struct SpanLabel {
label: &'static str,
}

struct Shared<T: Write> {
data: Arc<Mutex<T>>,
crate struct Shared<T: Write> {
pub data: Arc<Mutex<T>>,
}

impl<T: Write> Write for Shared<T> {
Expand Down
Loading

0 comments on commit 3e390d5

Please sign in to comment.