Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Display spans correctly when there are zero-width or wide characters #45711

Merged
merged 1 commit into from
Nov 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
end_pos: _,
ref lines,
ref multibyte_chars,
ref non_narrow_chars,
} = *self;

name.hash_stable(hcx, hasher);
Expand All @@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}

let non_narrow_chars = non_narrow_chars.borrow();
non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
}
}

Expand All @@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,

(pos.0 - filemap_start.0, bytes as u32)
}

fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
filemap_start: ::syntax_pos::BytePos)
-> (u32, u32) {
let pos = swc.pos();
let width = swc.width();

(pos.0 - filemap_start.0, width as u32)
}
14 changes: 7 additions & 7 deletions src/librustc_errors/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

use self::Destination::*;

use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};

use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper};
use RenderSpan::*;
Expand Down Expand Up @@ -201,17 +201,17 @@ impl EmitterWriter {
// 6..7. This is degenerate input, but it's best to degrade
// gracefully -- and the parser likes to supply a span like
// that for EOF, in particular.
if lo.col == hi.col && lo.line == hi.line {
hi.col = CharPos(lo.col.0 + 1);
if lo.col_display == hi.col_display && lo.line == hi.line {
hi.col_display += 1;
}

let ann_type = if lo.line != hi.line {
let ml = MultilineAnnotation {
depth: 1,
line_start: lo.line,
line_end: hi.line,
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
};
Expand All @@ -221,8 +221,8 @@ impl EmitterWriter {
AnnotationType::Singleline
};
let ann = Annotation {
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
annotation_type: ann_type,
Expand Down
8 changes: 7 additions & 1 deletion src/librustc_metadata/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
.. } = filemap_to_import;

let source_length = (end_pos - start_pos).to_usize();
Expand All @@ -1206,14 +1207,19 @@ impl<'a, 'tcx> CrateMetadata {
for mbc in &mut multibyte_chars {
mbc.pos = mbc.pos - start_pos;
}
let mut non_narrow_chars = non_narrow_chars.into_inner();
for swc in &mut non_narrow_chars {
*swc = *swc - start_pos;
}

let local_version = local_codemap.new_imported_filemap(name,
name_was_remapped,
self.cnum.as_u32(),
src_hash,
source_length,
lines,
multibyte_chars);
multibyte_chars,
non_narrow_chars);
debug!("CrateMetaData::imported_filemaps alloc \
filemap {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",
Expand Down
42 changes: 40 additions & 2 deletions src/libsyntax/codemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ impl CodeMap {
src_hash: u128,
source_len: usize,
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>)
mut file_local_multibyte_chars: Vec<MultiByteChar>,
mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
-> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
Expand All @@ -258,6 +259,10 @@ impl CodeMap {
mbc.pos = mbc.pos + start_pos;
}

for swc in &mut file_local_non_narrow_chars {
*swc = *swc + start_pos;
}

let filemap = Rc::new(FileMap {
name: filename,
name_was_remapped,
Expand All @@ -270,6 +275,7 @@ impl CodeMap {
end_pos,
lines: RefCell::new(file_local_lines),
multibyte_chars: RefCell::new(file_local_multibyte_chars),
non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
});

files.push(filemap.clone());
Expand Down Expand Up @@ -297,6 +303,24 @@ impl CodeMap {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
let col = chpos - linechpos;

let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let start_width_idx = non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize =
non_narrow_chars[start_width_idx..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
col.0 - special_chars + non_narrow
};
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
Expand All @@ -306,14 +330,28 @@ impl CodeMap {
Loc {
file: f,
line,
col: chpos - linechpos,
col,
col_display,
}
}
Err(f) => {
let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize =
non_narrow_chars[0..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
chpos.0 - end_width_idx + non_narrow
};
Loc {
file: f,
line: 0,
col: chpos,
col_display,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
self.filemap.record_multibyte_char(self.pos, new_ch_len);
}
}
self.filemap.record_width(self.pos, new_ch);
} else {
self.ch = None;
self.pos = new_pos;
Expand Down
1 change: 1 addition & 0 deletions src/libsyntax_pos/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ crate-type = ["dylib"]
[dependencies]
serialize = { path = "../libserialize" }
rustc_data_structures = { path = "../librustc_data_structures" }
unicode-width = "0.1.4"
95 changes: 91 additions & 4 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
extern crate serialize;
extern crate serialize as rustc_serialize; // used by deriving

extern crate unicode_width;

pub mod hygiene;
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};

Expand Down Expand Up @@ -494,6 +496,63 @@ pub struct MultiByteChar {
pub bytes: usize,
}

/// Identifies an offset of a non-narrow character in a FileMap
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
pub enum NonNarrowChar {
/// Represents a zero-width character
ZeroWidth(BytePos),
/// Represents a wide (fullwidth) character
Wide(BytePos),
}

impl NonNarrowChar {
fn new(pos: BytePos, width: usize) -> Self {
match width {
0 => NonNarrowChar::ZeroWidth(pos),
2 => NonNarrowChar::Wide(pos),
_ => panic!("width {} given for non-narrow character", width),
}
}

/// Returns the absolute offset of the character in the CodeMap
pub fn pos(&self) -> BytePos {
match *self {
NonNarrowChar::ZeroWidth(p) |
NonNarrowChar::Wide(p) => p,
}
}

/// Returns the width of the character, 0 (zero-width) or 2 (wide)
pub fn width(&self) -> usize {
match *self {
NonNarrowChar::ZeroWidth(_) => 0,
NonNarrowChar::Wide(_) => 2,
}
}
}

impl Add<BytePos> for NonNarrowChar {
type Output = Self;

fn add(self, rhs: BytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
}
}
}

impl Sub<BytePos> for NonNarrowChar {
type Output = Self;

fn sub(self, rhs: BytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
}
}
}

/// The state of the lazy external source loading mechanism of a FileMap.
#[derive(PartialEq, Eq, Clone)]
pub enum ExternalSource {
Expand Down Expand Up @@ -552,11 +611,13 @@ pub struct FileMap {
pub lines: RefCell<Vec<BytePos>>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
}

impl Encodable for FileMap {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("FileMap", 7, |s| {
s.emit_struct("FileMap", 8, |s| {
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
Expand Down Expand Up @@ -610,6 +671,9 @@ impl Encodable for FileMap {
})?;
s.emit_struct_field("multibyte_chars", 5, |s| {
(*self.multibyte_chars.borrow()).encode(s)
})?;
s.emit_struct_field("non_narrow_chars", 7, |s| {
(*self.non_narrow_chars.borrow()).encode(s)
})
})
}
Expand All @@ -618,7 +682,7 @@ impl Encodable for FileMap {
impl Decodable for FileMap {
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {

d.read_struct("FileMap", 6, |d| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the 6 a bug?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea, but for me 6 there didn't make sense. There were total of 7 (now 8) fields.

d.read_struct("FileMap", 8, |d| {
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
let name_was_remapped: bool =
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
Expand Down Expand Up @@ -657,6 +721,8 @@ impl Decodable for FileMap {
})?;
let multibyte_chars: Vec<MultiByteChar> =
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
let non_narrow_chars: Vec<NonNarrowChar> =
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
Ok(FileMap {
name,
name_was_remapped,
Expand All @@ -671,7 +737,8 @@ impl Decodable for FileMap {
src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
lines: RefCell::new(lines),
multibyte_chars: RefCell::new(multibyte_chars)
multibyte_chars: RefCell::new(multibyte_chars),
non_narrow_chars: RefCell::new(non_narrow_chars)
})
})
}
Expand Down Expand Up @@ -709,6 +776,7 @@ impl FileMap {
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
non_narrow_chars: RefCell::new(Vec::new()),
}
}

Expand Down Expand Up @@ -798,6 +866,23 @@ impl FileMap {
self.multibyte_chars.borrow_mut().push(mbc);
}

pub fn record_width(&self, pos: BytePos, ch: char) {
let width = match ch {
'\t' | '\n' =>
// Tabs will consume one column.
// Make newlines take one column so that displayed spans can point them.
1,
Copy link
Contributor

@estebank estebank Nov 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you're adding support for characters with varying widths, adding support for either 4 or 8 width tabs should be easy to add. Let's do so in a subsequent PR.

Done in #45953.

ch =>
// Assume control characters are zero width.
// FIXME: How can we decide between `width` and `width_cjk`?
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
};
// Only record non-narrow characters.
if width != 1 {
self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
}
}

pub fn is_real_file(&self) -> bool {
!(self.name.starts_with("<") &&
self.name.ends_with(">"))
Expand Down Expand Up @@ -944,7 +1029,9 @@ pub struct Loc {
/// The (1-based) line number
pub line: usize,
/// The (0-based) column offset
pub col: CharPos
pub col: CharPos,
/// The (0-based) column offset when displayed
pub col_display: usize,
}

/// A source code location used as the result of lookup_char_pos_adj
Expand Down
2 changes: 1 addition & 1 deletion src/test/ui/codemap_tests/unicode.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
--> $DIR/unicode.rs:11:8
|
11 | extern "路濫狼á́́" fn foo() {}
| ^^^^^^^^
| ^^^^^^^^^

error: aborting due to previous error

Loading