Skip to content

Commit

Permalink
convert \r\n to \n when loading files
Browse files Browse the repository at this point in the history
  • Loading branch information
matklad committed Aug 14, 2019
1 parent 60960a2 commit 004f3ac
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 0 deletions.
56 changes: 56 additions & 0 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,7 @@ impl SourceFile {
mut src: String,
start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> {
remove_bom(&mut src);
normalize_newlines(&mut src);

let src_hash = {
let mut hasher: StableHasher<u128> = StableHasher::new();
Expand Down Expand Up @@ -1212,6 +1213,61 @@ fn remove_bom(src: &mut String) {
}
}


/// Replaces every `\r\n` pair in `src` with a single `\n`, in place.
///
/// A `\r` that is not immediately followed by `\n` is left untouched; it is
/// not reported as an error. Strings that contain no `\r` at all are returned
/// unchanged without being copied.
fn normalize_newlines(src: &mut String) {
    // Fast path: without any `\r` there can be no `\r\n` to rewrite.
    if !src.as_bytes().contains(&b'\r') {
        return;
    }

    // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
    // While we *can* call `as_mut_vec` and do surgery on the live string
    // directly, let's rather steal the contents of `src`. This makes the code
    // safe even if a panic occurs: `src` is merely left empty in that case.
    let mut buf = std::mem::replace(src, String::new()).into_bytes();

    // Number of `\r` bytes dropped so far. The first `gap_len` bytes of `tail`
    // are stale (already copied to the left, or a dropped `\r`); the live,
    // not-yet-compacted data starts at `tail[gap_len]`.
    let mut gap_len = 0;
    let mut tail = buf.as_mut_slice();
    loop {
        // End of the next chunk to keep: either the position of the next
        // `\r\n` or, if there is none, the end of the buffer.
        let idx = match find_crlf(&tail[gap_len..]) {
            None => tail.len(),
            Some(idx) => idx + gap_len,
        };
        // Slide the chunk left over the gap, then advance past it.
        tail.copy_within(gap_len..idx, 0);
        tail = &mut tail[idx - gap_len..];
        // Only the gap is left: everything has been compacted.
        if tail.len() == gap_len {
            break;
        }
        // `tail` now has the `\r` of a `\r\n` pair right after the current
        // gap; growing the gap by one drops it.
        gap_len += 1;
    }

    // Account for removed `\r`: the last `gap_len` bytes are stale leftovers.
    let new_len = buf.len() - gap_len;
    buf.truncate(new_len);

    // SAFETY: `buf` held valid utf-8 when it was taken out of `src`, and the
    // only change made to the retained bytes is the removal of whole `\r`
    // bytes. `\r` is a single-byte ASCII character and never part of a
    // multi-byte sequence, so the result is valid utf-8 again.
    *src = unsafe { String::from_utf8_unchecked(buf) };

    /// Returns the index of the first `\r` that is directly followed by `\n`.
    fn find_crlf(src: &[u8]) -> Option<usize> {
        src.windows(2).position(|pair| pair == b"\r\n")
    }
}

// _____________________________________________________________________________
// Pos, BytePos, CharPos
//
Expand Down
20 changes: 20 additions & 0 deletions src/libsyntax_pos/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,23 @@ fn test_lookup_line() {
assert_eq!(lookup_line(lines, BytePos(28)), 2);
assert_eq!(lookup_line(lines, BytePos(29)), 2);
}

#[test]
fn test_normalize_newlines() {
    // Each entry is `(input, expected result)`. Only `\r\n` pairs collapse to
    // `\n`; a `\r` that is not followed by `\n` must survive unchanged.
    let cases: &[(&str, &str)] = &[
        ("", ""),
        ("\n", "\n"),
        ("\r", "\r"),
        ("\r\r", "\r\r"),
        ("\r\n", "\n"),
        ("hello world", "hello world"),
        ("hello\nworld", "hello\nworld"),
        ("hello\r\nworld", "hello\nworld"),
        ("\r\nhello\r\nworld\r\n", "\nhello\nworld\n"),
        ("\r\r\n", "\r\n"),
        ("hello\rworld", "hello\rworld"),
    ];

    for &(before, after) in cases {
        let mut actual = String::from(before);
        normalize_newlines(&mut actual);
        assert_eq!(actual.as_str(), after);
    }
}

0 comments on commit 004f3ac

Please sign in to comment.