From 4465b2fbf3487e8f45e2eee4da187776574febec Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Thu, 3 May 2018 17:14:04 +1000
Subject: [PATCH 1/6] Inline `char_at()` and `record_width`.

Because `bump()` is hot.
---
 src/libsyntax/str.rs     | 1 +
 src/libsyntax_pos/lib.rs | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/libsyntax/str.rs b/src/libsyntax/str.rs
index d0f47629b10e5..281861918fd8e 100644
--- a/src/libsyntax/str.rs
+++ b/src/libsyntax/str.rs
@@ -8,6 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+#[inline]
 pub fn char_at(s: &str, byte: usize) -> char {
     s[byte..].chars().next().unwrap()
 }
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs
index d30d3d78ca540..2bab958dceda8 100644
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@@ -1047,6 +1047,7 @@ impl FileMap {
         self.multibyte_chars.borrow_mut().push(mbc);
     }
 
+    #[inline]
     pub fn record_width(&self, pos: BytePos, ch: char) {
         let width = match ch {
             '\t' =>

From b1aae607c56d26333589dc45daa20859950bf6e1 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Fri, 4 May 2018 06:38:15 +1000
Subject: [PATCH 2/6] Tweak naming and ordering in `StringReader::bump()`.

This patch removes the "old"/"new" names in favour of "foo"/"next_foo",
which matches the field names.

It also moves the setting of `self.{ch,pos,next_pos}` in the common case
to the end, so that the meaning of "foo"/"next_foo" is consistent until
the end.
---
 src/libsyntax/parse/lexer/mod.rs | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 22a0261d8c6b1..a24af857af142 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -442,36 +442,35 @@ impl<'a> StringReader<'a> {
     /// Advance the StringReader by one character. If a newline is
     /// discovered, add it to the FileMap's list of line start offsets.
     pub fn bump(&mut self) {
-        let new_pos = self.next_pos;
-        let new_byte_offset = self.byte_offset(new_pos).to_usize();
+        let next_byte_offset = self.byte_offset(self.next_pos).to_usize();
         let end = self.terminator.map_or(self.source_text.len(), |t| {
             self.byte_offset(t).to_usize()
         });
-        if new_byte_offset < end {
-            let old_ch_is_newline = self.ch.unwrap() == '\n';
-            let new_ch = char_at(&self.source_text, new_byte_offset);
-            let new_ch_len = new_ch.len_utf8();
-
-            self.ch = Some(new_ch);
-            self.pos = new_pos;
-            self.next_pos = new_pos + Pos::from_usize(new_ch_len);
-            if old_ch_is_newline {
+        if next_byte_offset < end {
+            let next_ch = char_at(&self.source_text, next_byte_offset);
+            let next_ch_len = next_ch.len_utf8();
+
+            if self.ch.unwrap() == '\n' {
                 if self.save_new_lines_and_multibyte {
-                    self.filemap.next_line(self.pos);
+                    self.filemap.next_line(self.next_pos);
                 }
                 self.col = CharPos(0);
             } else {
                 self.col = self.col + CharPos(1);
             }
-            if new_ch_len > 1 {
+            if next_ch_len > 1 {
                 if self.save_new_lines_and_multibyte {
-                    self.filemap.record_multibyte_char(self.pos, new_ch_len);
+                    self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
                 }
             }
-            self.filemap.record_width(self.pos, new_ch);
+            self.filemap.record_width(self.next_pos, next_ch);
+
+            self.ch = Some(next_ch);
+            self.pos = self.next_pos;
+            self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
         } else {
             self.ch = None;
-            self.pos = new_pos;
+            self.pos = self.next_pos;
         }
     }
 

From 7a090fbe02b6b20f0dce8acf07a0328375f91d68 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 9 May 2018 12:49:39 +1000
Subject: [PATCH 3/6] Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.
---
 src/libsyntax/parse/lexer/mod.rs | 53 +++++++++++++++-----------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index a24af857af142..e2030ff62c579 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -61,7 +61,7 @@ pub struct StringReader<'a> {
     pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
-    source_text: Lrc<String>,
+    src: Lrc<String>,
     /// Stack of open delimiters and their spans. Used for error message.
     token: token::Token,
     span: Span,
@@ -176,7 +176,7 @@ impl<'a> StringReader<'a> {
                                               filemap.name));
         }
 
-        let source_text = (*filemap.src.as_ref().unwrap()).clone();
+        let src = (*filemap.src.as_ref().unwrap()).clone();
 
         StringReader {
             sess,
@@ -190,7 +190,7 @@ impl<'a> StringReader<'a> {
             // dummy values; not read
             peek_tok: token::Eof,
             peek_span: syntax_pos::DUMMY_SP,
-            source_text,
+            src,
             fatal_errs: Vec::new(),
             token: token::Eof,
             span: syntax_pos::DUMMY_SP,
@@ -326,9 +326,7 @@ impl<'a> StringReader<'a> {
     /// offending string to the error message
     fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
         m.push_str(": ");
-        let from = self.byte_offset(from_pos).to_usize();
-        let to = self.byte_offset(to_pos).to_usize();
-        m.push_str(&self.source_text[from..to]);
+        m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
         self.fatal_span_(from_pos, to_pos, &m[..])
     }
 
@@ -354,8 +352,9 @@ impl<'a> StringReader<'a> {
         Ok(())
     }
 
-    fn byte_offset(&self, pos: BytePos) -> BytePos {
-        (pos - self.filemap.start_pos)
+    #[inline]
+    fn src_index(&self, pos: BytePos) -> usize {
+        (pos - self.filemap.start_pos).to_usize()
     }
 
     /// Calls `f` with a string slice of the source text spanning from `start`
@@ -386,7 +385,7 @@ impl<'a> StringReader<'a> {
     fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
         where F: FnOnce(&str) -> T
     {
-        f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
+        f(&self.src[self.src_index(start)..self.src_index(end)])
     }
 
     /// Converts CRLF to LF in the given string, raising an error on bare CR.
@@ -438,16 +437,13 @@ impl<'a> StringReader<'a> {
         }
     }
 
-
     /// Advance the StringReader by one character. If a newline is
     /// discovered, add it to the FileMap's list of line start offsets.
     pub fn bump(&mut self) {
-        let next_byte_offset = self.byte_offset(self.next_pos).to_usize();
-        let end = self.terminator.map_or(self.source_text.len(), |t| {
-            self.byte_offset(t).to_usize()
-        });
-        if next_byte_offset < end {
-            let next_ch = char_at(&self.source_text, next_byte_offset);
+        let next_src_index = self.src_index(self.next_pos);
+        let end_src_index = self.terminator.map_or(self.src.len(), |t| self.src_index(t));
+        if next_src_index < end_src_index {
+            let next_ch = char_at(&self.src, next_src_index);
             let next_ch_len = next_ch.len_utf8();
 
             if self.ch.unwrap() == '\n' {
@@ -475,9 +471,9 @@ impl<'a> StringReader<'a> {
     }
 
     pub fn nextch(&self) -> Option<char> {
-        let offset = self.byte_offset(self.next_pos).to_usize();
-        if offset < self.source_text.len() {
-            Some(char_at(&self.source_text, offset))
+        let next_src_index = self.src_index(self.next_pos);
+        if next_src_index < self.src.len() {
+            Some(char_at(&self.src, next_src_index))
         } else {
             None
         }
@@ -488,14 +484,14 @@ impl<'a> StringReader<'a> {
     }
 
     pub fn nextnextch(&self) -> Option<char> {
-        let offset = self.byte_offset(self.next_pos).to_usize();
-        let s = &self.source_text[..];
-        if offset >= s.len() {
+        let next_src_index = self.src_index(self.next_pos);
+        let s = &self.src[..];
+        if next_src_index >= s.len() {
             return None;
         }
-        let next = offset + char_at(s, offset).len_utf8();
-        if next < s.len() {
-            Some(char_at(s, next))
+        let next_next_src_index = next_src_index + char_at(s, next_src_index).len_utf8();
+        if next_next_src_index < s.len() {
+            Some(char_at(s, next_next_src_index))
         } else {
             None
         }
@@ -1358,8 +1354,8 @@ impl<'a> StringReader<'a> {
                     loop {
                         self.bump();
                         if self.ch_is('\'') {
-                            let start = self.byte_offset(start).to_usize();
-                            let end = self.byte_offset(self.pos).to_usize();
+                            let start = self.src_index(start);
+                            let end = self.src_index(self.pos);
                             self.bump();
                             let span = self.mk_sp(start_with_quote, self.pos);
                             self.sess.span_diagnostic
@@ -1368,8 +1364,7 @@ impl<'a> StringReader<'a> {
                                 .span_suggestion(span,
                                                  "if you meant to write a `str` literal, \
                                                   use double quotes",
-                                                 format!("\"{}\"",
-                                                         &self.source_text[start..end]))
+                                                 format!("\"{}\"", &self.src[start..end]))
                                 .emit();
                             return Ok(token::Literal(token::Str_(Symbol::intern("??")), None))
                         }

From 548067e00f64861915d2374082adc7796c73ceb8 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 9 May 2018 12:55:13 +1000
Subject: [PATCH 4/6] Remove `StringReader::terminator`.

It's silly for a hot function like `bump()` to have such an expensive
bounds check. This patch replaces terminator with `end_src_index`.

Note that the `self.terminator` check in `is_eof()` wasn't necessary
because of the way `StringReader` is initialized.
---
 src/libsyntax/parse/lexer/mod.rs | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index e2030ff62c579..f80eaf0a9b446 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -49,8 +49,8 @@ pub struct StringReader<'a> {
     /// The current character (which has been read from self.pos)
     pub ch: Option<char>,
     pub filemap: Lrc<syntax_pos::FileMap>,
-    /// If Some, stop reading the source at this position (inclusive).
-    pub terminator: Option<BytePos>,
+    /// Stop reading src at this index.
+    pub end_src_index: usize,
     /// Whether to record new-lines and multibyte chars in filemap.
     /// This is only necessary the first time a filemap is lexed.
     /// If part of a filemap is being re-lexed, this should be set to false.
@@ -113,14 +113,7 @@ impl<'a> StringReader<'a> {
         self.unwrap_or_abort(res)
     }
     fn is_eof(&self) -> bool {
-        if self.ch.is_none() {
-            return true;
-        }
-
-        match self.terminator {
-            Some(t) => self.next_pos > t,
-            None => false,
-        }
+        self.ch.is_none()
     }
     /// Return the next token. EFFECT: advances the string_reader.
     pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
@@ -185,7 +178,7 @@ impl<'a> StringReader<'a> {
             col: CharPos(0),
             ch: Some('\n'),
             filemap,
-            terminator: None,
+            end_src_index: src.len(),
             save_new_lines_and_multibyte: true,
             // dummy values; not read
             peek_tok: token::Eof,
@@ -222,7 +215,7 @@ impl<'a> StringReader<'a> {
         // Seek the lexer to the right byte range.
         sr.save_new_lines_and_multibyte = false;
         sr.next_pos = span.lo();
-        sr.terminator = Some(span.hi());
+        sr.end_src_index = sr.src_index(span.hi());
 
         sr.bump();
 
@@ -441,8 +434,7 @@ impl<'a> StringReader<'a> {
     /// discovered, add it to the FileMap's list of line start offsets.
     pub fn bump(&mut self) {
         let next_src_index = self.src_index(self.next_pos);
-        let end_src_index = self.terminator.map_or(self.src.len(), |t| self.src_index(t));
-        if next_src_index < end_src_index {
+        if next_src_index < self.end_src_index {
             let next_ch = char_at(&self.src, next_src_index);
             let next_ch_len = next_ch.len_utf8();
 
@@ -472,7 +464,7 @@ impl<'a> StringReader<'a> {
 
     pub fn nextch(&self) -> Option<char> {
         let next_src_index = self.src_index(self.next_pos);
-        if next_src_index < self.src.len() {
+        if next_src_index < self.end_src_index {
             Some(char_at(&self.src, next_src_index))
         } else {
             None
@@ -485,13 +477,12 @@ impl<'a> StringReader<'a> {
 
     pub fn nextnextch(&self) -> Option<char> {
         let next_src_index = self.src_index(self.next_pos);
-        let s = &self.src[..];
-        if next_src_index >= s.len() {
+        if next_src_index >= self.end_src_index {
             return None;
         }
-        let next_next_src_index = next_src_index + char_at(s, next_src_index).len_utf8();
-        if next_next_src_index < s.len() {
-            Some(char_at(s, next_next_src_index))
+        let next_next_src_index = next_src_index + char_at(&self.src, next_src_index).len_utf8();
+        if next_next_src_index < self.end_src_index {
+            Some(char_at(&self.src, next_next_src_index))
         } else {
             None
         }

From 444b770f4cd8d817e7b7fec683ea301620034d13 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 9 May 2018 13:20:55 +1000
Subject: [PATCH 5/6] Make `nextnextch()` more closely resemble `nextch()`.

---
 src/libsyntax/parse/lexer/mod.rs | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f80eaf0a9b446..3968b7f113b95 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -477,15 +477,14 @@ impl<'a> StringReader<'a> {
 
     pub fn nextnextch(&self) -> Option<char> {
         let next_src_index = self.src_index(self.next_pos);
-        if next_src_index >= self.end_src_index {
-            return None;
-        }
-        let next_next_src_index = next_src_index + char_at(&self.src, next_src_index).len_utf8();
-        if next_next_src_index < self.end_src_index {
-            Some(char_at(&self.src, next_next_src_index))
-        } else {
-            None
+        if next_src_index < self.end_src_index {
+            let next_next_src_index =
+                next_src_index + char_at(&self.src, next_src_index).len_utf8();
+            if next_next_src_index < self.end_src_index {
+                return Some(char_at(&self.src, next_next_src_index));
+            }
         }
+        None
     }
 
     pub fn nextnextch_is(&self, c: char) -> bool {

From e913d692117e57ea2908461865aa08037f0bd2b9 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Tue, 8 May 2018 19:58:54 +1000
Subject: [PATCH 6/6] Remove `StringReader::col`.

It only has a single use, within code handling indented block comments.
We can replace that with the new `FileMap::col_pos()`, which computes
the col position (BytePos instead of CharPos) based on the record of the
last newline char (which we already record).

This is actually an improvement, because
`trim_whitespace_prefix_and_push_line()` was using `col`, which is a
`CharPos`, as a slice index, which is a byte/char confusion.
---
 src/libsyntax/parse/lexer/comments.rs | 14 +++++++++++++-
 src/libsyntax/parse/lexer/mod.rs      |  6 ------
 src/libsyntax_pos/lib.rs              |  9 +++++++++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 63aa5d28ce8dc..672b0b9bbd16d 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -238,7 +238,19 @@ fn read_block_comment(rdr: &mut StringReader,
     debug!(">>> block comment");
     let p = rdr.pos;
     let mut lines: Vec<String> = Vec::new();
-    let col = rdr.col;
+
+    // Count the number of chars since the start of the line by rescanning.
+    let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos());
+    let end_src_index = rdr.src_index(rdr.pos);
+    assert!(src_index <= end_src_index);
+    let mut n = 0;
+    while src_index < end_src_index {
+        let c = char_at(&rdr.src, src_index);
+        src_index += c.len_utf8();
+        n += 1;
+    }
+    let col = CharPos(n);
+
     rdr.bump();
     rdr.bump();
 
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 3968b7f113b95..566e0c213b119 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -44,8 +44,6 @@ pub struct StringReader<'a> {
     pub next_pos: BytePos,
     /// The absolute offset within the codemap of the current character
     pub pos: BytePos,
-    /// The column of the next character to read
-    pub col: CharPos,
     /// The current character (which has been read from self.pos)
     pub ch: Option<char>,
     pub filemap: Lrc<syntax_pos::FileMap>,
@@ -175,7 +173,6 @@ impl<'a> StringReader<'a> {
             sess,
             next_pos: filemap.start_pos,
             pos: filemap.start_pos,
-            col: CharPos(0),
             ch: Some('\n'),
             filemap,
             end_src_index: src.len(),
@@ -442,9 +439,6 @@ impl<'a> StringReader<'a> {
                 if self.save_new_lines_and_multibyte {
                     self.filemap.next_line(self.next_pos);
                 }
-                self.col = CharPos(0);
-            } else {
-                self.col = self.col + CharPos(1);
             }
             if next_ch_len > 1 {
                 if self.save_new_lines_and_multibyte {
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs
index 2bab958dceda8..26ab5d0a34ba5 100644
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@@ -971,6 +971,15 @@ impl FileMap {
         lines.push(pos);
     }
 
+    /// Return the BytePos of the beginning of the current line.
+    pub fn line_begin_pos(&self) -> BytePos {
+        let lines = self.lines.borrow();
+        match lines.last() {
+            Some(&line_pos) => line_pos,
+            None => self.start_pos,
+        }
+    }
+
     /// Add externally loaded source.
     /// If the hash of the input doesn't match or no input is supplied via None,
     /// it is interpreted as an error and the corresponding enum variant is set.