Convert BufferQueue to use Interior Mutability #542

Merged

Changes from 3 commits
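The change in a nutshell: `BufferQueue`'s storage moves behind a `RefCell`, so methods that previously needed `&mut self` now take `&self` and perform runtime-checked borrows internally. A minimal illustrative sketch of the pattern (stand-in types, not the actual html5ever code):

```rust
use std::cell::RefCell;
use std::collections::VecDeque;

// Illustrative stand-in for BufferQueue: all mutation goes through
// the RefCell, so every public method can take &self.
struct Queue {
    buffers: RefCell<VecDeque<String>>,
}

impl Queue {
    fn push_back(&self, buf: String) {
        // borrow_mut() panics at runtime if another borrow is active,
        // so borrows are kept short and never held across other calls.
        self.buffers.borrow_mut().push_back(buf);
    }

    fn pop_front(&self) -> Option<String> {
        self.buffers.borrow_mut().pop_front()
    }
}
```

The trade-off is that a conflicting borrow surfaces as a `borrow_mut` panic at runtime instead of a compile error, which is why the methods in this diff keep each borrow as short-lived as possible.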
22 changes: 11 additions & 11 deletions html5ever/src/tokenizer/char_ref/mod.rs
@@ -115,7 +115,7 @@ impl CharRefTokenizer {
pub(super) fn step<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
if self.result.is_some() {
return Done;
@@ -135,7 +135,7 @@ impl CharRefTokenizer {
fn do_begin<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
match unwrap_or_return!(tokenizer.peek(input), Stuck) {
'a'..='z' | 'A'..='Z' | '0'..='9' => {
@@ -156,7 +156,7 @@ impl CharRefTokenizer {
fn do_octothorpe<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
match c {
@@ -177,7 +177,7 @@ impl CharRefTokenizer {
fn do_numeric<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
base: u32,
) -> Status {
let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
@@ -207,7 +207,7 @@ impl CharRefTokenizer {
fn do_numeric_semicolon<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
match unwrap_or_return!(tokenizer.peek(input), Stuck) {
';' => tokenizer.discard_char(input),
@@ -221,7 +221,7 @@ impl CharRefTokenizer {
fn unconsume_numeric<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
let mut unconsume = StrTendril::from_char('#');
if let Some(c) = self.hex_marker {
@@ -270,7 +270,7 @@ impl CharRefTokenizer {
fn do_named<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
// peek + discard skips over newline normalization, therefore making it easier to
// un-consume
@@ -304,14 +304,14 @@ impl CharRefTokenizer {
tokenizer.emit_error(msg);
}

fn unconsume_name(&mut self, input: &mut BufferQueue) {
fn unconsume_name(&mut self, input: & BufferQueue) {
input.push_front(self.name_buf_opt.take().unwrap());
}

fn finish_named<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
end_char: Option<char>,
) -> Status {
match self.name_match {
@@ -395,7 +395,7 @@ impl CharRefTokenizer {
fn do_bogus_name<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) -> Status {
// peek + discard skips over newline normalization, therefore making it easier to
// un-consume
@@ -414,7 +414,7 @@ impl CharRefTokenizer {
pub(super) fn end_of_file<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
input: & BufferQueue,
) {
while self.result.is_none() {
match self.state {
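That is the whole of the char_ref changes: pure signature rewrites from `&mut BufferQueue` to `&BufferQueue`. The payoff is that shared borrows compose: caller and callee can hold the queue at the same time while the `RefCell` inside serializes the actual mutations. An illustrative sketch with stand-in types:

```rust
use std::cell::RefCell;
use std::collections::VecDeque;

struct Queue(RefCell<VecDeque<char>>);

impl Queue {
    fn pop_front(&self) -> Option<char> {
        self.0.borrow_mut().pop_front()
    }
}

// Shared borrows may coexist, so an outer step can hand the same
// queue to an inner helper without any re-borrowing gymnastics.
fn outer_step(input: &Queue) {
    inner_step(input);
    let _ = input.pop_front();
}

fn inner_step(input: &Queue) {
    let _ = input.pop_front();
}
```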
22 changes: 12 additions & 10 deletions html5ever/src/tokenizer/mod.rs
@@ -206,7 +206,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}

/// Feed an input string into the tokenizer.
pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
pub fn feed(&mut self, input: & BufferQueue) -> TokenizerResult<Sink::Handle> {
if input.is_empty() {
return TokenizerResult::Done;
}
@@ -248,7 +248,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ preprocessing-the-input-stream
// Get the next input character, which might be the character
// 'c' that we already consumed from the buffers.
fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option<char> {
fn get_preprocessed_char(&mut self, mut c: char, input: & BufferQueue) -> Option<char> {
if self.ignore_lf {
self.ignore_lf = false;
if c == '\n' {
@@ -283,7 +283,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {

//§ tokenization
// Get the next input character, if one is available.
fn get_char(&mut self, input: &mut BufferQueue) -> Option<char> {
fn get_char(&mut self, input: & BufferQueue) -> Option<char> {
if self.reconsume {
self.reconsume = false;
Some(self.current_char)
@@ -294,7 +294,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
}

fn pop_except_from(&mut self, input: &mut BufferQueue, set: SmallCharSet) -> Option<SetResult> {
fn pop_except_from(&mut self, input: & BufferQueue, set: SmallCharSet) -> Option<SetResult> {
// Bail to the slow path for various corner cases.
// This means that `FromSet` can contain characters not in the set!
// It shouldn't matter because the fallback `FromSet` case should
@@ -321,7 +321,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// NB: this doesn't set the current input character.
fn eat(
&mut self,
input: &mut BufferQueue,
input: & BufferQueue,
pat: &str,
eq: fn(&u8, &u8) -> bool,
) -> Option<bool> {
@@ -336,15 +336,17 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match input.eat(pat, eq) {
None if self.at_eof => Some(false),
None => {
self.temp_buf.extend(input);
while let Some(data) = input.next() {
[Review comment from the PR author] @jdm I extracted next() and reimplemented extend

self.temp_buf.push_char(data);
}
None
},
Some(matched) => Some(matched),
}
}
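The author's comment above points at the only non-mechanical change in this file: `self.temp_buf.extend(input)` worked because `BufferQueue` implemented `Iterator` (and `&mut BufferQueue` is itself an iterator), but `Extend::extend` has to advance its source through a mutable handle. With `next()` now an inherent method on `&BufferQueue`, the same drain becomes an explicit loop. A sketch of the equivalence, using stand-in types:

```rust
use std::cell::RefCell;
use std::collections::VecDeque;

struct Queue(RefCell<VecDeque<char>>);

impl Queue {
    // Inherent next() taking &self, mirroring the PR's BufferQueue::next.
    fn next(&self) -> Option<char> {
        self.0.borrow_mut().pop_front()
    }
}

// Replaces `temp_buf.extend(input)`: the explicit loop only needs a
// shared reference to the queue.
fn drain_into(temp_buf: &mut String, input: &Queue) {
    while let Some(c) = input.next() {
        temp_buf.push(c);
    }
}
```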

/// Run the state machine for as long as we can.
fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
fn run(&mut self, input: & BufferQueue) -> TokenizerResult<Sink::Handle> {
if self.opts.profile {
loop {
let state = self.state;
@@ -567,7 +569,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
}

fn discard_char(&mut self, input: &mut BufferQueue) {
fn discard_char(&mut self, input: & BufferQueue) {
// peek() deals in un-processed characters (no newline normalization), while get_char()
// does.
//
@@ -696,7 +698,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// Return true if we should be immediately re-invoked
// (this just simplifies control flow vs. break / continue).
#[allow(clippy::never_loop)]
fn step(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
fn step(&mut self, input: & BufferQueue) -> ProcessResult<Sink::Handle> {
if self.char_ref_tokenizer.is_some() {
return self.step_char_ref_tokenizer(input);
}
@@ -1382,7 +1384,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
}

fn step_char_ref_tokenizer(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
fn step_char_ref_tokenizer(&mut self, input: & BufferQueue) -> ProcessResult<Sink::Handle> {
// FIXME HACK: Take and replace the tokenizer so we don't
// double-mut-borrow self. This is why it's boxed.
let mut tok = self.char_ref_tokenizer.take().unwrap();
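The `FIXME HACK` above predates this PR, and interior mutability in `BufferQueue` does not remove it: the char-ref tokenizer lives inside `self`, yet its `step` also needs `&mut self` of the outer tokenizer. A self-contained sketch of the take-and-replace workaround (hypothetical names; the real code additionally decides whether to put the sub-tokenizer back based on its status):

```rust
struct CharRef;

impl CharRef {
    // Needs mutable access to the outer tokenizer while running.
    fn step(&mut self, _outer: &mut Outer) {}
}

struct Outer {
    char_ref_tokenizer: Option<Box<CharRef>>,
}

impl Outer {
    fn step_char_ref(&mut self) {
        // Move the boxed sub-tokenizer out of self first...
        let mut tok = self.char_ref_tokenizer.take().unwrap();
        // ...so this call does not borrow self mutably twice.
        tok.step(self);
        // Put it back for the next step.
        self.char_ref_tokenizer = Some(tok);
    }
}
```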
65 changes: 32 additions & 33 deletions markup5ever/util/buffer_queue.rs
@@ -18,7 +18,7 @@
//!
//! [`BufferQueue`]: struct.BufferQueue.html

use std::collections::VecDeque;
use std::{cell::RefCell, collections::VecDeque};

use tendril::StrTendril;

@@ -46,15 +46,15 @@ pub enum SetResult {
#[derive(Debug)]
pub struct BufferQueue {
/// Buffers to process.
buffers: VecDeque<StrTendril>,
buffers: RefCell<VecDeque<StrTendril>>,
}

impl Default for BufferQueue {
/// Create an empty BufferQueue.
#[inline]
fn default() -> Self {
Self {
buffers: VecDeque::with_capacity(16),
buffers: RefCell::new(VecDeque::with_capacity(16)),
}
}
}
@@ -63,42 +63,45 @@ impl BufferQueue {
/// Returns whether the queue is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.buffers.is_empty()
self.buffers.borrow().is_empty()
}

/// Get the buffer at the beginning of the queue.
#[inline]
pub fn pop_front(&mut self) -> Option<StrTendril> {
self.buffers.pop_front()
pub fn pop_front(&self) -> Option<StrTendril> {
self.buffers.borrow_mut().pop_front()
}

/// Add a buffer to the beginning of the queue.
///
/// If the buffer is empty, it will be skipped.
pub fn push_front(&mut self, buf: StrTendril) {
pub fn push_front(&self, buf: StrTendril) {
if buf.len32() == 0 {
return;
}
self.buffers.push_front(buf);
self.buffers.borrow_mut().push_front(buf);
}

/// Add a buffer to the end of the queue.
///
/// If the buffer is empty, it will be skipped.
pub fn push_back(&mut self, buf: StrTendril) {
pub fn push_back(&self, buf: StrTendril) {
if buf.len32() == 0 {
return;
}
self.buffers.push_back(buf);
self.buffers.borrow_mut().push_back(buf);
}

/// Look at the next available character without removing it, if the queue is not empty.
pub fn peek(&self) -> Option<char> {
debug_assert!(
!self.buffers.iter().any(|el| el.len32() == 0),
!self.buffers.borrow().iter().any(|el| el.len32() == 0),
"invariant \"all buffers in the queue are non-empty\" failed"
);
self.buffers.front().map(|b| b.chars().next().unwrap())
self.buffers
.borrow()
.front()
.map(|b| b.chars().next().unwrap())
}
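A detail that makes `peek` work under `RefCell`: `char` is `Copy`, so the value is copied out of the borrow and the `Ref` guard is released before the method returns; no reference into the queue's interior ever escapes. Illustrative sketch:

```rust
use std::cell::RefCell;
use std::collections::VecDeque;

struct Queue(RefCell<VecDeque<String>>);

impl Queue {
    fn peek(&self) -> Option<char> {
        // The Ref guard lives only for this expression; the char is
        // copied out, so no borrow outlives the method call.
        self.0.borrow().front().map(|s| s.chars().next().unwrap())
    }
}
```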

/// Pops and returns either a single character from the given set, or
@@ -128,8 +131,8 @@ impl BufferQueue {
/// // ...
/// # }
/// ```
pub fn pop_except_from(&mut self, set: SmallCharSet) -> Option<SetResult> {
let (result, now_empty) = match self.buffers.front_mut() {
pub fn pop_except_from(&self, set: SmallCharSet) -> Option<SetResult> {
let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
None => (None, false),
Some(buf) => {
let n = set.nonmember_prefix_len(buf);
Expand All @@ -149,7 +152,7 @@ impl BufferQueue {

// Unborrow self for this part.
if now_empty {
self.buffers.pop_front();
self.buffers.borrow_mut().pop_front();
}

result
@@ -178,17 +181,17 @@ impl BufferQueue {
/// assert!(queue.is_empty());
/// # }
/// ```
pub fn eat<F: Fn(&u8, &u8) -> bool>(&mut self, pat: &str, eq: F) -> Option<bool> {
pub fn eat<F: Fn(&u8, &u8) -> bool>(&self, pat: &str, eq: F) -> Option<bool> {
let mut buffers_exhausted = 0;
let mut consumed_from_last = 0;

self.buffers.front()?;
self.buffers.borrow().front()?;

for pattern_byte in pat.bytes() {
if buffers_exhausted >= self.buffers.len() {
if buffers_exhausted >= self.buffers.borrow().len() {
return None;
}
let buf = &self.buffers[buffers_exhausted];
let buf = &self.buffers.borrow()[buffers_exhausted];

if !eq(&buf.as_bytes()[consumed_from_last], &pattern_byte) {
return Some(false);
@@ -203,26 +206,22 @@ impl BufferQueue {

// We have a match. Commit changes to the BufferQueue.
for _ in 0..buffers_exhausted {
self.buffers.pop_front();
self.buffers.borrow_mut().pop_front();
}

match self.buffers.front_mut() {
match self.buffers.borrow_mut().front_mut() {
None => assert_eq!(consumed_from_last, 0),
Some(ref mut buf) => buf.pop_front(consumed_from_last as u32),
}

Some(true)
}
}

impl Iterator for BufferQueue {
type Item = char;

/// Get the next character if one is available, removing it from the queue.
///
/// This function manages the buffers, removing them as they become empty.
fn next(&mut self) -> Option<char> {
let (result, now_empty) = match self.buffers.front_mut() {
pub fn next(&self) -> Option<char> {
let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
None => (None, false),
Some(buf) => {
let c = buf.pop_front_char().expect("empty buffer in queue");
@@ -231,7 +230,7 @@ impl Iterator for BufferQueue {
};

if now_empty {
self.buffers.pop_front();
self.buffers.borrow_mut().pop_front();
}

result
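Dropping `impl Iterator for BufferQueue` is forced, not incidental: `Iterator::next` takes `&mut self`, which contradicts the new shared-reference API, so `next` survives only as an inherent method. If iterator adapters were ever wanted again, a thin borrowing wrapper could supply them; the following is a hypothetical sketch, not part of this PR:

```rust
// Hypothetical adapter over the PR's BufferQueue; &BufferQueue is
// enough because the inherent next() only needs a shared borrow.
struct Chars<'a>(&'a BufferQueue);

impl Iterator for Chars<'_> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        self.0.next()
    }
}
```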
@@ -248,7 +247,7 @@ mod test {

#[test]
fn smoke_test() {
let mut bq = BufferQueue::default();
let bq = BufferQueue::default();
assert_eq!(bq.peek(), None);
assert_eq!(bq.next(), None);

@@ -266,7 +265,7 @@

#[test]
fn can_unconsume() {
let mut bq = BufferQueue::default();
let bq = BufferQueue::default();
bq.push_back("abc".to_tendril());
assert_eq!(bq.next(), Some('a'));

@@ -280,9 +279,9 @@

#[test]
fn can_pop_except_set() {
let mut bq = BufferQueue::default();
let bq = BufferQueue::default();
bq.push_back("abc&def".to_tendril());
let mut pop = || bq.pop_except_from(small_char_set!('&'));
let pop = || bq.pop_except_from(small_char_set!('&'));
assert_eq!(pop(), Some(NotFromSet("abc".to_tendril())));
assert_eq!(pop(), Some(FromSet('&')));
assert_eq!(pop(), Some(NotFromSet("def".to_tendril())));
@@ -294,7 +293,7 @@
// This is not very comprehensive. We rely on the tokenizer
// integration tests for more thorough testing with many
// different input buffer splits.
let mut bq = BufferQueue::default();
let bq = BufferQueue::default();
bq.push_back("a".to_tendril());
bq.push_back("bc".to_tendril());
assert_eq!(bq.eat("abcd", u8::eq_ignore_ascii_case), None);