Skip to content

Commit

Permalink
Remove string-interner dependency and implement custom string `Inte…
Browse files Browse the repository at this point in the history
…rner` (#2147)

So, @raskad and myself had a short discussion about the state of #736, and we came to the conclusion that it would be a good time to implement our own string interner; partly because the `string-interner` crate is a bit unmaintained (as shown by Robbepop/string-interner#42 and Robbepop/string-interner#47), and partly because it would be hard to experiment with custom optimizations for UTF-16 strings. I still want to thank @Robbepop for the original implementation though, because some parts of this design have been shamelessly stolen from it 😅.

Having said that, this PR is a complete reimplementation of the interner, but with some modifications to (hopefully!) make it a bit easier to experiment with UTF-16 strings, apply optimizations, and whatnot :)
  • Loading branch information
jedel1043 committed Jun 30, 2022
1 parent a2a33e7 commit f7aef10
Show file tree
Hide file tree
Showing 13 changed files with 488 additions and 314 deletions.
38 changes: 4 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 39 additions & 8 deletions boa_engine/src/builtins/function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::{
value::IntegerOrInfinity,
Context, JsResult, JsString, JsValue,
};
use boa_gc::{self, Finalize, Gc, Trace};
use boa_gc::{self, custom_trace, Finalize, Gc, Trace};
use boa_interner::Sym;
use boa_profiler::Profiler;
use dyn_clone::DynClone;
Expand Down Expand Up @@ -138,12 +138,26 @@ impl ConstructorKind {
/// - [ECMAScript specification][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-classfielddefinition-record-specification-type
#[derive(Clone, Debug, Trace, Finalize)]
#[derive(Clone, Debug, Finalize)]
pub enum ClassFieldDefinition {
Public(PropertyKey, JsFunction),
Private(Sym, JsFunction),
}

unsafe impl Trace for ClassFieldDefinition {
custom_trace! {this, {
match this {
Self::Public(key, func) => {
mark(key);
mark(func);
}
Self::Private(_, func) => {
mark(func);
}
}
}}
}

/// Wrapper for `Gc<GcCell<dyn NativeObject>>` that allows passing additional
/// captures through a `Copy` closure.
///
Expand Down Expand Up @@ -191,18 +205,14 @@ impl Captures {
/// (AST Node).
///
/// <https://tc39.es/ecma262/#sec-ecmascript-function-objects>
#[derive(Trace, Finalize)]
#[derive(Finalize)]
pub enum Function {
Native {
#[unsafe_ignore_trace]
function: NativeFunctionSignature,
#[unsafe_ignore_trace]
constructor: Option<ConstructorKind>,
},
Closure {
#[unsafe_ignore_trace]
function: Box<dyn ClosureFunctionSignature>,
#[unsafe_ignore_trace]
constructor: Option<ConstructorKind>,
captures: Captures,
},
Expand All @@ -211,7 +221,6 @@ pub enum Function {
environments: DeclarativeEnvironmentStack,

/// The `[[ConstructorKind]]` internal slot.
#[unsafe_ignore_trace]
constructor_kind: ConstructorKind,

/// The `[[HomeObject]]` internal slot.
Expand All @@ -229,6 +238,28 @@ pub enum Function {
},
}

unsafe impl Trace for Function {
custom_trace! {this, {
match this {
Self::Native { .. } => {}
Self::Closure { captures, .. } => mark(captures),
Self::Ordinary { code, environments, home_object, fields, private_methods, .. } => {
mark(code);
mark(environments);
mark(home_object);
mark(fields);
for (_, elem) in private_methods {
mark(elem);
}
}
Self::Generator { code, environments } => {
mark(code);
mark(environments);
}
}
}}
}

impl fmt::Debug for Function {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Function {{ ... }}")
Expand Down
7 changes: 5 additions & 2 deletions boa_engine/src/bytecompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
vm::{BindingOpcode, CodeBlock, Opcode},
Context, JsBigInt, JsResult, JsString, JsValue,
};
use boa_gc::{Cell, Gc};
use boa_gc::Gc;
use boa_interner::{Interner, Sym};
use rustc_hash::FxHashMap;
use std::mem::size_of;
Expand Down Expand Up @@ -98,7 +98,10 @@ impl<'b> ByteCompiler<'b> {

/// Push a compile time environment to the current `CodeBlock` and return it's index.
#[inline]
fn push_compile_environment(&mut self, environment: Gc<Cell<CompileTimeEnvironment>>) -> usize {
fn push_compile_environment(
&mut self,
environment: Gc<boa_gc::Cell<CompileTimeEnvironment>>,
) -> usize {
let index = self.code_block.compile_environments.len();
self.code_block.compile_environments.push(environment);
index
Expand Down
13 changes: 12 additions & 1 deletion boa_engine/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ impl<T: Any + Debug + Trace> NativeObject for T {
}

/// The internal representation of a JavaScript object.
#[derive(Debug, Trace, Finalize)]
#[derive(Debug, Finalize)]
pub struct Object {
/// The type of the object.
pub data: ObjectData,
Expand All @@ -122,6 +122,17 @@ pub struct Object {
private_elements: FxHashMap<Sym, PrivateElement>,
}

unsafe impl Trace for Object {
boa_gc::custom_trace!(this, {
mark(&this.data);
mark(&this.properties);
mark(&this.prototype);
for elem in this.private_elements.values() {
mark(elem);
}
});
}

/// The representation of private object elements.
#[derive(Clone, Debug, Trace, Finalize)]
pub enum PrivateElement {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::syntax::ast::node::Node;
use boa_gc::{Finalize, Trace};
use boa_interner::{Interner, Sym, ToInternedString};

#[cfg(feature = "deser")]
Expand All @@ -19,7 +18,7 @@ use serde::{Deserialize, Serialize};
/// [spec]: https://tc39.es/ecma262/#prod-ContinueStatement
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/continue
#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, Trace, Finalize, PartialEq)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Continue {
label: Option<Sym>,
}
Expand Down
2 changes: 2 additions & 0 deletions boa_engine/src/vm/code_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ unsafe impl Readable for f64 {}
#[derive(Clone, Debug, Trace, Finalize)]
pub struct CodeBlock {
/// Name of this function
#[unsafe_ignore_trace]
pub(crate) name: Sym,

/// The number of arguments expected.
Expand All @@ -77,6 +78,7 @@ pub struct CodeBlock {
pub(crate) literals: Vec<JsValue>,

/// Property field names.
#[unsafe_ignore_trace]
pub(crate) names: Vec<Sym>,

/// Locators for all bindings in the codeblock.
Expand Down
2 changes: 1 addition & 1 deletion boa_gc/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Garbage collector for the Boa JavaScript engine.
pub use gc::{
custom_trace, force_collect, unsafe_empty_trace, Finalize, Gc, GcCell as Cell,
custom_trace, finalizer_safe, force_collect, unsafe_empty_trace, Finalize, Gc, GcCell as Cell,
GcCellRef as Ref, GcCellRefMut as RefMut, Trace,
};
5 changes: 3 additions & 2 deletions boa_interner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ categories = ["data-structures"]
license = "Unlicense/MIT"

[dependencies]
string-interner = "0.14.0"
serde = { version = "1.0.137", features = ["derive"], optional = true }
gc = { version = "0.4.1", features = ["derive"] }
phf = { version = "0.10.1", features = ["macros"] }
rustc-hash = "1.1.0"
static_assertions = "1.1.0"
62 changes: 62 additions & 0 deletions boa_interner/src/fixed_string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use crate::interned_str::InternedStr;

#[derive(Debug, Default)]
pub(super) struct FixedString {
inner: String,
}

impl FixedString {
/// Creates a new, pinned [`FixedString`].
pub(super) fn new(capacity: usize) -> Self {
Self {
inner: String::with_capacity(capacity),
}
}

/// Gets the maximum capacity of the [`FixedString`].
pub(super) fn capacity(&self) -> usize {
self.inner.capacity()
}

/// Returns `true` if the [`FixedString`] has length zero,
/// and `false` otherwise.
pub(super) fn is_empty(&self) -> bool {
self.inner.is_empty()
}

/// Tries to push `string` to the [`FixedString`], and returns
/// an [`InternedStr`] pointer to the stored `string`, or
/// `None` if the capacity is not enough to store `string`.
///
/// # Safety
///
/// The caller is responsible for ensuring `self` outlives the returned
/// `InternedStr`.
pub(super) unsafe fn push(&mut self, string: &str) -> Option<InternedStr> {
let capacity = self.inner.capacity();
(capacity >= self.inner.len() + string.len()).then(|| {
let old_len = self.inner.len();
self.inner.push_str(string);
// SAFETY: The caller is responsible for extending the lifetime
// of `self` to outlive the return value.
unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) }
})
}

/// Pushes `string` to the [`FixedString`], and returns
/// an [`InternedStr`] pointer to the stored `string`, without
/// checking if the total `capacity` is enough to store `string`.
///
/// # Safety
///
/// The caller is responsible for ensuring that `self` outlives the returned
/// `InternedStr` and that it has enough capacity to store `string` without
/// reallocating.
pub(super) unsafe fn push_unchecked(&mut self, string: &str) -> InternedStr {
let old_len = self.inner.len();
self.inner.push_str(string);
// SAFETY: The caller is responsible for extending the lifetime
// of `self` to outlive the return value.
unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) }
}
}
70 changes: 70 additions & 0 deletions boa_interner/src/interned_str.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
use std::{borrow::Borrow, ptr::NonNull};

/// Wrapper for an interned str pointer, required to
/// quickly check using a hash if a string is inside an [`Interner`][`super::Interner`].
///
/// # Safety
///
/// This struct could cause Undefined Behaviour on:
/// - Use without ensuring the referenced memory is still allocated.
/// - Construction of an [`InternedStr`] from an invalid [`NonNull<str>`].
///
/// In general, this should not be used outside of an [`Interner`][`super::Interner`].
#[derive(Debug, Clone)]
pub(super) struct InternedStr {
ptr: NonNull<str>,
}

impl InternedStr {
/// Create a new interned string from the given `str`.
///
/// # Safety
///
/// Not maintaining the invariants specified on the struct definition
/// could cause Undefined Behaviour.
#[inline]
pub(super) unsafe fn new(ptr: NonNull<str>) -> Self {
Self { ptr }
}

/// Returns a shared reference to the underlying string.
///
/// # Safety
///
/// Not maintaining the invariants specified on the struct definition
/// could cause Undefined Behaviour.
#[inline]
pub(super) unsafe fn as_str(&self) -> &str {
// SAFETY: The caller must verify the invariants
// specified on the struct definition.
unsafe { self.ptr.as_ref() }
}
}

impl std::hash::Hash for InternedStr {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
// SAFETY: The caller must verify the invariants
// specified in the struct definition.
unsafe {
self.as_str().hash(state);
}
}
}

impl Eq for InternedStr {}

impl PartialEq for InternedStr {
fn eq(&self, other: &Self) -> bool {
// SAFETY: The caller must verify the invariants
// specified in the struct definition.
unsafe { self.as_str() == other.as_str() }
}
}

impl Borrow<str> for InternedStr {
fn borrow(&self) -> &str {
// SAFETY: The caller must verify the invariants
// specified in the struct definition.
unsafe { self.as_str() }
}
}
Loading

0 comments on commit f7aef10

Please sign in to comment.