Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize table.init instruction and instantiation #2847

Merged
merged 2 commits into from
Apr 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cranelift/wasm/src/environ/dummy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
&mut self,
_table_index: TableIndex,
_base: Option<GlobalIndex>,
_offset: usize,
_offset: u32,
_elements: Box<[FuncIndex]>,
) -> WasmResult<()> {
// We do nothing
Expand Down Expand Up @@ -792,7 +792,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
&mut self,
_memory_index: MemoryIndex,
_base: Option<GlobalIndex>,
_offset: usize,
_offset: u32,
_data: &'data [u8],
) -> WasmResult<()> {
// We do nothing
Expand Down
4 changes: 2 additions & 2 deletions cranelift/wasm/src/environ/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment {
&mut self,
table_index: TableIndex,
base: Option<GlobalIndex>,
offset: usize,
offset: u32,
elements: Box<[FuncIndex]>,
) -> WasmResult<()>;

Expand Down Expand Up @@ -984,7 +984,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment {
&mut self,
memory_index: MemoryIndex,
base: Option<GlobalIndex>,
offset: usize,
offset: u32,
data: &'data [u8],
) -> WasmResult<()>;

Expand Down
16 changes: 2 additions & 14 deletions cranelift/wasm/src/sections_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ pub fn parse_element_section<'data>(
} => {
let mut init_expr_reader = init_expr.get_binary_reader();
let (base, offset) = match init_expr_reader.read_operator()? {
Operator::I32Const { value } => (None, value as u32 as usize),
Operator::I32Const { value } => (None, value as u32),
Operator::GlobalGet { global_index } => {
(Some(GlobalIndex::from_u32(global_index)), 0)
}
Expand All @@ -388,12 +388,6 @@ pub fn parse_element_section<'data>(
));
}
};
// Check for offset + len overflow
if offset.checked_add(segments.len()).is_none() {
return Err(wasm_unsupported!(
"element segment offset and length overflows"
));
}
environ.declare_table_elements(
TableIndex::from_u32(table_index),
base,
Expand Down Expand Up @@ -429,7 +423,7 @@ pub fn parse_data_section<'data>(
} => {
let mut init_expr_reader = init_expr.get_binary_reader();
let (base, offset) = match init_expr_reader.read_operator()? {
Operator::I32Const { value } => (None, value as u32 as usize),
Operator::I32Const { value } => (None, value as u32),
Operator::GlobalGet { global_index } => {
(Some(GlobalIndex::from_u32(global_index)), 0)
}
Expand All @@ -440,12 +434,6 @@ pub fn parse_data_section<'data>(
))
}
};
// Check for offset + len overflow
if offset.checked_add(data.len()).is_none() {
return Err(wasm_unsupported!(
"data segment offset and length overflows"
));
}
environ.declare_data_initialization(
MemoryIndex::from_u32(memory_index),
base,
Expand Down
19 changes: 14 additions & 5 deletions crates/environ/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use cranelift_wasm::*;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::convert::TryFrom;
use std::sync::Arc;

/// Implemenation styles for WebAssembly linear memory.
Expand Down Expand Up @@ -86,7 +87,7 @@ pub struct MemoryInitializer {
/// Optionally, a global variable giving a base index.
pub base: Option<GlobalIndex>,
/// The offset to add to the base.
pub offset: usize,
pub offset: u32,
/// The data to write into the linear memory.
pub data: Box<[u8]>,
}
Expand Down Expand Up @@ -168,7 +169,15 @@ impl MemoryInitialization {
// Perform a bounds check on the segment
// As this segment is referencing a defined memory without a global base, the last byte
// written to by the segment cannot exceed the memory's initial minimum size
if (initializer.offset + initializer.data.len())
let offset = usize::try_from(initializer.offset).unwrap();
let end = match offset.checked_add(initializer.data.len()) {
Some(end) => end,
None => {
out_of_bounds = true;
continue;
}
};
if end
> ((module.memory_plans[initializer.memory_index].memory.minimum
as usize)
* WASM_PAGE_SIZE)
Expand All @@ -178,8 +187,8 @@ impl MemoryInitialization {
}

let pages = &mut map[index];
let mut page_index = initializer.offset / WASM_PAGE_SIZE;
let mut page_offset = initializer.offset % WASM_PAGE_SIZE;
let mut page_index = offset / WASM_PAGE_SIZE;
let mut page_offset = offset % WASM_PAGE_SIZE;
let mut data_offset = 0;
let mut data_remaining = initializer.data.len();

Expand Down Expand Up @@ -268,7 +277,7 @@ pub struct TableInitializer {
/// Optionally, a global variable giving a base index.
pub base: Option<GlobalIndex>,
/// The offset to add to the base.
pub offset: usize,
pub offset: u32,
/// The values to write into the table elements.
pub elements: Box<[FuncIndex]>,
}
Expand Down
4 changes: 2 additions & 2 deletions crates/environ/src/module_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data
&mut self,
table_index: TableIndex,
base: Option<GlobalIndex>,
offset: usize,
offset: u32,
elements: Box<[FuncIndex]>,
) -> WasmResult<()> {
for element in elements.iter() {
Expand Down Expand Up @@ -794,7 +794,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data
&mut self,
memory_index: MemoryIndex,
base: Option<GlobalIndex>,
offset: usize,
offset: u32,
data: &'data [u8],
) -> WasmResult<()> {
match &mut self.result.module.memory_initialization {
Expand Down
88 changes: 58 additions & 30 deletions crates/runtime/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,11 +573,11 @@ impl Instance {
return None;
}

Some(unsafe { &*self.anyfunc_ptr(index) })
unsafe { Some(&*self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))) }
}

unsafe fn anyfunc_ptr(&self, index: FuncIndex) -> *mut VMCallerCheckedAnyfunc {
self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))
unsafe fn anyfunc_base(&self) -> *mut VMCallerCheckedAnyfunc {
self.vmctx_plus_offset(self.offsets.vmctx_anyfuncs_begin())
Comment on lines +576 to +580
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change? The anyfunc_ptr call should be inlined just fine, unless I'm missing something. This just makes things more open coded, which isn't the direction I'd generally move in.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mostly changed because it was previously used in a loop but the pattern it was using was a bit slower than necessary. The main slowdown is that vmctx_anyfunc performed a multiplication instruction with the size of a pointer, where both the size and the index were not constant, resulting in bad codegen. By using the native types here it makes pointer arithmetic much speedier since everything is known at compile time.

Once I removed one main use of the function in the table initialization bits I figured I might as well go ahead and remove the function and speed up other callsites as well.

I do agree it's a bit more open coded, but then again everything about VMContext feels sort of inherently open-coded

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I would really have expected anyfunc_ptr to be inlined and then for inst combine to see that one of the operands of the multiplication was a constant and go to town from there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh it is inlined, but that's not the problem here. The function called has a multiplication with 3 * self.pointer_size, but self.pointer_size is not a constant value. We could probably improve codegen by having something like NativeVMOffsets for when you're specifically not cross-compiling somewhere else but for now LLVM has no way of seeing that the value originally stored there was mem::size_of::<usize>()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I get it now. Thanks for your patience explaining it to me!

}

fn find_passive_segment<'a, I, D, T>(
Expand Down Expand Up @@ -611,38 +611,56 @@ impl Instance {
src: u32,
len: u32,
) -> Result<(), Trap> {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init

let table = self.get_table(table_index);

let elements = Self::find_passive_segment(
elem_index,
&self.module.passive_elements_map,
&self.module.passive_elements,
&self.dropped_elements,
);
self.table_init_segment(table_index, elements, dst, src, len)
}

if src
.checked_add(len)
.map_or(true, |n| n as usize > elements.len())
|| dst.checked_add(len).map_or(true, |m| m > table.size())
{
return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds));
}
pub(crate) fn table_init_segment(
&self,
table_index: TableIndex,
elements: &[FuncIndex],
dst: u32,
src: u32,
len: u32,
) -> Result<(), Trap> {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init

// TODO(#983): investigate replacing this get/set loop with a `memcpy`.
for (dst, src) in (dst..dst + len).zip(src..src + len) {
let elem = self
.get_caller_checked_anyfunc(elements[src as usize])
.map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| {
f as *const VMCallerCheckedAnyfunc as *mut _
});

table
.set(dst, TableElement::FuncRef(elem))
.expect("should never panic because we already did the bounds check above");
}
let table = self.get_table(table_index);

let elements = match elements
.get(usize::try_from(src).unwrap()..)
.and_then(|s| s.get(..usize::try_from(len).unwrap()))
{
Some(elements) => elements,
None => return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)),
};

match table.element_type() {
TableElementType::Func => unsafe {
let base = self.anyfunc_base();
table.init_funcs(
dst,
elements.iter().map(|idx| {
if *idx == FuncIndex::reserved_value() {
ptr::null_mut()
} else {
debug_assert!(idx.as_u32() < self.offsets.num_defined_functions);
base.add(usize::try_from(idx.as_u32()).unwrap())
}
}),
)?;
},

TableElementType::Val(_) => {
debug_assert!(elements.iter().all(|e| *e == FuncIndex::reserved_value()));
table.fill(dst, TableElement::ExternRef(None), len)?;
}
}
Ok(())
}

Expand Down Expand Up @@ -773,16 +791,26 @@ impl Instance {
src: u32,
len: u32,
) -> Result<(), Trap> {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init

let memory = self.get_memory(memory_index);

let data = Self::find_passive_segment(
data_index,
&self.module.passive_data_map,
&self.module.passive_data,
&self.dropped_data,
);
self.memory_init_segment(memory_index, &data, dst, src, len)
}

pub(crate) fn memory_init_segment(
&self,
memory_index: MemoryIndex,
data: &[u8],
dst: u32,
src: u32,
len: u32,
) -> Result<(), Trap> {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init

let memory = self.get_memory(memory_index);

if src
.checked_add(len)
Expand Down
Loading