diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs index 6f816f4f577d..9dc134e48073 100644 --- a/cranelift/codegen/src/ir/libcall.rs +++ b/cranelift/codegen/src/ir/libcall.rs @@ -32,6 +32,12 @@ pub enum LibCall { UremI64, /// srem.i64 SremI64, + /// ishl.i64 + IshlI64, + /// ushr.i64 + UshrI64, + /// sshr.i64 + SshrI64, /// ceil.f32 CeilF32, /// ceil.f64 @@ -75,6 +81,9 @@ impl FromStr for LibCall { "SdivI64" => Ok(Self::SdivI64), "UremI64" => Ok(Self::UremI64), "SremI64" => Ok(Self::SremI64), + "IshlI64" => Ok(Self::IshlI64), + "UshrI64" => Ok(Self::UshrI64), + "SshrI64" => Ok(Self::SshrI64), "CeilF32" => Ok(Self::CeilF32), "CeilF64" => Ok(Self::CeilF64), "FloorF32" => Ok(Self::FloorF32), @@ -105,6 +114,9 @@ impl LibCall { Opcode::Sdiv => Self::SdivI64, Opcode::Urem => Self::UremI64, Opcode::Srem => Self::SremI64, + Opcode::Ishl => Self::IshlI64, + Opcode::Ushr => Self::UshrI64, + Opcode::Sshr => Self::SshrI64, _ => return None, }, types::F32 => match opcode { diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index c00ca973575b..b957a65254f2 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -1,6 +1,7 @@ //! Encoding tables for x86 ISAs. 
use super::registers::*; +use super::settings as isa_settings; use crate::bitset::BitSet; use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; @@ -13,6 +14,7 @@ use crate::isa::encoding::base_size; use crate::isa::encoding::{Encoding, RecipeSizing}; use crate::isa::RegUnit; use crate::isa::{self, TargetIsa}; +use crate::legalizer::expand_as_libcall; use crate::predicates; use crate::regalloc::RegDiversions; @@ -1376,6 +1378,8 @@ fn convert_ushr( let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); + let isa_flags = isa_settings::Flags::new(&isa.flags(), isa_settings::builder()); + if let ir::InstructionData::Binary { opcode: ir::Opcode::Ushr, args: [arg0, arg1], @@ -1414,10 +1418,16 @@ fn convert_ushr( pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index); } else if arg0_type == I64 { // 64 bit shifts need to be legalized on x86_32. - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psrl(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + if isa_flags.has_sse41() { + // if we have pinsrq/pextrq (SSE 4.1), legalize to that + let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); + let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); + let shifted = pos.ins().x86_psrl(value, amount); + contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + } else { + // otherwise legalize to libcall + expand_as_libcall(inst, func, isa); + } } else { // Everything else should be already legal. 
unreachable!() @@ -1450,6 +1460,8 @@ fn convert_ishl( let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); + let isa_flags = isa_settings::Flags::new(&isa.flags(), isa_settings::builder()); + if let ir::InstructionData::Binary { opcode: ir::Opcode::Ishl, args: [arg0, arg1], @@ -1488,10 +1500,16 @@ fn convert_ishl( pos.func.dfg.replace(inst).x86_psll(arg0, shift_index); } else if arg0_type == I64 { // 64 bit shifts need to be legalized on x86_32. - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psll(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + if isa_flags.has_sse41() { + // if we have pinsrq/pextrq (SSE 4.1), legalize to that + let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); + let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); + let shifted = pos.ins().x86_psll(value, amount); + contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + } else { + // otherwise legalize to libcall + expand_as_libcall(inst, func, isa); + } } else { // Everything else should be already legal. 
unreachable!() diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index e28cc47d6ab7..7b8307487cbe 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -35,7 +35,7 @@ mod table; use self::call::expand_call; use self::globalvalue::expand_global_value; use self::heap::expand_heap_addr; -use self::libcall::expand_as_libcall; +pub use self::libcall::expand_as_libcall; use self::table::expand_table_addr; enum LegalizeInstResult { diff --git a/cranelift/filetests/filetests/wasm/i64-arith32.clif b/cranelift/filetests/filetests/wasm/i64-arith32.clif index d3017c23e608..e9a793a1d0d8 100644 --- a/cranelift/filetests/filetests/wasm/i64-arith32.clif +++ b/cranelift/filetests/filetests/wasm/i64-arith32.clif @@ -29,3 +29,39 @@ block0(v0: i64, v1: i64): v2 = urem v0, v1 return v2 } + +function %i64_i32_shl(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ishl v0, v1 + return v2 +} + +function %i64_i32_shr_u(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ushr v0, v1 + return v2 +} + +function %i64_i32_shr_s(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sshr v0, v1 + return v2 +} + +function %i64_i64_shl(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl v0, v1 + return v2 +} + +function %i64_i64_shr_u(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr v0, v1 + return v2 +} + +function %i64_i64_shr_s(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr v0, v1 + return v2 +} diff --git a/cranelift/module/src/backend.rs b/cranelift/module/src/backend.rs index 205d759c5e0c..4743e4fe6143 100644 --- a/cranelift/module/src/backend.rs +++ b/cranelift/module/src/backend.rs @@ -183,6 +183,9 @@ pub fn default_libcall_names() -> Box String> { ir::LibCall::SdivI64 => "__divdi3".to_owned(), ir::LibCall::UremI64 => "__umoddi3".to_owned(), ir::LibCall::SremI64 => "__moddi3".to_owned(), + ir::LibCall::IshlI64 => "__ashldi3".to_owned(), + ir::LibCall::UshrI64 => 
"__lshrdi3".to_owned(), + ir::LibCall::SshrI64 => "__ashrdi3".to_owned(), ir::LibCall::CeilF32 => "ceilf".to_owned(), ir::LibCall::CeilF64 => "ceil".to_owned(), ir::LibCall::FloorF32 => "floorf".to_owned(), diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 26a9a8833ccc..30d96877f1bd 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -50,6 +50,9 @@ fn apply_reloc( SdivI64 => wasmtime_i64_sdiv as usize, UremI64 => wasmtime_i64_urem as usize, SremI64 => wasmtime_i64_srem as usize, + IshlI64 => wasmtime_i64_ishl as usize, + UshrI64 => wasmtime_i64_ushr as usize, + SshrI64 => wasmtime_i64_sshr as usize, CeilF32 => wasmtime_f32_ceil as usize, FloorF32 => wasmtime_f32_floor as usize, TruncF32 => wasmtime_f32_trunc as usize, diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index f651dc0d1430..be5e5b9e16e7 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -98,6 +98,21 @@ pub extern "C" fn wasmtime_i64_srem(x: i64, y: i64) -> i64 { x % y } +/// Implementation of i64.ishl; the shift amount is taken modulo 64 so out-of-range amounts never panic. +pub extern "C" fn wasmtime_i64_ishl(x: i64, y: i64) -> i64 { + x.wrapping_shl(y as u32) +} + +/// Implementation of i64.ushr (logical shift); the shift amount is taken modulo 64 so out-of-range amounts never panic. +pub extern "C" fn wasmtime_i64_ushr(x: u64, y: i64) -> u64 { + x.wrapping_shr(y as u32) +} + +/// Implementation of i64.sshr (arithmetic shift); the shift amount is taken modulo 64 so out-of-range amounts never panic. +pub extern "C" fn wasmtime_i64_sshr(x: i64, y: i64) -> i64 { + x.wrapping_shr(y as u32) +} + /// Implementation of f64.ceil pub extern "C" fn wasmtime_f64_ceil(x: f64) -> f64 { x.ceil()