From e3ece4b156f1c613699af2979cd72f13277212dc Mon Sep 17 00:00:00 2001 From: whitequark Date: Fri, 29 May 2020 15:53:52 +0000 Subject: [PATCH] cranelift: add i64.{ishl,ushr,sshr} libcalls. These libcalls are useful for 32-bit platforms. On x86_32 in particular, commit 4ec16fa0 added support for legalizing 64-bit shifts through SIMD operations. However, that legalization requires SIMD to be enabled and SSE 4.1 to be supported, which is not acceptable as a hard requirement. --- cranelift/codegen/src/ir/libcall.rs | 12 ++++++ cranelift/codegen/src/isa/x86/enc_tables.rs | 37 +++++++++++++++---- cranelift/codegen/src/legalizer/mod.rs | 2 +- .../filetests/filetests/wasm/i64-arith32.clif | 36 ++++++++++++++++++ cranelift/module/src/backend.rs | 3 ++ crates/jit/src/link.rs | 3 ++ crates/runtime/src/libcalls.rs | 15 ++++++++ 7 files changed, 99 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs index 6f816f4f577d..9dc134e48073 100644 --- a/cranelift/codegen/src/ir/libcall.rs +++ b/cranelift/codegen/src/ir/libcall.rs @@ -32,6 +32,12 @@ pub enum LibCall { UremI64, /// srem.i64 SremI64, + /// ishl.i64 + IshlI64, + /// ushr.i64 + UshrI64, + /// sshr.i64 + SshrI64, /// ceil.f32 CeilF32, /// ceil.f64 @@ -75,6 +81,9 @@ impl FromStr for LibCall { "SdivI64" => Ok(Self::SdivI64), "UremI64" => Ok(Self::UremI64), "SremI64" => Ok(Self::SremI64), + "IshlI64" => Ok(Self::IshlI64), + "UshrI64" => Ok(Self::UshrI64), + "SshrI64" => Ok(Self::SshrI64), "CeilF32" => Ok(Self::CeilF32), "CeilF64" => Ok(Self::CeilF64), "FloorF32" => Ok(Self::FloorF32), @@ -105,6 +114,9 @@ impl LibCall { Opcode::Sdiv => Self::SdivI64, Opcode::Urem => Self::UremI64, Opcode::Srem => Self::SremI64, + Opcode::Ishl => Self::IshlI64, + Opcode::Ushr => Self::UshrI64, + Opcode::Sshr => Self::SshrI64, _ => return None, }, types::F32 => match opcode { diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index 
d9f8f87f9ad1..a751ea313858 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -13,6 +13,7 @@ use crate::isa::encoding::base_size; use crate::isa::encoding::{Encoding, RecipeSizing}; use crate::isa::RegUnit; use crate::isa::{self, TargetIsa}; +use crate::legalizer::expand_as_libcall; use crate::predicates; use crate::regalloc::RegDiversions; @@ -1428,10 +1429,20 @@ fn convert_ushr( pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index); } else if arg0_type == I64 { // 64 bit shifts need to be legalized on x86_32. - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psrl(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + let x86_isa = isa + .as_any() + .downcast_ref::<isa::x86::Isa>() + .expect("the target ISA must be x86 at this point"); + if x86_isa.isa_flags.has_sse41() { + // if we have pinsrq/pextrq (SSE 4.1), legalize to that + let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); + let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); + let shifted = pos.ins().x86_psrl(value, amount); + contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + } else { + // otherwise legalize to libcall + expand_as_libcall(inst, func, isa); + } } else { // Everything else should be already legal. unreachable!() @@ -1502,10 +1513,20 @@ fn convert_ishl( pos.func.dfg.replace(inst).x86_psll(arg0, shift_index); } else if arg0_type == I64 { // 64 bit shifts need to be legalized on x86_32. 
- let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psll(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + let x86_isa = isa + .as_any() + .downcast_ref::<isa::x86::Isa>() + .expect("the target ISA must be x86 at this point"); + if x86_isa.isa_flags.has_sse41() { + // if we have pinsrq/pextrq (SSE 4.1), legalize to that + let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); + let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); + let shifted = pos.ins().x86_psll(value, amount); + contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); + } else { + // otherwise legalize to libcall + expand_as_libcall(inst, func, isa); + } } else { // Everything else should be already legal. unreachable!() diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index e28cc47d6ab7..49e4602cf504 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -35,7 +35,7 @@ mod table; use self::call::expand_call; use self::globalvalue::expand_global_value; use self::heap::expand_heap_addr; -use self::libcall::expand_as_libcall; +pub(crate) use self::libcall::expand_as_libcall; use self::table::expand_table_addr; enum LegalizeInstResult { diff --git a/cranelift/filetests/filetests/wasm/i64-arith32.clif b/cranelift/filetests/filetests/wasm/i64-arith32.clif index d3017c23e608..e9a793a1d0d8 100644 --- a/cranelift/filetests/filetests/wasm/i64-arith32.clif +++ b/cranelift/filetests/filetests/wasm/i64-arith32.clif @@ -29,3 +29,39 @@ block0(v0: i64, v1: i64): v2 = urem v0, v1 return v2 } + +function %i64_i32_shl(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ishl v0, v1 + return v2 +} + +function %i64_i32_shr_u(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ushr v0, v1 + return v2 +} + +function %i64_i32_shr_s(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sshr v0, v1 
+ return v2 +} + +function %i64_i64_shl(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl v0, v1 + return v2 +} + +function %i64_i64_shr_u(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr v0, v1 + return v2 +} + +function %i64_i64_shr_s(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr v0, v1 + return v2 +} diff --git a/cranelift/module/src/backend.rs b/cranelift/module/src/backend.rs index 205d759c5e0c..4743e4fe6143 100644 --- a/cranelift/module/src/backend.rs +++ b/cranelift/module/src/backend.rs @@ -183,6 +183,9 @@ pub fn default_libcall_names() -> Box<dyn Fn(ir::LibCall) -> String> { ir::LibCall::SdivI64 => "__divdi3".to_owned(), ir::LibCall::UremI64 => "__umoddi3".to_owned(), ir::LibCall::SremI64 => "__moddi3".to_owned(), + ir::LibCall::IshlI64 => "__ashldi3".to_owned(), + ir::LibCall::UshrI64 => "__lshrdi3".to_owned(), + ir::LibCall::SshrI64 => "__ashrdi3".to_owned(), ir::LibCall::CeilF32 => "ceilf".to_owned(), ir::LibCall::CeilF64 => "ceil".to_owned(), ir::LibCall::FloorF32 => "floorf".to_owned(), diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 9201c4e0b015..cc3044b1a5e5 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -50,6 +50,9 @@ fn apply_reloc( SdivI64 => wasmtime_i64_sdiv as usize, UremI64 => wasmtime_i64_urem as usize, SremI64 => wasmtime_i64_srem as usize, + IshlI64 => wasmtime_i64_ishl as usize, + UshrI64 => wasmtime_i64_ushr as usize, + SshrI64 => wasmtime_i64_sshr as usize, CeilF32 => wasmtime_f32_ceil as usize, FloorF32 => wasmtime_f32_floor as usize, TruncF32 => wasmtime_f32_trunc as usize, diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index f651dc0d1430..be5e5b9e16e7 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -98,6 +98,21 @@ pub extern "C" fn wasmtime_i64_srem(x: i64, y: i64) -> i64 { x % y } +/// Implementation of i64.ishl; the shift amount `y` is masked to its low 6 bits (taken modulo 64) so an out-of-range amount cannot trigger a shift-overflow panic. +pub extern "C" fn wasmtime_i64_ishl(x: i64, y: i64) -> i64 { + x << (y & 63) +} + +/// Implementation of i64.ushr (logical shift of the unsigned value); the shift amount `y` is masked to its low 6 bits (taken modulo 64) so an out-of-range amount cannot trigger a shift-overflow panic. +pub 
extern "C" fn wasmtime_i64_ushr(x: u64, y: i64) -> u64 { + x >> (y & 63) +} + +/// Implementation of i64.sshr (arithmetic, sign-propagating shift); the shift amount `y` is masked to its low 6 bits (taken modulo 64) so an out-of-range amount cannot trigger a shift-overflow panic. +pub extern "C" fn wasmtime_i64_sshr(x: i64, y: i64) -> i64 { + x >> (y & 63) +} + /// Implementation of f64.ceil pub extern "C" fn wasmtime_f64_ceil(x: f64) -> f64 { x.ceil()