From d0e48ff3f5ac83a41f980b2e3e2ba91de00f9dcc Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 9 Jun 2020 10:55:55 +0100 Subject: [PATCH] arm64: Implement Icmp for I16X8 and I32X4 Copyright (c) 2020, Arm Limited. --- build.rs | 2 + cranelift/codegen/src/isa/aarch64/abi.rs | 10 +- .../codegen/src/isa/aarch64/inst/emit.rs | 2 + .../src/isa/aarch64/inst/emit_tests.rs | 120 ++++++++++++++++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 7 +- .../codegen/src/isa/aarch64/inst/regs.rs | 2 + cranelift/codegen/src/isa/aarch64/lower.rs | 4 +- .../codegen/src/isa/aarch64/lower_inst.rs | 15 ++- 8 files changed, 149 insertions(+), 13 deletions(-) diff --git a/build.rs b/build.rs index 835cc8788ea5..b36501679fe5 100644 --- a/build.rs +++ b/build.rs @@ -182,6 +182,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "Cranelift" => match (testsuite, testname) { ("simd", "simd_i8x16_cmp") => return false, ("simd", "simd_store") => return false, + ("simd", "simd_i16x8_cmp") => return false, + ("simd", "simd_i32x4_cmp") => return false, // Most simd tests are known to fail on aarch64 for now, it's going // to be a big chunk of work to implement them all there! 
("simd", _) if target.contains("aarch64") => return true, diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 7ddf5ecb1b32..ed12ed036548 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -406,7 +406,15 @@ fn in_int_reg(ty: ir::Type) -> bool { fn in_vec_reg(ty: ir::Type) -> bool { match ty { - types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true, + types::F32 | types::F64 => true, + types::B8X16 + | types::I8X16 + | types::B16X8 + | types::I16X8 + | types::B32X4 + | types::I32X4 + | types::B64X2 + | types::I64X2 => true, _ => false, } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 60e2480cb08b..d337508cb4d4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1175,6 +1175,8 @@ impl MachInstEmit for Inst { } => { let enc_size_for_cmp = match ty { I8X16 => 0b00, + I16X8 => 0b01, + I32X4 => 0b10, _ => 0, }; diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 58985852857d..a0e05e3a37bb 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1985,6 +1985,126 @@ fn test_aarch64_binemit() { "cmhs v8.16b, v2.16b, v15.16b", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmeq, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + ty: I16X8, + }, + "E38E786E", + "cmeq v3.8h, v23.8h, v24.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmgt, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + ty: I16X8, + }, + "E336784E", + "cmgt v3.8h, v23.8h, v24.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmge, + rd: writable_vreg(23), + rn: vreg(9), + rm: vreg(12), + ty: I16X8, + }, + "373D6C4E", + "cmge v23.8h, 
v9.8h, v12.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhi, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + ty: I16X8, + }, + "2534616E", + "cmhi v5.8h, v1.8h, v1.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhs, + rd: writable_vreg(8), + rn: vreg(2), + rm: vreg(15), + ty: I16X8, + }, + "483C6F6E", + "cmhs v8.8h, v2.8h, v15.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmeq, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + ty: I32X4, + }, + "E38EB86E", + "cmeq v3.4s, v23.4s, v24.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmgt, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + ty: I32X4, + }, + "E336B84E", + "cmgt v3.4s, v23.4s, v24.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmge, + rd: writable_vreg(23), + rn: vreg(9), + rm: vreg(12), + ty: I32X4, + }, + "373DAC4E", + "cmge v23.4s, v9.4s, v12.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhi, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + ty: I32X4, + }, + "2534A16E", + "cmhi v5.4s, v1.4s, v1.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhs, + rd: writable_vreg(8), + rn: vreg(2), + rm: vreg(15), + ty: I32X4, + }, + "483CAF6E", + "cmhs v8.4s, v2.4s, v15.4s", + )); + insns.push(( Inst::VecMisc { op: VecMisc2::Not, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index f5b6ecde259b..780d14718b2e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -5,8 +5,8 @@ use crate::binemit::CodeOffset; use crate::ir::types::{ - B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2, - I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, + B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, + I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, }; use 
crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; @@ -1977,8 +1977,7 @@ impl MachInst for Inst { I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64), F32 | F64 => Ok(RegClass::V128), IFLAGS | FFLAGS => Ok(RegClass::I64), - I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128), - B8X16 => Ok(RegClass::V128), + B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => Ok(RegClass::V128), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 59841ed82c9c..5b60e2974019 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -320,6 +320,8 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> match ty { I8X16 => s.push_str(".16b"), + I16X8 => s.push_str(".8h"), + I32X4 => s.push_str(".4s"), F32X2 => s.push_str(".2s"), I8X8 => s.push_str(".8b"), I16X4 => s.push_str(".4h"), diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 2cf93296e908..5cb4940b66f5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -716,8 +716,8 @@ pub fn ty_bits(ty: Type) -> usize { B64 | I64 | F64 => 64, B128 | I128 => 128, IFLAGS | FFLAGS => 32, - I8X8 | I16X4 | I32X2 => 64, - B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128, + B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64, + B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => 128, _ => panic!("ty_bits() on unknown type: {:?}", ty), } } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index a52b6fba0297..d2aeabb1a61a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1193,12 +1193,15 @@ pub(crate) fn 
lower_insn_to_regs<C: LowerCtx<I = Inst>>( ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); ctx.emit(Inst::CondSet { cond, rd }); } else { - if ty != I8X16 { - return Err(CodegenError::Unsupported(format!( - "unsupported simd type: {:?}", - ty - ))); - } + match ty { + I8X16 | I16X8 | I32X4 => {} + _ => { + return Err(CodegenError::Unsupported(format!( + "unsupported simd type: {:?}", + ty + ))); + } + }; let mut rn = input_to_reg(ctx, inputs[0], narrow_mode); let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);