Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable the wast::Cranelift::spec::simd::simd_align test for AArch64 #1802

Merged
merged 1 commit into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
_ => (),
},
"Cranelift" => match (testsuite, testname) {
("simd", "simd_store") => return false,
("simd", "simd_i8x16_cmp") => return false,
("simd", "simd_store") => return false,
// Most simd tests are known to fail on aarch64 for now, it's going
// to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true,
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {

/// Returns whether a value of the given type is stored in a vector (SIMD/FP)
/// register on AArch64: scalar floats plus the 128-bit integer vector types.
fn in_vec_reg(ty: ir::Type) -> bool {
    match ty {
        // Scalar FP values and full-width integer vectors all live in the
        // V-register file.
        types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
        _ => false,
    }
}
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
| machreg_to_gpr(rd.to_reg()),
);
}
&Inst::VecExtend { t, rd, rn } => {
let (u, immh) = match t {
VecExtendOp::Sxtl8 => (0b0, 0b001),
VecExtendOp::Sxtl16 => (0b0, 0b010),
VecExtendOp::Sxtl32 => (0b0, 0b100),
VecExtendOp::Uxtl8 => (0b1, 0b001),
VecExtendOp::Uxtl16 => (0b1, 0b010),
VecExtendOp::Uxtl32 => (0b1, 0b100),
};
sink.put4(
0b000_011110_0000_000_101001_00000_00000
| (u << 29)
| (immh << 19)
| (machreg_to_vec(rn) << 5)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecRRR {
rd,
rn,
Expand Down
54 changes: 54 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
"E5979F9A",
"cset x5, hi",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
rd: writable_vreg(4),
rn: vreg(27),
},
"64A7080F",
"sxtl v4.8h, v27.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl16,
rd: writable_vreg(17),
rn: vreg(19),
},
"71A6100F",
"sxtl v17.4s, v19.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl32,
rd: writable_vreg(30),
rn: vreg(6),
},
"DEA4200F",
"sxtl v30.2d, v6.2s",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl8,
rd: writable_vreg(3),
rn: vreg(29),
},
"A3A7082F",
"uxtl v3.8h, v29.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl16,
rd: writable_vreg(15),
rn: vreg(12),
},
"8FA5102F",
"uxtl v15.4s, v12.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl32,
rd: writable_vreg(28),
rn: vreg(2),
},
"5CA4202F",
"uxtl v28.2d, v2.2s",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
Expand Down
54 changes: 52 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
Expand Down Expand Up @@ -186,6 +187,23 @@ pub enum FpuRoundMode {
Nearest64,
}

/// Type of vector element extensions.
///
/// Each variant widens the elements of the low 64-bit half of the source
/// vector to elements of twice the width, signed (`sxtl`) or unsigned
/// (`uxtl`), matching the AArch64 instructions of those names.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecExtendOp {
    /// Signed extension of 8-bit elements (e.g. 8b -> 8h)
    Sxtl8,
    /// Signed extension of 16-bit elements (e.g. 4h -> 4s)
    Sxtl16,
    /// Signed extension of 32-bit elements (e.g. 2s -> 2d)
    Sxtl32,
    /// Unsigned extension of 8-bit elements (e.g. 8b -> 8h)
    Uxtl8,
    /// Unsigned extension of 16-bit elements (e.g. 4h -> 4s)
    Uxtl16,
    /// Unsigned extension of 32-bit elements (e.g. 2s -> 2d)
    Uxtl32,
}

/// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
Expand Down Expand Up @@ -667,6 +685,13 @@ pub enum Inst {
rn: Reg,
},

/// Vector extend.
VecExtend {
t: VecExtendOp,
rd: Writable<Reg>,
rn: Reg,
},

/// A vector ALU op.
VecRRR {
alu_op: VecALUOp,
Expand Down Expand Up @@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecExtend { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRR { rd, rn, rm, .. } => {
collector.add_def(rd);
collector.add_use(rn);
Expand Down Expand Up @@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecExtend {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRR {
ref mut rd,
ref mut rn,
Expand Down Expand Up @@ -1940,7 +1977,7 @@ impl MachInst for Inst {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
I8X16 => Ok(RegClass::V128),
I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
B8X16 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
Expand Down Expand Up @@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
let rn = rn.show_rru(mb_rru);
format!("mov {}, {}.d[0]", rd, rn)
}
&Inst::VecExtend { t, rd, rn } => {
let (op, dest, src) = match t {
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRR {
rd,
rn,
Expand Down
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/regs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
match ty {
I8X16 => s.push_str(".16b"),
F32X2 => s.push_str(".2s"),
I8X8 => s.push_str(".8b"),
I16X4 => s.push_str(".4h"),
I16X8 => s.push_str(".8h"),
I32X2 => s.push_str(".2s"),
I32X4 => s.push_str(".4s"),
I64X2 => s.push_str(".2d"),
_ => unimplemented!(),
}

Expand Down
5 changes: 3 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,15 +716,16 @@ pub fn ty_bits(ty: Type) -> usize {
B64 | I64 | F64 => 64,
B128 | I128 => 128,
IFLAGS | FFLAGS => 32,
I8X16 | B8X16 => 128,
I8X8 | I16X4 | I32X2 => 64,
B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty),
}
}

pub(crate) fn ty_is_int(ty: Type) -> bool {
match ty {
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
F32 | F64 | B128 | I128 | I8X16 => false,
F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
}
Expand Down
37 changes: 30 additions & 7 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex
| Opcode::Sload16Complex
| Opcode::Uload32Complex
| Opcode::Sload32Complex => {
| Opcode::Sload32Complex
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
let off = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
Expand All @@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Sload32Complex
| Opcode::Uload32Complex => I32,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
_ => unreachable!(),
};
let sign_extend = match op {
Expand Down Expand Up @@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
// Note that we treat some of the vector loads as scalar floating-point loads,
// which is correct in a little endian environment.
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
_ => panic!("Unsupported size in load"),
});

let vec_extend = match op {
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
_ => None,
};

if let Some(t) = vec_extend {
ctx.emit(Inst::VecExtend {
t,
rd,
rn: rd.to_reg(),
});
}
}

Opcode::Store
Expand Down Expand Up @@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Extractlane
| Opcode::ScalarToVector
| Opcode::Swizzle
| Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => {
// TODO
panic!("Vector ops not implemented.");
Expand Down