diff --git a/cranelift-codegen/meta/src/cdsl/ast.rs b/cranelift-codegen/meta/src/cdsl/ast.rs index 141ffd84c..798ee29d0 100644 --- a/cranelift-codegen/meta/src/cdsl/ast.rs +++ b/cranelift-codegen/meta/src/cdsl/ast.rs @@ -8,6 +8,7 @@ use cranelift_entity::{entity_impl, PrimaryMap}; use std::fmt; +#[derive(Debug)] pub enum Expr { Var(VarIndex), Literal(Literal), @@ -363,6 +364,7 @@ impl VarPool { /// /// An `Apply` AST expression is created by using function call syntax on instructions. This /// applies to both bound and unbound polymorphic instructions. +#[derive(Debug)] pub struct Apply { pub inst: Instruction, pub args: Vec, diff --git a/cranelift-codegen/meta/src/cdsl/instructions.rs b/cranelift-codegen/meta/src/cdsl/instructions.rs index 1689f2a5c..f061be94e 100644 --- a/cranelift-codegen/meta/src/cdsl/instructions.rs +++ b/cranelift-codegen/meta/src/cdsl/instructions.rs @@ -14,7 +14,7 @@ use crate::cdsl::type_inference::Constraint; use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; use crate::cdsl::typevar::TypeVar; -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct OpcodeNumber(u32); entity_impl!(OpcodeNumber); @@ -79,12 +79,14 @@ impl InstructionGroup { } } +#[derive(Debug)] pub struct PolymorphicInfo { pub use_typevar_operand: bool, pub ctrl_typevar: TypeVar, pub other_typevars: Vec, } +#[derive(Debug)] pub struct InstructionContent { /// Instruction mnemonic, also becomes opcode name. pub name: String, @@ -139,7 +141,7 @@ pub struct InstructionContent { pub writes_cpu_flags: bool, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Instruction { content: Rc, } @@ -1125,6 +1127,11 @@ fn bind_vector( mut value_types: Vec, ) -> BoundInstruction { let num_lanes = vector_size_in_bits / lane_type.lane_bits(); + assert!( + num_lanes >= 2, + "Minimum lane number for bind_vector is 2, found {}.", + num_lanes, + ); let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); value_types.push(ValueTypeOrAny::ValueType(vector_type)); verify_polymorphic_binding(&inst, &value_types); diff --git a/cranelift-codegen/meta/src/cdsl/type_inference.rs b/cranelift-codegen/meta/src/cdsl/type_inference.rs index 101cfa410..a56d81463 100644 --- a/cranelift-codegen/meta/src/cdsl/type_inference.rs +++ b/cranelift-codegen/meta/src/cdsl/type_inference.rs @@ -4,7 +4,7 @@ use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; use std::collections::{HashMap, HashSet}; use std::iter::FromIterator; -#[derive(Hash, PartialEq, Eq)] +#[derive(Debug, Hash, PartialEq, Eq)] pub enum Constraint { /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at /// runtime. 
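A note on the new assertion in `bind_vector` above: binding a vector whose lane width equals the vector width would yield a single lane, which is not a vector. The sketch below is a standalone illustration of that check only; the free function `num_lanes` is invented for the example, the real code computes the same quotient inside `bind_vector`.

```rust
// Standalone sketch of the lane-count check added to bind_vector.
fn num_lanes(vector_size_in_bits: u64, lane_bits: u64) -> u64 {
    let num_lanes = vector_size_in_bits / lane_bits;
    assert!(
        num_lanes >= 2,
        "Minimum lane number for bind_vector is 2, found {}.",
        num_lanes,
    );
    num_lanes
}

fn main() {
    assert_eq!(num_lanes(128, 8), 16); // i8x16 is fine
    assert_eq!(num_lanes(128, 64), 2); // i64x2 is fine
    // num_lanes(128, 128) would panic: a one-lane "vector" of i128 is rejected.
}
```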
This requires that: diff --git a/cranelift-codegen/meta/src/cdsl/types.rs b/cranelift-codegen/meta/src/cdsl/types.rs index eba239d1d..f431bb3ed 100644 --- a/cranelift-codegen/meta/src/cdsl/types.rs +++ b/cranelift-codegen/meta/src/cdsl/types.rs @@ -215,12 +215,14 @@ impl LaneType { LaneType::BoolType(shared_types::Bool::B16) => 2, LaneType::BoolType(shared_types::Bool::B32) => 3, LaneType::BoolType(shared_types::Bool::B64) => 4, - LaneType::IntType(shared_types::Int::I8) => 5, - LaneType::IntType(shared_types::Int::I16) => 6, - LaneType::IntType(shared_types::Int::I32) => 7, - LaneType::IntType(shared_types::Int::I64) => 8, - LaneType::FloatType(shared_types::Float::F32) => 9, - LaneType::FloatType(shared_types::Float::F64) => 10, + LaneType::BoolType(shared_types::Bool::B128) => 5, + LaneType::IntType(shared_types::Int::I8) => 6, + LaneType::IntType(shared_types::Int::I16) => 7, + LaneType::IntType(shared_types::Int::I32) => 8, + LaneType::IntType(shared_types::Int::I64) => 9, + LaneType::IntType(shared_types::Int::I128) => 10, + LaneType::FloatType(shared_types::Float::F32) => 11, + LaneType::FloatType(shared_types::Float::F64) => 12, } } @@ -231,6 +233,7 @@ impl LaneType { 16 => shared_types::Bool::B16, 32 => shared_types::Bool::B32, 64 => shared_types::Bool::B64, + 128 => shared_types::Bool::B128, _ => unreachable!("unxpected num bits for bool"), }) } @@ -241,6 +244,7 @@ impl LaneType { 16 => shared_types::Int::I16, 32 => shared_types::Int::I32, 64 => shared_types::Int::I64, + 128 => shared_types::Int::I128, _ => unreachable!("unxpected num bits for int"), }) } diff --git a/cranelift-codegen/meta/src/cdsl/typevar.rs b/cranelift-codegen/meta/src/cdsl/typevar.rs index 9ae4c33fd..71c2fd2e2 100644 --- a/cranelift-codegen/meta/src/cdsl/typevar.rs +++ b/cranelift-codegen/meta/src/cdsl/typevar.rs @@ -9,7 +9,8 @@ use std::rc::Rc; use crate::cdsl::types::{BVType, LaneType, ReferenceType, SpecialType, ValueType}; const MAX_LANES: u16 = 256; -const MAX_BITS: u16 = 64; +const MAX_BITS: u16 = 128; +const MAX_FLOAT_BITS: u16 = 64; const MAX_BITVEC: u16 = MAX_BITS * MAX_LANES; /// Type variables can be used in place of concrete types when defining @@ -177,7 +178,7 @@ impl TypeVar { "can't double all integer types" ); assert!( - ts.floats.len() == 0 || *ts.floats.iter().max().unwrap() < MAX_BITS, + ts.floats.len() == 0 || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, "can't double all float types" ); assert!( @@ -503,7 +504,7 @@ impl TypeSet { copy.floats = NumSet::from_iter( self.floats .iter() - .filter(|&&x| x < MAX_BITS) + .filter(|&&x| x < MAX_FLOAT_BITS) .map(|&x| x * 2), ); copy.bools = NumSet::from_iter( @@ -621,7 +622,7 @@ impl TypeSet { let mut copy = self.clone(); copy.bitvecs = NumSet::new(); if self.bools.contains(&1) { - copy.ints = NumSet::from_iter(vec![8, 16, 32, 64]); + copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); copy.floats = NumSet::from_iter(vec![32, 64]); } else { copy.ints = &self.bools - &NumSet::from_iter(vec![1]); @@ -950,7 +951,7 @@ fn test_typevar_builder() { let type_set = TypeSetBuilder::new().ints(Interval::All).build(); assert_eq!(type_set.lanes, num_set![1]); assert!(type_set.floats.is_empty()); - assert_eq!(type_set.ints, num_set![8, 16, 32, 64]); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); assert!(type_set.bools.is_empty()); assert!(type_set.bitvecs.is_empty()); assert!(type_set.specials.is_empty()); @@ -959,7 +960,7 @@ fn test_typevar_builder() { assert_eq!(type_set.lanes, num_set![1]); assert!(type_set.floats.is_empty()); 
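The `MAX_BITS`/`MAX_FLOAT_BITS` split above exists because integers now double up to 128 bits while floats still stop at 64. A minimal sketch of the intended `double_width` behaviour, using a plain `BTreeSet<u16>` instead of the real `TypeSet`/`NumSet` machinery:

```rust
use std::collections::BTreeSet;

const MAX_BITS: u16 = 128;
const MAX_FLOAT_BITS: u16 = 64;

// Widths are filtered against their own maximum before doubling.
fn double_width(widths: &BTreeSet<u16>, max: u16) -> BTreeSet<u16> {
    widths.iter().filter(|&&x| x < max).map(|&x| x * 2).collect()
}

fn main() {
    let ints: BTreeSet<u16> = [8, 16, 32, 64, 128].iter().copied().collect();
    let floats: BTreeSet<u16> = [32, 64].iter().copied().collect();

    // i64 now doubles to i128; i128 itself has no double.
    let doubled_ints: BTreeSet<u16> = [16, 32, 64, 128].iter().copied().collect();
    assert_eq!(double_width(&ints, MAX_BITS), doubled_ints);

    // Floats are still capped at 64 bits, hence the separate MAX_FLOAT_BITS.
    let doubled_floats: BTreeSet<u16> = [64].iter().copied().collect();
    assert_eq!(double_width(&floats, MAX_FLOAT_BITS), doubled_floats);
}
```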
assert!(type_set.ints.is_empty()); - assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64]); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); assert!(type_set.bitvecs.is_empty()); assert!(type_set.specials.is_empty()); @@ -1101,7 +1102,7 @@ fn test_forward_images() { ); assert_eq!( TypeSetBuilder::new().ints(32..64).build().double_width(), - TypeSetBuilder::new().ints(64..64).build() + TypeSetBuilder::new().ints(64..128).build() ); assert_eq!( TypeSetBuilder::new().floats(32..32).build().double_width(), @@ -1117,7 +1118,7 @@ fn test_forward_images() { ); assert_eq!( TypeSetBuilder::new().bools(32..64).build().double_width(), - TypeSetBuilder::new().bools(64..64).build() + TypeSetBuilder::new().bools(64..128).build() ); } @@ -1145,7 +1146,7 @@ fn test_backward_images() { assert_eq!( TypeSetBuilder::new() .simd_lanes(1..4) - .bools(1..64) + .bools(1..128) .build() .preimage(DerivedFunc::AsBool), TypeSetBuilder::new() @@ -1205,9 +1206,9 @@ fn test_backward_images() { // Half width. assert_eq!( TypeSetBuilder::new() - .ints(64..64) + .ints(128..128) .floats(64..64) - .bools(64..64) + .bools(128..128) .build() .preimage(DerivedFunc::HalfWidth) .size(), @@ -1221,7 +1222,7 @@ fn test_backward_images() { .preimage(DerivedFunc::HalfWidth), TypeSetBuilder::new() .simd_lanes(64..256) - .bools(16..64) + .bools(16..128) .build(), ); diff --git a/cranelift-codegen/meta/src/cdsl/xform.rs b/cranelift-codegen/meta/src/cdsl/xform.rs index b1a0234cd..b90d552b9 100644 --- a/cranelift-codegen/meta/src/cdsl/xform.rs +++ b/cranelift-codegen/meta/src/cdsl/xform.rs @@ -183,7 +183,14 @@ fn rewrite_expr( assert_eq!( apply_target.inst().operands_in.len(), dummy_args.len(), - "number of arguments in instruction is incorrect" + "number of arguments in instruction {} is incorrect\nexpected: {:?}", + apply_target.inst().name, + apply_target + .inst() + .operands_in + .iter() + .map(|operand| format!("{}: {}", operand.name, operand.kind.name)) + .collect::>(), ); let mut args = Vec::new(); diff --git a/cranelift-codegen/meta/src/gen_legalizer.rs b/cranelift-codegen/meta/src/gen_legalizer.rs index 7b59844e6..da1fb1f58 100644 --- a/cranelift-codegen/meta/src/gen_legalizer.rs +++ b/cranelift-codegen/meta/src/gen_legalizer.rs @@ -61,10 +61,10 @@ fn unwrap_inst( fmtln!(fmt, "{},", field.member); } - if iform.num_value_operands == 1 { - fmt.line("arg,"); - } else if iform.has_value_list || iform.num_value_operands > 1 { + if iform.has_value_list || iform.num_value_operands > 1 { fmt.line("ref args,"); + } else if iform.num_value_operands == 1 { + fmt.line("arg,"); } fmt.line(".."); @@ -87,6 +87,13 @@ fn unwrap_inst( } else if op.is_value() { let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap(); fmtln!(fmt, "func.dfg.resolve_aliases(args[{}]),", n); + } else if op.is_varargs() { + let n = inst.imm_opnums.iter().chain(inst.value_opnums.iter()).max().map(|n| n + 1).unwrap_or(0); + // We need to create a `Vec` here, as using a slice would result in a borrowck + // error later on. + fmtln!(fmt, "\ + args.iter().skip({}).map(|&arg| func.dfg.resolve_aliases(arg)).collect::>(),\ + ", n); } } @@ -104,6 +111,19 @@ fn unwrap_inst( }); fmtln!(fmt, "};"); + assert_eq!(inst.operands_in.len(), apply.args.len()); + for (i, op) in inst.operands_in.iter().enumerate() { + if op.is_varargs() { + let name = var_pool + .get(apply.args[i].maybe_var().expect("vararg without name")) + .name; + + // Above name is set to an `Vec` representing the varargs. However it is expected to be + // `&[Value]` below, so we borrow it. 
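On the vararg handling in `unwrap_inst` above: the generated code collects the variable arguments into an owned `Vec` first, so the borrow of the data-flow graph ends before the function is mutated, and then re-borrows it as a slice because later code expects `&[Value]`. The toy `Dfg`/`Value` types below are invented stand-ins that only demonstrate the borrow pattern.

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Value(u32);

struct Dfg {
    args: Vec<Value>,
}

impl Dfg {
    fn resolve_aliases(&self, v: Value) -> Value {
        v // identity here; the real DFG follows value aliases
    }
}

fn main() {
    let mut dfg = Dfg { args: vec![Value(1), Value(2), Value(3)] };

    // Own the variable arguments (everything after the fixed operands) in a Vec,
    // so the shared borrow of the DFG ends before anything is mutated.
    let varargs: Vec<Value> = dfg
        .args
        .iter()
        .skip(1)
        .map(|&arg| dfg.resolve_aliases(arg))
        .collect();

    dfg.args.clear(); // a later mutable use is now fine

    // Downstream code expects `&[Value]`, so the Vec is simply re-borrowed.
    let varargs: &[Value] = &varargs;
    assert_eq!(varargs, &[Value(2), Value(3)][..]);
}
```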
+ fmtln!(fmt, "let {} = &{};", name, name); + } + } + for &op_num in &inst.value_opnums { let arg = &apply.args[op_num]; if let Some(var_index) = arg.maybe_var() { @@ -402,6 +422,13 @@ fn gen_transform<'a>( fmt.line("let removed = pos.remove_inst();"); fmt.line("debug_assert_eq!(removed, inst);"); } + + if transform.def_pool.get(transform.src).apply.inst.is_branch { + // A branch might have been legalized into multiple branches, so we need to recompute + // the cfg. + fmt.line("cfg.recompute_ebb(pos.func, pos.current_ebb().unwrap());"); + } + fmt.line("return true;"); }); fmt.line("}"); diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs index 71f104210..253491f01 100644 --- a/cranelift-codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs @@ -9,7 +9,7 @@ use crate::cdsl::instructions::{ }; use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; -use crate::cdsl::types::ValueType; +use crate::cdsl::types::{LaneType, ValueType}; use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I16, I32, I64, I8}; @@ -1735,6 +1735,8 @@ pub(crate) fn define( // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the // value across the register + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + // PSHUFB, 8-bit shuffle using two XMM registers for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size); @@ -1756,7 +1758,7 @@ pub(crate) fn define( // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according // to the Intel manual: "When the destination operand is an XMM register, the source operand is // written to the low doubleword of the register and the regiser is zero-extended to 128 bits." 
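The `allowed_simd_type` closure above keeps the existing lower bound of 8-bit lanes and adds an upper bound below 128 bits, since a 128-bit lane in a 128-bit XMM register would be a single-lane "vector". A trivial sketch of the predicate on raw lane widths rather than `LaneType` values:

```rust
// Sketch of the allowed_simd_type filter used by the x86 SIMD encoding loops.
fn allowed_simd_lane(lane_bits: u64) -> bool {
    lane_bits >= 8 && lane_bits < 128
}

fn main() {
    assert!(allowed_simd_lane(8));    // i8x16
    assert!(allowed_simd_lane(64));   // i64x2 / f64x2
    assert!(!allowed_simd_lane(1));   // 1-bit bool lanes are excluded
    assert!(!allowed_simd_lane(128)); // i128/b128 stay scalar-only here
}
```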
- for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) { + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size); let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ if ty.lane_bits() < 64 { @@ -1774,7 +1776,7 @@ pub(crate) fn define( insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRD insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRQ, only x86_64 - for ty in ValueType::all_lane_types() { + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) { let instruction = insertlane.bind_vector_from_lane(ty, sse_vector_size); let template = rec_r_ib_unsigned_r.opcodes(opcode.clone()); @@ -1795,7 +1797,7 @@ pub(crate) fn define( extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRD extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRQ, only x86_64 - for ty in ValueType::all_lane_types() { + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) { let instruction = extractlane.bind_vector_from_lane(ty, sse_vector_size); let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone()); @@ -1816,8 +1818,9 @@ pub(crate) fn define( } // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8) - for from_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) { - for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type) + for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { + for to_type in + ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) { let instruction = raw_bitcast .bind_vector_from_lane(to_type, sse_vector_size) @@ -1833,7 +1836,7 @@ pub(crate) fn define( // for that; alternately, constants could be loaded into XMM registers using a sequence like: // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored // in memory) but some performance measurements are needed. 
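For the insertlane/extractlane loops above, the opcode lookup is keyed by lane width, and the new `allowed_simd_type` filter keeps 128-bit lanes from ever reaching it. A schematic sketch; only the two table entries visible in this hunk are reproduced, the real tables also cover 8- and 16-bit lanes:

```rust
use std::collections::HashMap;

fn main() {
    // Only widths present in the table get an encoding.
    let mut insertlane_mapping: HashMap<u64, Vec<u8>> = HashMap::new();
    insertlane_mapping.insert(32, vec![0x66, 0x0f, 0x3a, 0x22]); // PINSRD
    insertlane_mapping.insert(64, vec![0x66, 0x0f, 0x3a, 0x22]); // PINSRQ, x86_64 only

    // The loop over lane types is now pre-filtered, so 128-bit lanes never get here.
    for lane_bits in [8u64, 16, 32, 64].iter() {
        match insertlane_mapping.get(lane_bits) {
            Some(opcode) => println!("{}-bit lanes -> {:02x?}", lane_bits, opcode),
            None => println!("{}-bit lanes -> skipped (no entry)", lane_bits),
        }
    }
}
```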
- for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) { + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size); let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]); e.enc_32_64_maybe_isap(instruction, template, None); // from SSE diff --git a/cranelift-codegen/meta/src/shared/instructions.rs b/cranelift-codegen/meta/src/shared/instructions.rs index 67e289106..843347ce9 100644 --- a/cranelift-codegen/meta/src/shared/instructions.rs +++ b/cranelift-codegen/meta/src/shared/instructions.rs @@ -3143,7 +3143,7 @@ pub(crate) fn define( "WideInt", "An integer type with lanes from `i16` upwards", TypeSetBuilder::new() - .ints(16..64) + .ints(16..128) .simd_lanes(Interval::All) .build(), ); @@ -3171,9 +3171,9 @@ pub(crate) fn define( let NarrowInt = &TypeVar::new( "NarrowInt", - "An integer type with lanes type to `i32`", + "An integer type with lanes type to `i64`", TypeSetBuilder::new() - .ints(8..32) + .ints(8..64) .simd_lanes(Interval::All) .build(), ); diff --git a/cranelift-codegen/meta/src/shared/legalize.rs b/cranelift-codegen/meta/src/shared/legalize.rs index 90fe47d0f..958827270 100644 --- a/cranelift-codegen/meta/src/shared/legalize.rs +++ b/cranelift-codegen/meta/src/shared/legalize.rs @@ -4,7 +4,7 @@ use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; use crate::shared::immediates::Immediates; use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Int::{I128, I16, I32, I64, I8}; pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { let mut narrow = TransformGroupBuilder::new( @@ -49,6 +49,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro let bor = insts.by_name("bor"); let bor_imm = insts.by_name("bor_imm"); let bor_not = insts.by_name("bor_not"); + let brnz = insts.by_name("brnz"); + let brz = insts.by_name("brz"); let br_icmp = insts.by_name("br_icmp"); let br_table = insts.by_name("br_table"); let bxor = insts.by_name("bxor"); @@ -177,9 +179,14 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro let al = var("al"); let ah = var("ah"); let cc = var("cc"); + let ebb = var("ebb"); let ptr = var("ptr"); let flags = var("flags"); let offset = var("off"); + let vararg = var("vararg"); + + narrow.custom_legalize(load, "narrow_load"); + narrow.custom_legalize(store, "narrow_store"); narrow.legalize( def!(a = iadd(x, y)), @@ -203,7 +210,7 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro ], ); - for &bin_op in &[band, bor, bxor] { + for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { narrow.legalize( def!(a = bin_op(x, y)), vec![ @@ -216,6 +223,16 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro ); } + narrow.legalize( + def!(a = bnot(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(al = bnot(xl)), + def!(ah = bnot(xh)), + def!(a = iconcat(al, ah)), + ], + ); + narrow.legalize( def!(a = select(c, x, y)), vec![ @@ -227,6 +244,38 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro ], ); + narrow.legalize( + def!(brz.I128(x, ebb, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!( + a = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xl, + Literal::constant(&imm.imm64, 0) + ) + ), + def!( + b = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xh, + 
Literal::constant(&imm.imm64, 0) + ) + ), + def!(c = band(a, b)), + def!(brz(c, ebb, vararg)), + ], + ); + + narrow.legalize( + def!(brnz.I128(x, ebb, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!(brnz(xl, ebb, vararg)), + def!(brnz(xh, ebb, vararg)), + ], + ); + // Widen instructions with one input operand. for &op in &[bnot, popcnt] { for &int_ty in &[I8, I16] { diff --git a/cranelift-codegen/meta/src/shared/types.rs b/cranelift-codegen/meta/src/shared/types.rs index 266c30b3d..52fa9545c 100644 --- a/cranelift-codegen/meta/src/shared/types.rs +++ b/cranelift-codegen/meta/src/shared/types.rs @@ -12,6 +12,8 @@ pub enum Bool { B32 = 32, /// 64-bit bool. B64 = 64, + /// 128-bit bool. + B128 = 128, } /// This provides an iterator through all of the supported bool variants. @@ -34,6 +36,7 @@ impl Iterator for BoolIterator { 2 => Some(Bool::B16), 3 => Some(Bool::B32), 4 => Some(Bool::B64), + 5 => Some(Bool::B128), _ => return None, }; self.index += 1; @@ -51,6 +54,8 @@ pub enum Int { I32 = 32, /// 64-bit int. I64 = 64, + /// 128-bit int. + I128 = 128, } /// This provides an iterator through all of the supported int variants. @@ -72,6 +77,7 @@ impl Iterator for IntIterator { 1 => Some(Int::I16), 2 => Some(Int::I32), 3 => Some(Int::I64), + 4 => Some(Int::I128), _ => return None, }; self.index += 1; @@ -189,6 +195,7 @@ mod iter_tests { assert_eq!(bool_iter.next(), Some(Bool::B16)); assert_eq!(bool_iter.next(), Some(Bool::B32)); assert_eq!(bool_iter.next(), Some(Bool::B64)); + assert_eq!(bool_iter.next(), Some(Bool::B128)); assert_eq!(bool_iter.next(), None); } @@ -199,6 +206,7 @@ mod iter_tests { assert_eq!(int_iter.next(), Some(Int::I16)); assert_eq!(int_iter.next(), Some(Int::I32)); assert_eq!(int_iter.next(), Some(Int::I64)); + assert_eq!(int_iter.next(), Some(Int::I128)); assert_eq!(int_iter.next(), None); } diff --git a/cranelift-codegen/src/ir/types.rs b/cranelift-codegen/src/ir/types.rs index 4eb72f3fc..10fca8aaa 100644 --- a/cranelift-codegen/src/ir/types.rs +++ b/cranelift-codegen/src/ir/types.rs @@ -10,11 +10,11 @@ use target_lexicon::{PointerWidth, Triple}; /// field is present put no type is needed, such as the controlling type variable for a /// non-polymorphic instruction. /// -/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic. +/// Basic integer types: `I8`, `I16`, `I32`, `I64`, and `I128`. These types are sign-agnostic. /// /// Basic floating point types: `F32` and `F64`. IEEE single and double precision. /// -/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The +/// Boolean types: `B1`, `B8`, `B16`, `B32`, `B64`, and `B128`. These all encode 'true' or 'false'. The /// larger types use redundant bits. /// /// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type. 
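The new `narrow` rules in shared/legalize.rs reduce 128-bit operations to their 64-bit halves: bitwise ops (now including the `*_not` forms and `bnot`) are applied per half, `brz.i128` tests that both halves compare equal to zero, and `brnz.i128` becomes two consecutive branches. Below is a standalone sketch of those semantics on plain `u128` values, not the meta-language DSL; the function names are illustrative only.

```rust
fn isplit(x: u128) -> (u64, u64) {
    (x as u64, (x >> 64) as u64) // (low half, high half)
}

fn iconcat(lo: u64, hi: u64) -> u128 {
    (lo as u128) | ((hi as u128) << 64)
}

// band/bor/bxor (and the *_not variants) narrow by operating on each half.
fn narrow_band(x: u128, y: u128) -> u128 {
    let (xl, xh) = isplit(x);
    let (yl, yh) = isplit(y);
    iconcat(xl & yl, xh & yh)
}

// brz.i128 is taken iff both halves are zero (icmp_imm eq 0 on each, then band).
fn brz_taken(x: u128) -> bool {
    let (xl, xh) = isplit(x);
    (xl == 0) & (xh == 0)
}

// brnz.i128 is taken iff either half is non-zero (two consecutive brnz).
fn brnz_taken(x: u128) -> bool {
    let (xl, xh) = isplit(x);
    xl != 0 || xh != 0
}

fn main() {
    let x = 0x1234_5678_9abc_def0_0fed_cba9_8765_4321u128;
    let y = u128::from(u64::MAX) << 64;
    assert_eq!(narrow_band(x, y), x & y);
    assert!(brz_taken(0) && !brz_taken(1u128 << 100));
    assert!(brnz_taken(1u128 << 100) && !brnz_taken(0));
}
```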
@@ -63,6 +63,7 @@ impl Type { B16 | I16 => 4, B32 | I32 | F32 | R32 => 5, B64 | I64 | F64 | R64 => 6, + B128 | I128 => 7, _ => 0, } } @@ -75,6 +76,7 @@ impl Type { B16 | I16 => 16, B32 | I32 | F32 | R32 => 32, B64 | I64 | F64 | R64 => 64, + B128 | I128 => 128, _ => 0, } } @@ -86,6 +88,7 @@ impl Type { 16 => Some(I16), 32 => Some(I32), 64 => Some(I64), + 128 => Some(I128), _ => None, } } @@ -109,6 +112,7 @@ impl Type { B32 | I32 | F32 => B32, B64 | I64 | F64 => B64, R32 | R64 => panic!("Reference types should not convert to bool"), + B128 | I128 => B128, _ => B1, }) } @@ -132,10 +136,12 @@ impl Type { I16 => I8, I32 => I16, I64 => I32, + I128 => I64, F64 => F32, B16 => B8, B32 => B16, B64 => B32, + B128 => B64, _ => return None, })) } @@ -147,10 +153,12 @@ impl Type { I8 => I16, I16 => I32, I32 => I64, + I64 => I128, F32 => F64, B8 => B16, B16 => B32, B32 => B64, + B64 => B128, _ => return None, })) } @@ -182,7 +190,7 @@ impl Type { /// Is this a scalar boolean type? pub fn is_bool(self) -> bool { match self { - B1 | B8 | B16 | B32 | B64 => true, + B1 | B8 | B16 | B32 | B64 | B128 => true, _ => false, } } @@ -190,7 +198,7 @@ impl Type { /// Is this a scalar integer type? pub fn is_int(self) -> bool { match self { - I8 | I16 | I32 | I64 => true, + I8 | I16 | I32 | I64 | I128 => true, _ => false, } } @@ -370,10 +378,12 @@ mod tests { assert_eq!(B16, B16.lane_type()); assert_eq!(B32, B32.lane_type()); assert_eq!(B64, B64.lane_type()); + assert_eq!(B128, B128.lane_type()); assert_eq!(I8, I8.lane_type()); assert_eq!(I16, I16.lane_type()); assert_eq!(I32, I32.lane_type()); assert_eq!(I64, I64.lane_type()); + assert_eq!(I128, I128.lane_type()); assert_eq!(F32, F32.lane_type()); assert_eq!(F64, F64.lane_type()); assert_eq!(B1, B1.by(8).unwrap().lane_type()); @@ -390,10 +400,12 @@ mod tests { assert_eq!(B16.lane_bits(), 16); assert_eq!(B32.lane_bits(), 32); assert_eq!(B64.lane_bits(), 64); + assert_eq!(B128.lane_bits(), 128); assert_eq!(I8.lane_bits(), 8); assert_eq!(I16.lane_bits(), 16); assert_eq!(I32.lane_bits(), 32); assert_eq!(I64.lane_bits(), 64); + assert_eq!(I128.lane_bits(), 128); assert_eq!(F32.lane_bits(), 32); assert_eq!(F64.lane_bits(), 64); assert_eq!(R32.lane_bits(), 32); @@ -410,11 +422,13 @@ mod tests { assert_eq!(B16.half_width(), Some(B8)); assert_eq!(B32.half_width(), Some(B16)); assert_eq!(B64.half_width(), Some(B32)); + assert_eq!(B128.half_width(), Some(B64)); assert_eq!(I8.half_width(), None); assert_eq!(I16.half_width(), Some(I8)); assert_eq!(I32.half_width(), Some(I16)); assert_eq!(I32X4.half_width(), Some(I16X4)); assert_eq!(I64.half_width(), Some(I32)); + assert_eq!(I128.half_width(), Some(I64)); assert_eq!(F32.half_width(), None); assert_eq!(F64.half_width(), Some(F32)); @@ -425,12 +439,14 @@ mod tests { assert_eq!(B8.double_width(), Some(B16)); assert_eq!(B16.double_width(), Some(B32)); assert_eq!(B32.double_width(), Some(B64)); - assert_eq!(B64.double_width(), None); + assert_eq!(B64.double_width(), Some(B128)); + assert_eq!(B128.double_width(), None); assert_eq!(I8.double_width(), Some(I16)); assert_eq!(I16.double_width(), Some(I32)); assert_eq!(I32.double_width(), Some(I64)); assert_eq!(I32X4.double_width(), Some(I64X4)); - assert_eq!(I64.double_width(), None); + assert_eq!(I64.double_width(), Some(I128)); + assert_eq!(I128.double_width(), None); assert_eq!(F32.double_width(), Some(F64)); assert_eq!(F64.double_width(), None); } @@ -461,10 +477,12 @@ mod tests { assert_eq!(B16.to_string(), "b16"); assert_eq!(B32.to_string(), "b32"); assert_eq!(B64.to_string(), "b64"); 
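The `B128 | I128 => 7` arm added in the first `impl Type` hunk above follows the existing pattern: the stored value is log2 of the lane width in bits (8 maps to 3, 64 to 6, and so on). A one-liner making that relationship explicit:

```rust
// log2 of a power-of-two lane width; a 128-bit lane encodes as 7.
fn log2_lane_bits(lane_bits: u32) -> u32 {
    lane_bits.trailing_zeros() // exact for power-of-two widths
}

fn main() {
    assert_eq!(log2_lane_bits(8), 3);
    assert_eq!(log2_lane_bits(64), 6);
    assert_eq!(log2_lane_bits(128), 7); // the newly added case
}
```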
+ assert_eq!(B128.to_string(), "b128"); assert_eq!(I8.to_string(), "i8"); assert_eq!(I16.to_string(), "i16"); assert_eq!(I32.to_string(), "i32"); assert_eq!(I64.to_string(), "i64"); + assert_eq!(I128.to_string(), "i128"); assert_eq!(F32.to_string(), "f32"); assert_eq!(F64.to_string(), "f64"); assert_eq!(R32.to_string(), "r32"); diff --git a/cranelift-codegen/src/legalizer/mod.rs b/cranelift-codegen/src/legalizer/mod.rs index e6f7bcb00..0f24689d8 100644 --- a/cranelift-codegen/src/legalizer/mod.rs +++ b/cranelift-codegen/src/legalizer/mod.rs @@ -21,6 +21,8 @@ use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; use crate::predicates; use crate::timing; +use std::collections::BTreeSet; +use std::vec::Vec; mod boundary; mod call; @@ -36,31 +38,78 @@ use self::heap::expand_heap_addr; use self::libcall::expand_as_libcall; use self::table::expand_table_addr; -/// Legalize `inst` for `isa`. Return true if any changes to the code were -/// made; return false if the instruction was successfully encoded as is. +enum LegalizeInstResult { + Done, + Legalized, + SplitLegalizePending, +} + +/// Legalize `inst` for `isa`. fn legalize_inst( inst: ir::Inst, pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa, -) -> bool { +) -> LegalizeInstResult { let opcode = pos.func.dfg[inst].opcode(); // Check for ABI boundaries that need to be converted to the legalized signature. if opcode.is_call() { if boundary::handle_call_abi(inst, pos.func, cfg) { - return true; + return LegalizeInstResult::Legalized; } } else if opcode.is_return() { if boundary::handle_return_abi(inst, pos.func, cfg) { - return true; + return LegalizeInstResult::Legalized; } } else if opcode.is_branch() { split::simplify_branch_arguments(&mut pos.func.dfg, inst); + } else if opcode == ir::Opcode::Isplit { + pos.use_srcloc(inst); + + let arg = match pos.func.dfg[inst] { + ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg), + _ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)), + }; + + match pos.func.dfg.value_def(arg) { + ir::ValueDef::Result(inst, _num) => { + if let ir::InstructionData::Binary { + opcode: ir::Opcode::Iconcat, + .. + } = pos.func.dfg[inst] + { + // `arg` was created by an `iconcat` instruction. + } else { + // `arg` was not created by an `iconcat` instruction. Don't try to resolve it, + // as otherwise `split::isplit` will re-insert the original `isplit`, causing + // an endless loop. + return LegalizeInstResult::SplitLegalizePending; + } + } + ir::ValueDef::Param(_ebb, _num) => {} + } + + let res = pos.func.dfg.inst_results(inst).to_vec(); + assert_eq!(res.len(), 2); + let (resl, resh) = (res[0], res[1]); // Prevent borrowck error + + // Remove old isplit + pos.func.dfg.clear_results(inst); + pos.remove_inst(); + + let curpos = pos.position(); + let srcloc = pos.srcloc(); + let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg); + + pos.func.dfg.change_to_alias(resl, xl); + pos.func.dfg.change_to_alias(resh, xh); + + return LegalizeInstResult::Legalized; } match pos.func.update_encoding(inst, isa) { - Ok(()) => false, + Ok(()) => LegalizeInstResult::Done, Err(action) => { // We should transform the instruction into legal equivalents. // If the current instruction was replaced, we need to double back and revisit @@ -69,12 +118,16 @@ fn legalize_inst( // There's a risk of infinite looping here if the legalization patterns are // unsound. Should we attempt to detect that? 
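On the new `Opcode::Isplit` handling in `legalize_inst` above: when the operand was produced by an `iconcat`, the split can simply cancel against it; when it was not, expanding immediately would re-insert the same `isplit` and loop forever, so the instruction is deferred via `SplitLegalizePending` and retried later. A schematic sketch of that decision, with a made-up `ValueDef` enum standing in for the DFG query:

```rust
#[derive(Debug)]
enum ValueDef {
    Iconcat { lo: u64, hi: u64 },
    Other(u128),
}

// If the 128-bit value was built as `iconcat lo, hi`, reuse the halves; otherwise
// defer, so the split is not endlessly re-inserted.
fn try_resolve_isplit(def: &ValueDef) -> Option<(u64, u64)> {
    match def {
        ValueDef::Iconcat { lo, hi } => Some((*lo, *hi)),
        ValueDef::Other(_) => None, // SplitLegalizePending in the real legalizer
    }
}

fn main() {
    assert_eq!(try_resolve_isplit(&ValueDef::Iconcat { lo: 1, hi: 2 }), Some((1, 2)));
    assert_eq!(try_resolve_isplit(&ValueDef::Other(42)), None);
}
```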
if action(inst, pos.func, cfg, isa) { - return true; + return LegalizeInstResult::Legalized; } // We don't have any pattern expansion for this instruction either. // Try converting it to a library call as a last resort. - expand_as_libcall(inst, pos.func, isa) + if expand_as_libcall(inst, pos.func, isa) { + LegalizeInstResult::Legalized + } else { + LegalizeInstResult::Done + } } } } @@ -94,24 +147,42 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is let mut pos = FuncCursor::new(func); + // This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases. + let mut pending_splits = BTreeSet::new(); + // Process EBBs in layout order. Some legalization actions may split the current EBB or append // new ones to the end. We need to make sure we visit those new EBBs too. - while let Some(_ebb) = pos.next_ebb() { + while let Some(ebb) = pos.next_ebb() { + split::split_ebb_params(pos.func, cfg, ebb); + // Keep track of the cursor position before the instruction being processed, so we can // double back when replacing instructions. let mut prev_pos = pos.position(); while let Some(inst) = pos.next_inst() { - if legalize_inst(inst, &mut pos, cfg, isa) { - // Go back and legalize the inserted return value conversion instructions. - pos.set_position(prev_pos); - } else { + match legalize_inst(inst, &mut pos, cfg, isa) { // Remember this position in case we need to double back. - prev_pos = pos.position(); + LegalizeInstResult::Done => prev_pos = pos.position(), + + // Go back and legalize the inserted return value conversion instructions. + LegalizeInstResult::Legalized => pos.set_position(prev_pos), + + // The argument of a `isplit` or `vsplit` instruction didn't resolve to a + // `iconcat` or `vconcat` instruction. Try again after legalizing the rest of + // the instructions. + LegalizeInstResult::SplitLegalizePending => { + pending_splits.insert(inst); + } } } } + // Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized. + for inst in pending_splits { + pos.goto_inst(inst); + legalize_inst(inst, &mut pos, cfg, isa); + } + // Now that we've lowered all br_tables, we don't need the jump tables anymore. if !isa.flags().jump_tables_enabled() { pos.func.jump_tables.clear(); @@ -498,3 +569,67 @@ fn expand_stack_store( mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, val, addr, 0); } + +/// Split a load into two parts before `iconcat`ing the result together. +fn narrow_load( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Load { + opcode: ir::Opcode::Load, + arg, + offset, + flags, + } => (arg, offset, flags), + _ => panic!("Expected load: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let res_ty = pos.func.dfg.ctrl_typevar(inst); + let small_ty = res_ty.half_width().expect("Can't narrow load"); + + let al = pos.ins().load(small_ty, flags, ptr, offset); + let ah = pos.ins().load( + small_ty, + flags, + ptr, + offset.try_add_i64(8).expect("load offset overflow"), + ); + pos.func.dfg.replace(inst).iconcat(al, ah); +} + +/// Split a store into two parts after `isplit`ing the value. 
+fn narrow_store( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (val, ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Store { + opcode: ir::Opcode::Store, + args, + offset, + flags, + } => (args[0], args[1], offset, flags), + _ => panic!("Expected store: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let (al, ah) = pos.ins().isplit(val); + pos.ins().store(flags, al, ptr, offset); + pos.ins().store( + flags, + ah, + ptr, + offset.try_add_i64(8).expect("store offset overflow"), + ); + pos.remove_inst(); +} diff --git a/cranelift-codegen/src/legalizer/split.rs b/cranelift-codegen/src/legalizer/split.rs index 773df1321..f16dae161 100644 --- a/cranelift-codegen/src/legalizer/split.rs +++ b/cranelift-codegen/src/legalizer/split.rs @@ -124,6 +124,35 @@ fn split_any( let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc); let result = split_value(pos, value, concat, &mut repairs); + perform_repairs(pos, cfg, repairs); + + result +} + +pub fn split_ebb_params(func: &mut ir::Function, cfg: &ControlFlowGraph, ebb: Ebb) { + let mut repairs = Vec::new(); + let pos = &mut FuncCursor::new(func).at_top(ebb); + + for (num, ebb_param) in pos + .func + .dfg + .ebb_params(ebb) + .to_vec() + .into_iter() + .enumerate() + { + let ty = pos.func.dfg.value_type(ebb_param); + if ty != ir::types::I128 { + continue; + } + + split_ebb_param(pos, ebb, num, ebb_param, Opcode::Iconcat, &mut repairs); + } + + perform_repairs(pos, cfg, repairs); +} + +fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec) { // We have split the value requested, and now we may need to fix some EBB predecessors. while let Some(repair) = repairs.pop() { for BasicBlock { inst, .. } in cfg.pred_iter(repair.ebb) { @@ -181,8 +210,6 @@ fn split_any( pos.func.dfg[inst].put_value_list(args); } } - - result } /// Split a single value using the integer or vector semantics given by the `concat` opcode. @@ -215,40 +242,7 @@ fn split_value( // This is an EBB parameter. We can split the parameter value unless this is the entry // block. if pos.func.layout.entry_block() != Some(ebb) { - // We are going to replace the parameter at `num` with two new arguments. - // Determine the new value types. - let ty = pos.func.dfg.value_type(value); - let split_type = match concat { - Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), - Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), - _ => panic!("Unhandled concat opcode: {}", concat), - }; - - // Since the `repairs` stack potentially contains other parameter numbers for - // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other - // `repairs` entries. - // - // Replace the original `value` with the low part, and append the high part at the - // end of the argument list. - let lo = pos.func.dfg.replace_ebb_param(value, split_type); - let hi_num = pos.func.dfg.num_ebb_params(ebb); - let hi = pos.func.dfg.append_ebb_param(ebb, split_type); - reuse = Some((lo, hi)); - - // Now the original value is dangling. Insert a concatenation instruction that can - // compute it from the two new parameters. This also serves as a record of what we - // did so a future call to this function doesn't have to redo the work. 
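The `narrow_load`/`narrow_store` custom legalizations above turn a 128-bit memory access into two 64-bit accesses, with the high half at byte offset +8 (a little-endian layout, as on the x86 targets this is aimed at). A byte-level round-trip sketch of the same idea, independent of Cranelift's APIs:

```rust
fn load64(mem: &[u8], offset: usize) -> u64 {
    let mut bytes = [0u8; 8];
    bytes.copy_from_slice(&mem[offset..offset + 8]);
    u64::from_le_bytes(bytes)
}

fn store64(mem: &mut [u8], offset: usize, value: u64) {
    mem[offset..offset + 8].copy_from_slice(&value.to_le_bytes());
}

// narrow_load: two 64-bit loads, then iconcat(lo, hi).
fn narrow_load128(mem: &[u8], offset: usize) -> u128 {
    let lo = load64(mem, offset);
    let hi = load64(mem, offset + 8); // offset.try_add_i64(8) in the real code
    (lo as u128) | ((hi as u128) << 64)
}

// narrow_store: isplit the value, then two 64-bit stores.
fn narrow_store128(mem: &mut [u8], offset: usize, value: u128) {
    store64(mem, offset, value as u64);
    store64(mem, offset + 8, (value >> 64) as u64);
}

fn main() {
    let value = 0x0011_2233_4455_6677_8899_aabb_ccdd_eeffu128;
    let mut mem = [0u8; 16];
    narrow_store128(&mut mem, 0, value);
    assert_eq!(narrow_load128(&mem, 0), value);
}
```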
- // - // Note that it is safe to move `pos` here since `reuse` was set above, so we don't - // need to insert a split instruction before returning. - pos.goto_first_inst(ebb); - pos.ins() - .with_result(value) - .Binary(concat, split_type, lo, hi); - - // Finally, splitting the EBB parameter is not enough. We also have to repair all - // of the predecessor instructions that branch here. - add_repair(concat, split_type, ebb, num, hi_num, repairs); + reuse = Some(split_ebb_param(pos, ebb, num, value, concat, repairs)); } } } @@ -267,6 +261,51 @@ fn split_value( } } +fn split_ebb_param( + pos: &mut FuncCursor, + ebb: Ebb, + param_num: usize, + value: Value, + concat: Opcode, + repairs: &mut Vec, +) -> (Value, Value) { + // We are going to replace the parameter at `num` with two new arguments. + // Determine the new value types. + let ty = pos.func.dfg.value_type(value); + let split_type = match concat { + Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), + Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), + _ => panic!("Unhandled concat opcode: {}", concat), + }; + + // Since the `repairs` stack potentially contains other parameter numbers for + // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other + // `repairs` entries. + // + // Replace the original `value` with the low part, and append the high part at the + // end of the argument list. + let lo = pos.func.dfg.replace_ebb_param(value, split_type); + let hi_num = pos.func.dfg.num_ebb_params(ebb); + let hi = pos.func.dfg.append_ebb_param(ebb, split_type); + + // Now the original value is dangling. Insert a concatenation instruction that can + // compute it from the two new parameters. This also serves as a record of what we + // did so a future call to this function doesn't have to redo the work. + // + // Note that it is safe to move `pos` here since `reuse` was set above, so we don't + // need to insert a split instruction before returning. + pos.goto_first_inst(ebb); + pos.ins() + .with_result(value) + .Binary(concat, split_type, lo, hi); + + // Finally, splitting the EBB parameter is not enough. We also have to repair all + // of the predecessor instructions that branch here. + add_repair(concat, split_type, ebb, param_num, hi_num, repairs); + + (lo, hi) +} + // Add a repair entry to the work list. fn add_repair( concat: Opcode, diff --git a/cranelift-codegen/src/regalloc/reload.rs b/cranelift-codegen/src/regalloc/reload.rs index fb6b61ec6..bbc198c45 100644 --- a/cranelift-codegen/src/regalloc/reload.rs +++ b/cranelift-codegen/src/regalloc/reload.rs @@ -233,7 +233,7 @@ impl<'a> Context<'a> { let dst_ty = self.cur.func.dfg.value_type(dst_val); debug_assert!(src_ty == dst_ty); // This limits the transformation to copies of the - // types: I64 I32 I16 I8 F64 and F32, since that's + // types: I128 I64 I32 I16 I8 F64 and F32, since that's // the set of `copy_nop` encodings available. 
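The extracted `split_ebb_param` above replaces the i128 parameter in place with its low half and appends the high half at the end of the parameter list, so pending repair entries keep their indices; every predecessor is then queued for repair. A tiny model of just that bookkeeping, omitting the `iconcat` that is inserted at the top of the EBB to rebuild the original value:

```rust
#[derive(Clone, Debug, PartialEq)]
enum Ty {
    I64,
    I128,
}

struct Repair {
    param_num: usize,
    hi_num: usize,
}

fn split_ebb_param(params: &mut Vec<Ty>, num: usize, repairs: &mut Vec<Repair>) {
    assert_eq!(params[num], Ty::I128);
    params[num] = Ty::I64; // the low half replaces the original parameter in place
    let hi_num = params.len();
    params.push(Ty::I64); // the high half is appended, so earlier indices stay valid
    repairs.push(Repair { param_num: num, hi_num });
}

fn main() {
    let mut params = vec![Ty::I64, Ty::I128];
    let mut repairs = Vec::new();
    split_ebb_param(&mut params, 1, &mut repairs);

    assert_eq!(params, vec![Ty::I64, Ty::I64, Ty::I64]);
    // Each predecessor branching to this EBB still has to be repaired to pass
    // an extra argument for the appended high half.
    assert_eq!((repairs[0].param_num, repairs[0].hi_num), (1, 2));
}
```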
src_ty.is_int() || src_ty.is_float() } diff --git a/cranelift-reader/src/lexer.rs b/cranelift-reader/src/lexer.rs index 465e79e97..2432ab055 100644 --- a/cranelift-reader/src/lexer.rs +++ b/cranelift-reader/src/lexer.rs @@ -365,6 +365,7 @@ impl<'a> Lexer<'a> { "i16" => types::I16, "i32" => types::I32, "i64" => types::I64, + "i128" => types::I128, "f32" => types::F32, "f64" => types::F64, "b1" => types::B1, @@ -372,6 +373,7 @@ impl<'a> Lexer<'a> { "b16" => types::B16, "b32" => types::B32, "b64" => types::B64, + "b128" => types::B128, "r32" => types::R32, "r64" => types::R64, _ => return None, diff --git a/filetests/isa/x86/br-i128.clif b/filetests/isa/x86/br-i128.clif new file mode 100644 index 000000000..a09db3f41 --- /dev/null +++ b/filetests/isa/x86/br-i128.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +function u0:0(i128) -> i8 fast { +ebb0(v0: i128): + brz v0, ebb1 + v1 = iconst.i8 0 + return v1 + +ebb1: + v2 = iconst.i8 1 + return v2 +} + +function u0:1(i128) -> i8 fast { +ebb0(v0: i128): + brnz v0, ebb1 + v1 = iconst.i8 0 + return v1 + +ebb1: + v2 = iconst.i8 1 + return v2 +} diff --git a/filetests/isa/x86/i128.clif b/filetests/isa/x86/i128.clif new file mode 100644 index 000000000..b710a7430 --- /dev/null +++ b/filetests/isa/x86/i128.clif @@ -0,0 +1,46 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 fast { +ebb0(v0: i64, v1: i64): +;check: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): + + v2 = iconcat.i64 v0, v1 + ; check: regmove v0, %rdi -> %rax + ; check: regmove v1, %rsi -> %rdx + + return v2 + ; check: v4 = x86_pop.i64 + ; check: return v0, v1, v4 +} + +function u0:1(i128) -> i64, i64 fast { +ebb0(v0: i128): +; check: ebb0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): + + v1, v2 = isplit v0 + ; check: regmove v3, %rdi -> %rax + ; check: regmove v4, %rsi -> %rdx + + return v1, v2 + ; check: v6 = x86_pop.i64 + ; check: return v3, v4, v6 +} + +function u0:2(i64, i128) fast { +; check: ebb0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): +ebb0(v0: i64, v1: i128): + ; check: store v2, v0+8 + ; check: store v3, v0+16 + store v1, v0+8 + return +} + +function u0:3(i64) -> i128 fast { +ebb0(v0: i64): + ; check: v2 = load.i64 v0+8 + ; check: v3 = load.i64 v0+16 + v1 = load.i128 v0+8 + ; check: return v2, v3, v5 + return v1 +} diff --git a/filetests/isa/x86/isplit-not-legalized-twice.clif b/filetests/isa/x86/isplit-not-legalized-twice.clif new file mode 100644 index 000000000..4b81a186d --- /dev/null +++ b/filetests/isa/x86/isplit-not-legalized-twice.clif @@ -0,0 +1,20 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 system_v { +ebb0(v0: i64, v1: i64): + trap user0 + +ebb30: + v245 = iconst.i64 0 + v246 = iconcat v245, v245 + ; The next instruction used to be legalized twice, causing a panic the second time. 
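The lexer change above is purely table-driven: the type-keyword map gains `i128` and `b128` entries. A minimal stand-in, where the string results replace the real `types::I128`/`types::B128` constants:

```rust
// Sketch of the keyword-to-type lookup with the two new entries.
fn type_for_keyword(word: &str) -> Option<&'static str> {
    Some(match word {
        "i64" => "I64",
        "i128" => "I128", // new
        "b64" => "B64",
        "b128" => "B128", // new
        _ => return None,
    })
}

fn main() {
    assert_eq!(type_for_keyword("i128"), Some("I128"));
    assert_eq!(type_for_keyword("i256"), None);
}
```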
+ v250, v251 = isplit.i128 v370 + v252, v253 = isplit v246 + trap user0 + +ebb45: + v369 = iconst.i64 0 + v370 = load.i128 v369 + trap user0 +} diff --git a/filetests/isa/x86/jump_i128_param_unused.clif b/filetests/isa/x86/jump_i128_param_unused.clif new file mode 100644 index 000000000..9d96fcbe3 --- /dev/null +++ b/filetests/isa/x86/jump_i128_param_unused.clif @@ -0,0 +1,10 @@ +test compile +target x86_64 + +function u0:0(i128) system_v { +ebb0(v0: i128): + jump ebb1(v0) + +ebb1(v1: i128): + return +} diff --git a/filetests/isa/x86/legalize-isplit-backwards.clif b/filetests/isa/x86/legalize-isplit-backwards.clif new file mode 100644 index 000000000..43881fe09 --- /dev/null +++ b/filetests/isa/x86/legalize-isplit-backwards.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +function u0:0(i128) -> i64, i64 fast { +; check: ebb0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): +ebb0(v0: i128): + jump ebb2 + +ebb1: + ; When this `isplit` is legalized, the bnot below is not yet legalized, + ; so there isn't a corresponding `iconcat` yet. We should try legalization + ; for this `isplit` again once all instrucions have been legalized. + v2, v3 = isplit.i128 v1 + ; return v6, v7 + return v2, v3 + +ebb2: + ; check: v6 = bnot.i64 v4 + ; check: v2 -> v6 + ; check: v7 = bnot.i64 v5 + ; check: v3 -> v7 + v1 = bnot.i128 v0 + jump ebb1 +} diff --git a/filetests/isa/x86/load-store-narrow.clif b/filetests/isa/x86/load-store-narrow.clif new file mode 100644 index 000000000..5f95b92fc --- /dev/null +++ b/filetests/isa/x86/load-store-narrow.clif @@ -0,0 +1,16 @@ +test compile +target i686 + +function u0:0(i64, i32) system_v { +ebb0(v0: i64, v1: i32): + v2 = bor v0, v0 + store v2, v1 + return +} + +function u0:1(i32) -> i64 system_v { +ebb0(v1: i32): + v0 = load.i64 v1 + v2 = bor v0, v0 + return v2 +}