From 41eca60b14e98260855f8a0bdf8498584198ff9f Mon Sep 17 00:00:00 2001 From: beetrees Date: Wed, 17 Jul 2024 17:39:47 +0100 Subject: [PATCH] cranelift: Add `f16const` and `f128const` instructions (#8893) * cranelift: Add `f16const` and `f128const` instructions * cranelift: Add constant propagation for `f16` and `f128` --- cranelift/codegen/meta/src/shared/formats.rs | 3 + .../codegen/meta/src/shared/immediates.rs | 10 +++ .../codegen/meta/src/shared/instructions.rs | 34 +++++++ cranelift/codegen/src/inst_predicates.rs | 1 + cranelift/codegen/src/ir/constant.rs | 29 +++++- cranelift/codegen/src/ir/entities.rs | 2 + cranelift/codegen/src/ir/immediates.rs | 90 ++++++++++++++----- cranelift/codegen/src/isle_prelude.rs | 24 +++++ cranelift/codegen/src/machinst/isle.rs | 9 +- cranelift/codegen/src/opts.rs | 10 ++- cranelift/codegen/src/opts/cprop.isle | 16 ++++ cranelift/codegen/src/prelude.isle | 15 ++++ cranelift/codegen/src/verifier/mod.rs | 12 ++- cranelift/codegen/src/write.rs | 11 ++- .../filetests/filetests/egraph/cprop.clif | 69 +++++++++++++- .../filetests/runtests/f128const.clif | 53 +++++++++++ .../filetests/runtests/f16const.clif | 53 +++++++++++ cranelift/fuzzgen/src/function_generator.rs | 6 ++ cranelift/interpreter/src/step.rs | 19 ++-- cranelift/reader/src/parser.rs | 7 ++ 20 files changed, 437 insertions(+), 36 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/f128const.clif create mode 100644 cranelift/filetests/filetests/runtests/f16const.clif diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index b3ee5b2c2585..86d54338d63d 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -36,6 +36,7 @@ pub(crate) struct Formats { pub(crate) unary: Rc, pub(crate) unary_const: Rc, pub(crate) unary_global_value: Rc, + pub(crate) unary_ieee16: Rc, pub(crate) unary_ieee32: Rc, pub(crate) unary_ieee64: Rc, pub(crate) unary_imm: Rc, @@ -48,6 +49,8 @@ impl Formats { unary_imm: Builder::new("UnaryImm").imm(&imm.imm64).build(), + unary_ieee16: Builder::new("UnaryIeee16").imm(&imm.ieee16).build(), + unary_ieee32: Builder::new("UnaryIeee32").imm(&imm.ieee32).build(), unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(), diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs index 5584b5564bb9..8e30c6b5f87d 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ -31,6 +31,11 @@ pub(crate) struct Immediates { /// This is used to represent an immediate address offset in load/store instructions. pub offset32: OperandKind, + /// A 16-bit immediate floating point operand. + /// + /// IEEE 754-2008 binary16 interchange format. + pub ieee16: OperandKind, + /// A 32-bit immediate floating point operand. /// /// IEEE 754-2008 binary32 interchange format. @@ -119,6 +124,11 @@ impl Immediates { "ir::immediates::Offset32", "A 32-bit immediate signed offset.", ), + ieee16: new_imm( + "imm", + "ir::immediates::Ieee16", + "A 16-bit immediate floating point number.", + ), ieee32: new_imm( "imm", "ir::immediates::Ieee32", diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 6019ce2d2afd..862ae8271434 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -599,8 +599,10 @@ pub(crate) fn define( // Operand kind shorthands. let i8: &TypeVar = &ValueType::from(LaneType::from(types::Int::I8)).into(); + let f16_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F16)).into(); let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into(); let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into(); + let f128_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F128)).into(); // Starting definitions. let Int = &TypeVar::new( @@ -1285,6 +1287,22 @@ pub(crate) fn define( ]), ); + ig.push( + Inst::new( + "f16const", + r#" + Floating point constant. + + Create a `f16` SSA value with an immediate constant value. + "#, + &formats.unary_ieee16, + ) + .operands_in(vec![Operand::new("N", &imm.ieee16)]) + .operands_out(vec![ + Operand::new("a", f16_).with_doc("A constant f16 scalar value") + ]), + ); + ig.push( Inst::new( "f32const", @@ -1317,6 +1335,22 @@ pub(crate) fn define( ]), ); + ig.push( + Inst::new( + "f128const", + r#" + Floating point constant. + + Create a `f128` SSA value with an immediate constant value. + "#, + &formats.unary_const, + ) + .operands_in(vec![Operand::new("N", &imm.pool_constant)]) + .operands_out(vec![ + Operand::new("a", f128_).with_doc("A constant f128 scalar value") + ]), + ); + ig.push( Inst::new( "vconst", diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 658ccbc96afb..f31da8f04897 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -121,6 +121,7 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option { } match data { &InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64), + &InstructionData::UnaryIeee16 { imm, .. } => Some(imm.bits() as u64), &InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64), &InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()), _ => None, diff --git a/cranelift/codegen/src/ir/constant.rs b/cranelift/codegen/src/ir/constant.rs index 5c50afd7896a..fadaabc314bd 100644 --- a/cranelift/codegen/src/ir/constant.rs +++ b/cranelift/codegen/src/ir/constant.rs @@ -8,7 +8,7 @@ //! - ensuring alignment of constants within the pool, //! - bucketing constants by size. -use crate::ir::immediates::{IntoBytes, V128Imm}; +use crate::ir::immediates::{Ieee128, IntoBytes, V128Imm}; use crate::ir::Constant; use alloc::collections::BTreeMap; use alloc::vec::Vec; @@ -54,6 +54,22 @@ impl From for ConstantData { } } +impl From for ConstantData { + fn from(v: Ieee128) -> Self { + Self(v.into_bytes()) + } +} + +impl TryFrom<&ConstantData> for Ieee128 { + type Error = <[u8; 16] as TryFrom<&'static [u8]>>::Error; + + fn try_from(value: &ConstantData) -> Result { + Ok(Ieee128::with_bits(u128::from_le_bytes( + value.as_slice().try_into()?, + ))) + } +} + impl ConstantData { /// Return the number of bytes in the constant. pub fn len(&self) -> usize { @@ -459,4 +475,15 @@ mod tests { [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ); } + + #[test] + fn constant_ieee128() { + let value = Ieee128::with_bits(0x000102030405060708090a0b0c0d0e0f); + let constant = ConstantData::from(value); + assert_eq!( + constant.as_slice(), + &[0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0] + ); + assert_eq!(Ieee128::try_from(&constant).unwrap().bits(), value.bits()); + } } diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs index e8f5212fad42..a658e3228648 100644 --- a/cranelift/codegen/src/ir/entities.rs +++ b/cranelift/codegen/src/ir/entities.rs @@ -56,8 +56,10 @@ impl Block { /// [`InstBuilder`](super::InstBuilder) instructions: /// /// - [`iconst`](super::InstBuilder::iconst) for integer constants +/// - [`f16const`](super::InstBuilder::f16const) for 16-bit float constants /// - [`f32const`](super::InstBuilder::f32const) for 32-bit float constants /// - [`f64const`](super::InstBuilder::f64const) for 64-bit float constants +/// - [`f128const`](super::InstBuilder::f128const) for 128-bit float constants /// - [`vconst`](super::InstBuilder::vconst) for vector constants /// - [`null`](super::InstBuilder::null) for null reference constants /// diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index c5f5d7bc6168..ffc212e41f7b 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -768,6 +768,9 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { impl Ieee16 { const SIGNIFICAND_BITS: u8 = 10; const EXPONENT_BITS: u8 = 5; + const SIGN_MASK: u16 = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); + const SIGNIFICAND_MASK: u16 = u16::MAX >> (Self::EXPONENT_BITS + 1); + const EXPONENT_MASK: u16 = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK; /// Create a new `Ieee16` containing the bits of `x`. pub fn with_bits(x: u16) -> Self { @@ -779,6 +782,16 @@ impl Ieee16 { self.0 } + /// Computes the absolute value of self. + pub fn abs(self) -> Self { + Self::with_bits(self.bits() & !Self::SIGN_MASK) + } + + /// Returns a number composed of the magnitude of self and the sign of sign. + pub fn copysign(self, sign: Self) -> Self { + Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK)) + } + /// Returns true if self is positive or negative zero pub fn is_zero(&self) -> bool { self.partial_cmp(&Self::with_bits(0)) == Some(Ordering::Equal) @@ -788,14 +801,12 @@ impl Ieee16 { impl PartialOrd for Ieee16 { fn partial_cmp(&self, rhs: &Self) -> Option { // FIXME(#8312): Use Rust `f16` comparisons once `f16` support is stabalised. - let significand_mask = u16::MAX >> (Self::EXPONENT_BITS + 1); - let sign_mask = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); - let exponent_mask = !sign_mask & !significand_mask; - - let lhs_abs = self.bits() & !sign_mask; - let rhs_abs = rhs.bits() & !sign_mask; - if (lhs_abs & exponent_mask == exponent_mask && lhs_abs & significand_mask != 0) - && (rhs_abs & exponent_mask == exponent_mask && rhs_abs & significand_mask != 0) + let lhs_abs = self.bits() & !Self::SIGN_MASK; + let rhs_abs = rhs.bits() & !Self::SIGN_MASK; + if (lhs_abs & Self::EXPONENT_MASK == Self::EXPONENT_MASK + && lhs_abs & Self::SIGNIFICAND_MASK != 0) + && (rhs_abs & Self::EXPONENT_MASK == Self::EXPONENT_MASK + && rhs_abs & Self::SIGNIFICAND_MASK != 0) { // One of the floats is a NaN. return None; @@ -804,8 +815,8 @@ impl PartialOrd for Ieee16 { // Zeros are always equal regardless of sign. return Some(Ordering::Equal); } - let lhs_positive = self.bits() & sign_mask == 0; - let rhs_positive = rhs.bits() & sign_mask == 0; + let lhs_positive = self.bits() & Self::SIGN_MASK == 0; + let rhs_positive = rhs.bits() & Self::SIGN_MASK == 0; if lhs_positive != rhs_positive { // Different signs: negative < positive return lhs_positive.partial_cmp(&rhs_positive); @@ -843,6 +854,20 @@ impl FromStr for Ieee16 { } } +impl IntoBytes for Ieee16 { + fn into_bytes(self) -> Vec { + self.bits().to_le_bytes().to_vec() + } +} + +impl Neg for Ieee16 { + type Output = Self; + + fn neg(self) -> Self { + Self::with_bits(self.bits() ^ Self::SIGN_MASK) + } +} + impl Ieee32 { /// Create a new `Ieee32` containing the bits of `x`. pub fn with_bits(x: u32) -> Self { @@ -1287,6 +1312,9 @@ impl Not for Ieee64 { impl Ieee128 { const SIGNIFICAND_BITS: u8 = 112; const EXPONENT_BITS: u8 = 15; + const SIGN_MASK: u128 = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); + const SIGNIFICAND_MASK: u128 = u128::MAX >> (Self::EXPONENT_BITS + 1); + const EXPONENT_MASK: u128 = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK; /// Create a new `Ieee128` containing the bits of `x`. pub fn with_bits(x: u128) -> Self { @@ -1298,6 +1326,16 @@ impl Ieee128 { self.0 } + /// Computes the absolute value of self. + pub fn abs(self) -> Self { + Self::with_bits(self.bits() & !Self::SIGN_MASK) + } + + /// Returns a number composed of the magnitude of self and the sign of sign. + pub fn copysign(self, sign: Self) -> Self { + Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK)) + } + /// Returns true if self is positive or negative zero pub fn is_zero(&self) -> bool { self.partial_cmp(&Self::with_bits(0)) == Some(Ordering::Equal) @@ -1307,14 +1345,12 @@ impl Ieee128 { impl PartialOrd for Ieee128 { fn partial_cmp(&self, rhs: &Self) -> Option { // FIXME(#8312): Use Rust `f128` comparisons once `f128` support is stabalised. - let significand_mask = u128::MAX >> (Self::EXPONENT_BITS + 1); - let sign_mask = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); - let exponent_mask = !sign_mask & !significand_mask; - - let lhs_abs = self.bits() & !sign_mask; - let rhs_abs = rhs.bits() & !sign_mask; - if (lhs_abs & exponent_mask == exponent_mask && lhs_abs & significand_mask != 0) - && (rhs_abs & exponent_mask == exponent_mask && rhs_abs & significand_mask != 0) + let lhs_abs = self.bits() & !Self::SIGN_MASK; + let rhs_abs = rhs.bits() & !Self::SIGN_MASK; + if (lhs_abs & Self::EXPONENT_MASK == Self::EXPONENT_MASK + && lhs_abs & Self::SIGNIFICAND_MASK != 0) + && (rhs_abs & Self::EXPONENT_MASK == Self::EXPONENT_MASK + && rhs_abs & Self::SIGNIFICAND_MASK != 0) { // One of the floats is a NaN. return None; @@ -1323,8 +1359,8 @@ impl PartialOrd for Ieee128 { // Zeros are always equal regardless of sign. return Some(Ordering::Equal); } - let lhs_positive = self.bits() & sign_mask == 0; - let rhs_positive = rhs.bits() & sign_mask == 0; + let lhs_positive = self.bits() & Self::SIGN_MASK == 0; + let rhs_positive = rhs.bits() & Self::SIGN_MASK == 0; if lhs_positive != rhs_positive { // Different signs: negative < positive return lhs_positive.partial_cmp(&rhs_positive); @@ -1357,6 +1393,20 @@ impl FromStr for Ieee128 { } } +impl IntoBytes for Ieee128 { + fn into_bytes(self) -> Vec { + self.bits().to_le_bytes().to_vec() + } +} + +impl Neg for Ieee128 { + type Output = Self; + + fn neg(self) -> Self { + Self::with_bits(self.bits() ^ Self::SIGN_MASK) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 748367ada58f..f36c4bdb0a12 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -938,6 +938,18 @@ macro_rules! isle_common_prelude_methods { } } + fn f16_neg(&mut self, n: Ieee16) -> Ieee16 { + -n + } + + fn f16_abs(&mut self, n: Ieee16) -> Ieee16 { + n.abs() + } + + fn f16_copysign(&mut self, a: Ieee16, b: Ieee16) -> Ieee16 { + a.copysign(b) + } + fn f32_neg(&mut self, n: Ieee32) -> Ieee32 { n.neg() } @@ -961,5 +973,17 @@ macro_rules! isle_common_prelude_methods { fn f64_copysign(&mut self, a: Ieee64, b: Ieee64) -> Ieee64 { a.copysign(b) } + + fn f128_neg(&mut self, n: Ieee128) -> Ieee128 { + -n + } + + fn f128_abs(&mut self, n: Ieee128) -> Ieee128 { + n.abs() + } + + fn f128_copysign(&mut self, a: Ieee128, b: Ieee128) -> Ieee128 { + a.copysign(b) + } }; } diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 54d88e2495f4..dcc208f0fa69 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -253,7 +253,7 @@ macro_rules! isle_lower_prelude_methods { return self.zero_value(arg); } InstructionData::UnaryConst { - opcode: Opcode::Vconst, + opcode: Opcode::Vconst | Opcode::F128const, constant_handle, } => { let constant_data = @@ -271,6 +271,13 @@ macro_rules! isle_lower_prelude_methods { return None; } } + InstructionData::UnaryIeee16 { imm, .. } => { + if imm.bits() == 0 { + return Some(value); + } else { + return None; + } + } InstructionData::UnaryIeee32 { imm, .. } => { if imm.bits() == 0 { return Some(value); diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs index f6bdcfb38184..44ef9e6b5200 100644 --- a/cranelift/codegen/src/opts.rs +++ b/cranelift/codegen/src/opts.rs @@ -3,7 +3,7 @@ use crate::egraph::{NewOrExistingInst, OptimizeCtx}; pub use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::dfg::ValueDef; -pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm8, V128Imm}; +pub use crate::ir::immediates::{Ieee128, Ieee16, Ieee32, Ieee64, Imm64, Offset32, Uimm8, V128Imm}; use crate::ir::instructions::InstructionFormat; pub use crate::ir::types::*; pub use crate::ir::{ @@ -292,4 +292,12 @@ impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> { fn u64_bswap64(&mut self, n: u64) -> u64 { n.swap_bytes() } + + fn ieee128_constant_extractor(&mut self, n: Constant) -> Option { + self.ctx.func.dfg.constants.get(n).try_into().ok() + } + + fn ieee128_constant(&mut self, n: Ieee128) -> Constant { + self.ctx.func.dfg.constants.insert(n.into()) + } } diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index d6b56543b4fe..9b31b0e7ebd5 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -281,17 +281,33 @@ (extern constructor u64_bswap64 u64_bswap64) ;; Constant fold bitwise float operations (fneg/fabs/fcopysign) +(rule (simplify (fneg $F16 (f16const $F16 n))) + (subsume (f16const $F16 (f16_neg n)))) (rule (simplify (fneg $F32 (f32const $F32 n))) (subsume (f32const $F32 (f32_neg n)))) (rule (simplify (fneg $F64 (f64const $F64 n))) (subsume (f64const $F64 (f64_neg n)))) +(rule (simplify (fneg $F128 (f128const $F128 (ieee128_constant n)))) + (subsume (f128const $F128 (ieee128_constant (f128_neg n))))) +(rule (simplify (fabs $F16 (f16const $F16 n))) + (subsume (f16const $F16 (f16_abs n)))) (rule (simplify (fabs $F32 (f32const $F32 n))) (subsume (f32const $F32 (f32_abs n)))) (rule (simplify (fabs $F64 (f64const $F64 n))) (subsume (f64const $F64 (f64_abs n)))) +(rule (simplify (fabs $F128 (f128const $F128 (ieee128_constant n)))) + (subsume (f128const $F128 (ieee128_constant (f128_abs n))))) +(rule (simplify (fcopysign $F16 (f16const $F16 n) (f16const $F16 m))) + (subsume (f16const $F16 (f16_copysign n m)))) (rule (simplify (fcopysign $F32 (f32const $F32 n) (f32const $F32 m))) (subsume (f32const $F32 (f32_copysign n m)))) (rule (simplify (fcopysign $F64 (f64const $F64 n) (f64const $F64 m))) (subsume (f64const $F64 (f64_copysign n m)))) +(rule (simplify (fcopysign $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m)))) + (subsume (f128const $F128 (ieee128_constant (f128_copysign n m))))) + +(decl ieee128_constant (Ieee128) Constant) +(extern constructor ieee128_constant ieee128_constant) +(extern extractor ieee128_constant ieee128_constant_extractor) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index da11a7e24785..95f31bfdbcb0 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -239,6 +239,12 @@ ;; Floating point operations +(decl pure f16_neg (Ieee16) Ieee16) +(extern constructor f16_neg f16_neg) +(decl pure f16_abs (Ieee16) Ieee16) +(extern constructor f16_abs f16_abs) +(decl pure f16_copysign (Ieee16 Ieee16) Ieee16) +(extern constructor f16_copysign f16_copysign) (decl pure f32_neg (Ieee32) Ieee32) (extern constructor f32_neg f32_neg) (decl pure f32_abs (Ieee32) Ieee32) @@ -251,6 +257,13 @@ (extern constructor f64_abs f64_abs) (decl pure f64_copysign (Ieee64 Ieee64) Ieee64) (extern constructor f64_copysign f64_copysign) +(decl pure f128_neg (Ieee128) Ieee128) +(extern constructor f128_neg f128_neg) +(decl pure f128_abs (Ieee128) Ieee128) +(extern constructor f128_abs f128_abs) +(decl pure f128_copysign (Ieee128 Ieee128) Ieee128) +(extern constructor f128_copysign f128_copysign) +(type Ieee128 (primitive Ieee128)) ;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -263,8 +276,10 @@ (extern const $R32 Type) (extern const $R64 Type) +(extern const $F16 Type) (extern const $F32 Type) (extern const $F64 Type) +(extern const $F128 Type) (extern const $I8X8 Type) (extern const $I8X16 Type) diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index c277defc5ea3..457c9e3a9268 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -671,11 +671,11 @@ impl<'a> Verifier<'a> { self.verify_bitcast(inst, flags, arg, errors)?; } UnaryConst { - opcode: Opcode::Vconst, + opcode: opcode @ (Opcode::Vconst | Opcode::F128const), constant_handle, .. } => { - self.verify_constant_size(inst, constant_handle, errors)?; + self.verify_constant_size(inst, opcode, constant_handle, errors)?; } // Exhaustive list so we can't forget to add new formats @@ -686,6 +686,7 @@ impl<'a> Verifier<'a> { | Unary { .. } | UnaryConst { .. } | UnaryImm { .. } + | UnaryIeee16 { .. } | UnaryIeee32 { .. } | UnaryIeee64 { .. } | Binary { .. } @@ -1034,10 +1035,15 @@ impl<'a> Verifier<'a> { fn verify_constant_size( &self, inst: Inst, + opcode: Opcode, constant: Constant, errors: &mut VerifierErrors, ) -> VerifierStepResult { - let type_size = self.func.dfg.ctrl_typevar(inst).bytes() as usize; + let type_size = match opcode { + Opcode::F128const => types::F128.bytes(), + Opcode::Vconst => self.func.dfg.ctrl_typevar(inst).bytes(), + _ => unreachable!("unexpected opcode {opcode:?}"), + } as usize; let constant_size = self.func.dfg.constants.get(constant).len(); if type_size != constant_size { errors.fatal(( diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 7ca06628eedd..a7ebd49d32db 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -5,8 +5,9 @@ use crate::entity::SecondaryMap; use crate::ir::entities::AnyEntity; +use crate::ir::immediates::Ieee128; use crate::ir::pcc::Fact; -use crate::ir::{Block, DataFlowGraph, Function, Inst, SigRef, Type, Value, ValueDef}; +use crate::ir::{Block, DataFlowGraph, Function, Inst, Opcode, SigRef, Type, Value, ValueDef}; use crate::packed_option::ReservedValue; use alloc::string::{String, ToString}; use alloc::vec::Vec; @@ -396,6 +397,7 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt StoreNoOffset { flags, args, .. } => write!(w, "{} {}, {}", flags, args[0], args[1]), Unary { arg, .. } => write!(w, " {}", arg), UnaryImm { imm, .. } => write!(w, " {}", imm), + UnaryIeee16 { imm, .. } => write!(w, " {}", imm), UnaryIeee32 { imm, .. } => write!(w, " {}", imm), UnaryIeee64 { imm, .. } => write!(w, " {}", imm), UnaryGlobalValue { global_value, .. } => write!(w, " {}", global_value), @@ -494,8 +496,15 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt if let ValueDef::Result(src, _) = dfg.value_def(arg) { let imm = match dfg.insts[src] { UnaryImm { imm, .. } => imm.to_string(), + UnaryIeee16 { imm, .. } => imm.to_string(), UnaryIeee32 { imm, .. } => imm.to_string(), UnaryIeee64 { imm, .. } => imm.to_string(), + UnaryConst { + constant_handle, + opcode: Opcode::F128const, + } => Ieee128::try_from(dfg.constants.get(constant_handle)) + .expect("16-byte f128 constant") + .to_string(), UnaryConst { constant_handle, .. } => constant_handle.to_string(), diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index df1ac116fb03..70d026f41da7 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -313,6 +313,37 @@ block0: ; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412 ; nextln: return v2 +function %f16_fneg() -> f16 { +block0: + v1 = f16const 0.0 + v2 = fneg v1 + return v2 +} + +; check: v3 = f16const -0.0 +; check: return v3 ; v3 = -0.0 + +function %f16_fabs() -> f16 { +block0: + v1 = f16const -0.0 + v2 = fabs v1 + return v2 +} + +; check: v3 = f16const 0.0 +; check: return v3 ; v3 = 0.0 + +function %f16_fcopysign() -> f16 { +block0: + v1 = f16const -0.0 + v2 = f16const NaN + v3 = fcopysign v2, v1 + return v3 +} + +; check: v4 = f16const -NaN +; check: return v4 ; v4 = -NaN + function %f32_fneg() -> f32 { block0: v1 = f32const 0.0 @@ -333,7 +364,7 @@ block0: ; check: v3 = f32const 0.0 ; check: return v3 ; v3 = 0.0 -function %f32_fabs() -> f32 { +function %f32_fcopysign() -> f32 { block0: v1 = f32const -0.0 v2 = f32const NaN @@ -364,7 +395,7 @@ block0: ; check: v3 = f64const 0.0 ; check: return v3 ; v3 = 0.0 -function %f64_fabs() -> f64 { +function %f64_fcopysign() -> f64 { block0: v1 = f64const -0.0 v2 = f64const NaN @@ -374,3 +405,37 @@ block0: ; check: v4 = f64const -NaN ; check: return v4 ; v4 = -NaN + +function %f128_fneg() -> f128 { +block0: + v1 = f128const 0.0 + v2 = fneg v1 + return v2 +} + +; check: const1 = 0x80000000000000000000000000000000 +; check: v3 = f128const const1 +; check: return v3 ; v3 = -0.0 + +function %f128_fabs() -> f128 { +block0: + v1 = f128const -0.0 + v2 = fabs v1 + return v2 +} + +; check: const1 = 0x00000000000000000000000000000000 +; check: v3 = f128const const1 +; check: return v3 ; v3 = 0.0 + +function %f128_fcopysign() -> f128 { +block0: + v1 = f128const -0.0 + v2 = f128const NaN + v3 = fcopysign v2, v1 + return v3 +} + +; check: const2 = 0xffff8000000000000000000000000000 +; check: v4 = f128const const2 +; check: return v4 ; v4 = -NaN diff --git a/cranelift/filetests/filetests/runtests/f128const.clif b/cranelift/filetests/filetests/runtests/f128const.clif new file mode 100644 index 000000000000..d670a2a67871 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/f128const.clif @@ -0,0 +1,53 @@ +test interpret + + +;; These values are special for RISC-V since it has a dedicated +;; instruction to generate them. + +function %special_f128_values() -> f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128, f128 { +block0: + v0 = f128const -0x1.p0 + v1 = f128const 0x1.p-16382 + v2 = f128const 0x1.p-16 + v3 = f128const 0x1.p-15 + v4 = f128const 0x1.p-8 + v5 = f128const 0x1.p-7 + v6 = f128const 0x1.p-4 + v7 = f128const 0x1.p-3 + v8 = f128const 0x1.p-2 + v9 = f128const 0x1.4p-2 + v10 = f128const 0x1.8p-2 + v11 = f128const 0x1.cp-2 + v12 = f128const 0x1.p-1 + v13 = f128const 0x1.4p-1 + v14 = f128const 0x1.8p-1 + v15 = f128const 0x1.cp-1 + v16 = f128const 0x1.p0 + v17 = f128const 0x1.4p0 + v18 = f128const 0x1.8p0 + v19 = f128const 0x1.cp0 + v20 = f128const 0x1.p1 + v21 = f128const 0x1.4p1 + v22 = f128const 0x1.8p1 + v23 = f128const 0x1.p2 + v24 = f128const 0x1.p3 + v25 = f128const 0x1.p4 + v26 = f128const 0x1.p7 + v27 = f128const 0x1.p8 + v28 = f128const 0x1.p15 + v29 = f128const 0x1.p16 + v30 = f128const +Inf + v31 = f128const +NaN + + return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31 +} + +; run: %special_f128_values() == [-0x1.p0, 0x1.p-16382, 0x1.p-16, 0x1.p-15, 0x1.p-8, 0x1.p-7, 0x1.p-4, 0x1.p-3, 0x1.p-2, 0x1.4p-2, 0x1.8p-2, 0x1.cp-2, 0x1.p-1, 0x1.4p-1, 0x1.8p-1, 0x1.cp-1, 0x1.p0, 0x1.4p0, 0x1.8p0, 0x1.cp0, 0x1.p1, 0x1.4p1, 0x1.8p1, 0x1.p2, 0x1.p3, 0x1.p4, 0x1.p7, 0x1.p8, 0x1.p15, 0x1.p16, +Inf, +NaN] + +function %f128const_neg_nan() -> f128 { +block0: + v0 = f128const -NaN + return v0 +} + +; run: %f128const_neg_nan() == -NaN diff --git a/cranelift/filetests/filetests/runtests/f16const.clif b/cranelift/filetests/filetests/runtests/f16const.clif new file mode 100644 index 000000000000..99507b888548 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/f16const.clif @@ -0,0 +1,53 @@ +test interpret + + +;; These values are special for RISC-V since it has a dedicated +;; instruction to generate them. + +function %special_f16_values() -> f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16 { +block0: + v0 = f16const -0x1.p0 + v1 = f16const 0x1.p-14 + v2 = f16const 0x1.p-16 + v3 = f16const 0x1.p-15 + v4 = f16const 0x1.p-8 + v5 = f16const 0x1.p-7 + v6 = f16const 0x1.p-4 + v7 = f16const 0x1.p-3 + v8 = f16const 0x1.p-2 + v9 = f16const 0x1.4p-2 + v10 = f16const 0x1.8p-2 + v11 = f16const 0x1.cp-2 + v12 = f16const 0x1.p-1 + v13 = f16const 0x1.4p-1 + v14 = f16const 0x1.8p-1 + v15 = f16const 0x1.cp-1 + v16 = f16const 0x1.p0 + v17 = f16const 0x1.4p0 + v18 = f16const 0x1.8p0 + v19 = f16const 0x1.cp0 + v20 = f16const 0x1.p1 + v21 = f16const 0x1.4p1 + v22 = f16const 0x1.8p1 + v23 = f16const 0x1.p2 + v24 = f16const 0x1.p3 + v25 = f16const 0x1.p4 + v26 = f16const 0x1.p7 + v27 = f16const 0x1.p8 + v28 = f16const 0x1.p15 + v29 = f16const +Inf + v30 = f16const +NaN + + return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30 +} + +; run: %special_f16_values() == [-0x1.p0, 0x1.p-14, 0x1.p-16, 0x1.p-15, 0x1.p-8, 0x1.p-7, 0x1.p-4, 0x1.p-3, 0x1.p-2, 0x1.4p-2, 0x1.8p-2, 0x1.cp-2, 0x1.p-1, 0x1.4p-1, 0x1.8p-1, 0x1.cp-1, 0x1.p0, 0x1.4p0, 0x1.8p0, 0x1.cp0, 0x1.p1, 0x1.4p1, 0x1.8p1, 0x1.p2, 0x1.p3, 0x1.p4, 0x1.p7, 0x1.p8, 0x1.p15, +Inf, +NaN] + + +function %f16const_neg_nan() -> f16 { +block0: + v0 = f16const -NaN + return v0 +} + +; run: %f16const_neg_nan() == -NaN diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index af345c73b82b..61c110cc36bf 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -1091,6 +1091,7 @@ fn inserter_for_format(fmt: InstructionFormat) -> OpcodeInserter { InstructionFormat::Unary => insert_opcode, InstructionFormat::UnaryConst => insert_const, InstructionFormat::UnaryGlobalValue => todo!(), + InstructionFormat::UnaryIeee16 => insert_const, InstructionFormat::UnaryIeee32 => insert_const, InstructionFormat::UnaryIeee64 => insert_const, InstructionFormat::UnaryImm => insert_const, @@ -1403,8 +1404,13 @@ where let lo = builder.ins().iconst(I64, i as i64); builder.ins().iconcat(lo, hi) } + DataValue::F16(f) => builder.ins().f16const(f), DataValue::F32(f) => builder.ins().f32const(f), DataValue::F64(f) => builder.ins().f64const(f), + DataValue::F128(f) => { + let handle = builder.func.dfg.constants.insert(f.into()); + builder.ins().f128const(handle) + } DataValue::V128(bytes) => { let data = bytes.to_vec().into(); let handle = builder.func.dfg.constants.insert(data); diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 01d0377e4559..879417e8fe60 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -83,18 +83,19 @@ where let imm = || -> DataValue { DataValue::from(match inst { InstructionData::UnaryConst { - constant_handle, .. + constant_handle, + opcode, } => { let buffer = state .get_current_function() .dfg .constants - .get(constant_handle.clone()) - .as_slice(); - match ctrl_ty.bytes() { - 16 => DataValue::V128(buffer.try_into().expect("a 16-byte data buffer")), - 8 => DataValue::V64(buffer.try_into().expect("an 8-byte data buffer")), - length => panic!("unexpected UnaryConst buffer length {}", length), + .get(constant_handle.clone()); + match (ctrl_ty.bytes(), opcode) { + (_, Opcode::F128const) => DataValue::F128(buffer.try_into().expect("a 16-byte data buffer")), + (16, Opcode::Vconst) => DataValue::V128(buffer.as_slice().try_into().expect("a 16-byte data buffer")), + (8, Opcode::Vconst) => DataValue::V64(buffer.as_slice().try_into().expect("an 8-byte data buffer")), + (length, opcode) => panic!("unexpected UnaryConst controlling type size {length} for opcode {opcode:?}"), } } InstructionData::Shuffle { imm, .. } => { @@ -115,6 +116,8 @@ where InstructionData::BinaryImm8 { imm, .. } | InstructionData::TernaryImm8 { imm, .. } => { DataValue::from(imm as i8) // Note the switch from unsigned to signed. } + // 16-bit + InstructionData::UnaryIeee16 { imm, .. } => DataValue::from(imm), // 32-bit InstructionData::UnaryIeee32 { imm, .. } => DataValue::from(imm), InstructionData::Load { offset, .. } @@ -552,8 +555,10 @@ where ControlFlow::Continue } Opcode::Iconst => assign(DataValueExt::int(imm().into_int_signed()?, ctrl_ty)?), + Opcode::F16const => assign(imm()), Opcode::F32const => assign(imm()), Opcode::F64const => assign(imm()), + Opcode::F128const => assign(imm()), Opcode::Vconst => assign(imm()), Opcode::Null => unimplemented!("Null"), Opcode::Nop => ControlFlow::Continue, diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 3783bb49e543..299b7c892ac1 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2874,6 +2874,10 @@ impl<'a> Parser<'a> { imm: Imm64::new(unsigned), } } + InstructionFormat::UnaryIeee16 => InstructionData::UnaryIeee16 { + opcode, + imm: self.match_ieee16("expected immediate 16-bit float operand")?, + }, InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 { opcode, imm: self.match_ieee32("expected immediate 32-bit float operand")?, @@ -2888,6 +2892,9 @@ impl<'a> Parser<'a> { let c = self.match_constant()?; ctx.check_constant(c, self.loc)?; c + } else if opcode == Opcode::F128const { + let ieee128 = self.match_ieee128("expected immediate 128-bit float operand")?; + ctx.function.dfg.constants.insert(ieee128.into()) } else if let Some(controlling_type) = explicit_control_type { // If an explicit control type is present, we expect a sized value and insert // it in the constant pool.