diff --git a/js/src/jit-test/lib/wasm-binary.js b/js/src/jit-test/lib/wasm-binary.js index fb73c0f8f9433..7ae0be157d537 100644 --- a/js/src/jit-test/lib/wasm-binary.js +++ b/js/src/jit-test/lib/wasm-binary.js @@ -133,6 +133,10 @@ const F64x2PMinCode = 0xf6; const F64x2PMaxCode = 0xf7; const V128Load32ZeroCode = 0xfc; const V128Load64ZeroCode = 0xfd; +const F32x4RelaxedFmaCode = 0xaf; +const F32x4RelaxedFmsCode = 0xb0; +const F64x2RelaxedFmaCode = 0xcf; +const F64x2RelaxedFmsCode = 0xd0; // SIMD wormhole opcodes. const WORMHOLE_SELFTEST = 0; diff --git a/js/src/jit-test/tests/wasm/simd/experimental.js b/js/src/jit-test/tests/wasm/simd/experimental.js index a594785f11ca8..6727909836adb 100644 --- a/js/src/jit-test/tests/wasm/simd/experimental.js +++ b/js/src/jit-test/tests/wasm/simd/experimental.js @@ -1,3 +1,5 @@ +// |jit-test| --wasm-relaxed-simd; skip-if: !wasmSimdEnabled() + // Experimental opcodes. We have no text parsing support for these yet. The // tests will be cleaned up and moved into ad-hack.js if the opcodes are // adopted. @@ -61,4 +63,43 @@ function V128StoreExpr(addr, v) { SimdPrefix, V128StoreCode, 4, varU32(0)]; } -// (Currently no tests here but there were some in the past and there will be more in the future.) 
+// FMA/FMS, https://github.com/WebAssembly/relaxed-simd/issues/27 + +function fma(a, x, y) { return a + (x * y) } +function fms(a, x, y) { return a - (x * y) } + +var fas = [0, 100, 500, 700]; +var fxs = [10, 20, 30, 40]; +var fys = [-2, -3, -4, -5]; +var das = [0, 100]; +var dxs = [10, 20]; +var dys = [-2, -3]; + +for ( let [opcode, as, xs, ys, operator] of [[F32x4RelaxedFmaCode, fas, fxs, fys, fma], + [F32x4RelaxedFmsCode, fas, fxs, fys, fms], + [F64x2RelaxedFmaCode, das, dxs, dys, fma], + [F64x2RelaxedFmsCode, das, dxs, dys, fms]] ) { + var k = xs.length; + var ans = iota(k).map((i) => operator(as[i], xs[i], ys[i])) + + var ins = wasmEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(opcode)])]})])])); + + var mem = new (k == 4 ? 
Float32Array : Float64Array)(ins.exports.mem.buffer); + set(mem, k, as); + set(mem, 2*k, xs); + set(mem, 3*k, ys); + ins.exports.run(); + var result = get(mem, 0, k); + assertSame(result, ans); +} diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h index 844cc310654de..13350cc4df05c 100644 --- a/js/src/jit/MacroAssembler.h +++ b/js/src/jit/MacroAssembler.h @@ -3455,6 +3455,20 @@ class MacroAssembler : public MacroAssemblerSpecific { inline void nearestFloat64x2(FloatRegister src, FloatRegister dest) DEFINED_ON(x86_shared, arm64); + // Floating multiply-accumulate: srcDest [+-]= src1 * src2 + + inline void fmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) DEFINED_ON(x86_shared, arm64); + + inline void fmsFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) DEFINED_ON(x86_shared, arm64); + + inline void fmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) DEFINED_ON(x86_shared, arm64); + + inline void fmsFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) DEFINED_ON(x86_shared, arm64); + public: // ======================================================================== // Truncate floating point. 
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp index fe6c788099beb..46d3a8ef39929 100644 --- a/js/src/jit/arm64/CodeGenerator-arm64.cpp +++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp @@ -2976,6 +2976,22 @@ void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) { masm.bitwiseSelectSimd128(lhs, rhs, controlDest); break; } + case wasm::SimdOp::F32x4RelaxedFma: + masm.fmaFloat32x4(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F32x4RelaxedFms: + masm.fmsFloat32x4(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F64x2RelaxedFma: + masm.fmaFloat64x2(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F64x2RelaxedFms: + masm.fmsFloat64x2(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; default: MOZ_CRASH("NYI"); } diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp index 3aa173654a420..2c9c3689070db 100644 --- a/js/src/jit/arm64/Lowering-arm64.cpp +++ b/js/src/jit/arm64/Lowering-arm64.cpp @@ -994,6 +994,16 @@ void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) { defineReuseInput(lir, ins, LWasmTernarySimd128::V2); break; } + case wasm::SimdOp::F32x4RelaxedFma: + case wasm::SimdOp::F32x4RelaxedFms: + case wasm::SimdOp::F64x2RelaxedFma: + case wasm::SimdOp::F64x2RelaxedFms: { + auto* lir = new (alloc()) + LWasmTernarySimd128(ins->simdOp(), useRegisterAtStart(ins->v0()), + useRegister(ins->v1()), useRegister(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V0); + break; + } default: MOZ_CRASH("NYI"); } diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h index de1129f5e1d1d..5a182ce4ec74c 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h +++ 
b/js/src/jit/arm64/MacroAssembler-arm64-inl.h @@ -3827,6 +3827,28 @@ void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) { Frintn(Simd2D(dest), Simd2D(src)); } +// Floating multiply-accumulate: srcDest [+-]= src1 * src2 + +void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmla(Simd4S(srcDest), Simd4S(src1), Simd4S(src2)); +} + +void MacroAssembler::fmsFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmls(Simd4S(srcDest), Simd4S(src1), Simd4S(src2)); +} + +void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmla(Simd2D(srcDest), Simd2D(src1), Simd2D(src2)); +} + +void MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2)); +} + //}}} check_macroassembler_style // =============================================================== diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp index fcb9978c13eee..a6eab3026f547 100644 --- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -2270,6 +2270,22 @@ void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) { masm.bitwiseSelectSimd128(control, lhsDest, rhs, lhsDest, temp); break; } + case wasm::SimdOp::F32x4RelaxedFma: + masm.fmaFloat32x4(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F32x4RelaxedFms: + masm.fmsFloat32x4(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F64x2RelaxedFma: + masm.fmaFloat64x2(ToFloatRegister(ins->v1()), ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; + case wasm::SimdOp::F64x2RelaxedFms: + masm.fmsFloat64x2(ToFloatRegister(ins->v1()), 
ToFloatRegister(ins->v2()), + ToFloatRegister(ins->v0())); + break; default: MOZ_CRASH("NYI"); } diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp index 9a1d705a7ebfc..3db6105544735 100644 --- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -832,6 +832,16 @@ void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) { defineReuseInput(lir, ins, LWasmTernarySimd128::V0); break; } + case wasm::SimdOp::F32x4RelaxedFma: + case wasm::SimdOp::F32x4RelaxedFms: + case wasm::SimdOp::F64x2RelaxedFma: + case wasm::SimdOp::F64x2RelaxedFms: { + auto* lir = new (alloc()) + LWasmTernarySimd128(ins->simdOp(), useRegisterAtStart(ins->v0()), + useRegister(ins->v1()), useRegister(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V0); + break; + } default: MOZ_CRASH("NYI"); } diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h index 6fd4473e3edbf..d49619692cc36 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -2837,6 +2837,41 @@ void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src, vpmovzxdq(Operand(dest), dest); } +// Floating multiply-accumulate: srcDest [+-]= src1 * src2 +// The Intel FMA feature is some AVX* special sauce, no support yet. 
+ +void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + ScratchSimd128Scope scratch(*this); + moveSimd128(src1, scratch); + mulFloat32x4(src2, scratch); + addFloat32x4(scratch, srcDest); +} + +void MacroAssembler::fmsFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + ScratchSimd128Scope scratch(*this); + moveSimd128(src1, scratch); + mulFloat32x4(src2, scratch); + subFloat32x4(scratch, srcDest); +} + +void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + ScratchSimd128Scope scratch(*this); + moveSimd128(src1, scratch); + mulFloat64x2(src2, scratch); + addFloat64x2(scratch, srcDest); +} + +void MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + ScratchSimd128Scope scratch(*this); + moveSimd128(src1, scratch); + mulFloat64x2(src2, scratch); + subFloat64x2(scratch, srcDest); +} + // ======================================================================== // Truncate floating point. 
diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp index ab056ba2a9c50..83152b8815db3 100644 --- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -15240,6 +15240,28 @@ static void BitselectV128(MacroAssembler& masm, RegV128 rhs, RegV128 control, } # endif +# ifdef ENABLE_WASM_RELAXED_SIMD +static void RelaxedFmaF32x4(MacroAssembler& masm, RegV128 rs1, RegV128 rs2, + RegV128 rsd) { + masm.fmaFloat32x4(rs1, rs2, rsd); +} + +static void RelaxedFmsF32x4(MacroAssembler& masm, RegV128 rs1, RegV128 rs2, + RegV128 rsd) { + masm.fmsFloat32x4(rs1, rs2, rsd); +} + +static void RelaxedFmaF64x2(MacroAssembler& masm, RegV128 rs1, RegV128 rs2, + RegV128 rsd) { + masm.fmaFloat64x2(rs1, rs2, rsd); +} + +static void RelaxedFmsF64x2(MacroAssembler& masm, RegV128 rs1, RegV128 rs2, + RegV128 rsd) { + masm.fmsFloat64x2(rs1, rs2, rsd); +} +# endif + void BaseCompiler::emitVectorAndNot() { // We want x & ~y but the available operation is ~x & y, so reverse the // operands. 
@@ -16950,6 +16972,29 @@ bool BaseCompiler::emitBody() { CHECK_NEXT(emitStoreLane(4)); case uint32_t(SimdOp::V128Store64Lane): CHECK_NEXT(emitStoreLane(8)); +# ifdef ENABLE_WASM_RELAXED_SIMD + case uint32_t(SimdOp::F32x4RelaxedFma): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchTernary1(RelaxedFmaF32x4, ValType::V128)); + case uint32_t(SimdOp::F32x4RelaxedFms): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchTernary1(RelaxedFmsF32x4, ValType::V128)); + case uint32_t(SimdOp::F64x2RelaxedFma): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchTernary1(RelaxedFmaF64x2, ValType::V128)); + case uint32_t(SimdOp::F64x2RelaxedFms): + if (!moduleEnv_.v128RelaxedEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchTernary1(RelaxedFmsF64x2, ValType::V128)); + break; +# endif default: break; } // switch (op.b1) diff --git a/js/src/wasm/WasmConstants.h b/js/src/wasm/WasmConstants.h index 30b8f634d29e3..e258b994fcf84 100644 --- a/js/src/wasm/WasmConstants.h +++ b/js/src/wasm/WasmConstants.h @@ -676,8 +676,8 @@ enum class SimdOp { I32x4ShrS = 0xac, I32x4ShrU = 0xad, I32x4Add = 0xae, - // AddSatS = 0xaf - // AddSatU = 0xb0 + F32x4RelaxedFma = 0xaf, + F32x4RelaxedFms = 0xb0, I32x4Sub = 0xb1, // SubSatS = 0xb2 // SubSatU = 0xb3 @@ -708,8 +708,8 @@ enum class SimdOp { I64x2ShrS = 0xcc, I64x2ShrU = 0xcd, I64x2Add = 0xce, - // Unused = 0xcf - // Unused = 0xd0 + F64x2RelaxedFma = 0xcf, + F64x2RelaxedFms = 0xd0, I64x2Sub = 0xd1, // Unused = 0xd2 // Unused = 0xd3 diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index f24e1d4edaa69..9b58c5b9db494 100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -5384,6 +5384,18 @@ static bool EmitBodyExprs(FunctionCompiler& f) { CHECK(EmitStoreLaneSimd128(f, 4)); case 
uint32_t(SimdOp::V128Store64Lane): CHECK(EmitStoreLaneSimd128(f, 8)); +# ifdef ENABLE_WASM_RELAXED_SIMD + case uint32_t(SimdOp::F32x4RelaxedFma): + case uint32_t(SimdOp::F32x4RelaxedFms): + case uint32_t(SimdOp::F64x2RelaxedFma): + case uint32_t(SimdOp::F64x2RelaxedFms): { + if (!f.moduleEnv().v128RelaxedEnabled()) { + return f.iter().unrecognizedOpcode(&op); + } + CHECK(EmitTernarySimd128(f, SimdOp(op.b1))); + } +# endif + default: return f.iter().unrecognizedOpcode(&op); } // switch (op.b1) diff --git a/js/src/wasm/WasmOpIter.cpp b/js/src/wasm/WasmOpIter.cpp index b434457214932..227db638bf68a 100644 --- a/js/src/wasm/WasmOpIter.cpp +++ b/js/src/wasm/WasmOpIter.cpp @@ -591,6 +591,11 @@ OpKind wasm::Classify(OpBytes op) { case SimdOp::V128Store32Lane: case SimdOp::V128Store64Lane: WASM_SIMD_OP(OpKind::StoreLane); + case SimdOp::F32x4RelaxedFma: + case SimdOp::F32x4RelaxedFms: + case SimdOp::F64x2RelaxedFma: + case SimdOp::F64x2RelaxedFms: + WASM_SIMD_OP(OpKind::Ternary); # ifdef ENABLE_WASM_SIMD_WORMHOLE case SimdOp::MozWHSELFTEST: case SimdOp::MozWHPMADDUBSW: diff --git a/js/src/wasm/WasmValidate.cpp b/js/src/wasm/WasmValidate.cpp index f4ccf0cba24b6..05b6244bf2f47 100644 --- a/js/src/wasm/WasmValidate.cpp +++ b/js/src/wasm/WasmValidate.cpp @@ -1007,6 +1007,19 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env, CHECK(iter.readStoreLane(8, &addr, &noIndex, &nothing)); } +# ifdef ENABLE_WASM_RELAXED_SIMD + case uint32_t(SimdOp::F32x4RelaxedFma): + case uint32_t(SimdOp::F32x4RelaxedFms): + case uint32_t(SimdOp::F64x2RelaxedFma): + case uint32_t(SimdOp::F64x2RelaxedFms): { + if (!env.v128RelaxedEnabled()) { + return iter.unrecognizedOpcode(&op); + } + CHECK( + iter.readTernary(ValType::V128, &nothing, &nothing, &nothing)); + } +# endif + default: return iter.unrecognizedOpcode(&op); }