diff --git a/.github/actions/install-ninja/action.yml b/.github/actions/install-ninja/action.yml new file mode 100644 index 000000000000..5f4dda4fec5d --- /dev/null +++ b/.github/actions/install-ninja/action.yml @@ -0,0 +1,18 @@ +name: 'Install ninja' +description: 'Install ninja' + +runs: + using: composite + steps: + - name: Install ninja (macOS) + run: brew install ninja + if: runner.os == 'macOS' + shell: bash + - name: Install ninja (Windows) + run: choco install ninja + if: runner.os == 'Windows' + shell: bash + - name: Install ninja (Linux) + run: sudo apt-get update && sudo apt-get install -y ninja-build + if: runner.os == 'Linux' + shell: bash diff --git a/.github/actions/install-rust/action.yml b/.github/actions/install-rust/action.yml index a0859dda1206..87a05edb8fbd 100644 --- a/.github/actions/install-rust/action.yml +++ b/.github/actions/install-rust/action.yml @@ -28,7 +28,7 @@ runs: elif [ "${{ inputs.toolchain }}" = "msrv" ]; then echo "version=1.$msrv.0" >> "$GITHUB_OUTPUT" elif [ "${{ inputs.toolchain }}" = "wasmtime-ci-pinned-nightly" ]; then - echo "version=nightly-2024-10-02" >> "$GITHUB_OUTPUT" + echo "version=nightly-2024-10-22" >> "$GITHUB_OUTPUT" else echo "version=${{ inputs.toolchain }}" >> "$GITHUB_OUTPUT" fi diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e97582b96e8e..29d8d83fcac0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1123,6 +1123,7 @@ jobs: with: submodules: true + - uses: ./.github/actions/install-ninja - uses: ./.github/actions/install-rust with: toolchain: ${{ matrix.rust }} diff --git a/ci/build-release-artifacts.sh b/ci/build-release-artifacts.sh index cd91e430032b..606cf61c9b87 100755 --- a/ci/build-release-artifacts.sh +++ b/ci/build-release-artifacts.sh @@ -45,6 +45,7 @@ cargo build --release $flags --target $target -p wasmtime-cli $bin_flags --featu mkdir -p target/c-api-build cd target/c-api-build cmake \ + -G Ninja \ ../../crates/c-api \ $cmake_flags \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/ci/docker/aarch64-linux/Dockerfile b/ci/docker/aarch64-linux/Dockerfile index 4573e830e435..cbe2a7166ac8 100644 --- a/ci/docker/aarch64-linux/Dockerfile +++ b/ci/docker/aarch64-linux/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:16.04 -RUN apt-get update -y && apt-get install -y gcc gcc-aarch64-linux-gnu ca-certificates curl make git +RUN apt-get update -y && apt-get install -y gcc gcc-aarch64-linux-gnu ca-certificates curl make git ninja-build RUN git config --global --add safe.directory '*' # The CMake in Ubuntu 16.04 was a bit too old for us to use so download one from diff --git a/ci/docker/riscv64gc-linux/Dockerfile b/ci/docker/riscv64gc-linux/Dockerfile index c10524d862b0..cdfbd8eacf7a 100644 --- a/ci/docker/riscv64gc-linux/Dockerfile +++ b/ci/docker/riscv64gc-linux/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:22.04 -RUN apt-get update -y && apt-get install -y gcc gcc-riscv64-linux-gnu ca-certificates cmake git +RUN apt-get update -y && apt-get install -y gcc gcc-riscv64-linux-gnu ca-certificates cmake git ninja-build RUN git config --global --add safe.directory '*' ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc diff --git a/ci/docker/s390x-linux/Dockerfile b/ci/docker/s390x-linux/Dockerfile index 6c97b174dea2..ebe629581e9d 100644 --- a/ci/docker/s390x-linux/Dockerfile +++ b/ci/docker/s390x-linux/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:16.04 -RUN apt-get update -y && apt-get install -y gcc gcc-s390x-linux-gnu ca-certificates curl make git +RUN apt-get update -y && apt-get 
install -y gcc gcc-s390x-linux-gnu ca-certificates curl make git ninja-build RUN git config --global --add safe.directory '*' # The CMake in Ubuntu 16.04 was a bit too old for us to use so download one from diff --git a/ci/docker/x86_64-linux/Dockerfile b/ci/docker/x86_64-linux/Dockerfile index add422150508..814407be098a 100644 --- a/ci/docker/x86_64-linux/Dockerfile +++ b/ci/docker/x86_64-linux/Dockerfile @@ -1,4 +1,9 @@ FROM almalinux:8 -RUN dnf install -y git gcc make cmake git +RUN dnf install -y git gcc make cmake git unzip RUN git config --global --add safe.directory '*' + +WORKDIR /usr/local/bin +RUN curl -LO https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip +RUN unzip ./ninja-linux +WORKDIR / diff --git a/ci/docker/x86_64-musl/Dockerfile b/ci/docker/x86_64-musl/Dockerfile index acd6dd88d981..c1ca3d9a3f4a 100644 --- a/ci/docker/x86_64-musl/Dockerfile +++ b/ci/docker/x86_64-musl/Dockerfile @@ -6,7 +6,7 @@ RUN apk add libgcc # Use something glibc-based for the actual compile because the Rust toolchain # we're using is glibc-based in CI. FROM ubuntu:24.04 -RUN apt-get update -y && apt-get install -y cmake musl-tools git +RUN apt-get update -y && apt-get install -y cmake musl-tools git ninja-build COPY --from=libgcc_s_src /usr/lib/libgcc_s.so.1 /usr/lib/x86_64-linux-musl RUN git config --global --add safe.directory '*' diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 50f067e1cfd7..9d4ff1b241b3 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -264,5 +264,5 @@ fn rustfmt(code: &str) -> std::io::Result { )); } - Ok(String::from_utf8(data).expect("rustfmt always writs utf-8 to stdout")) + Ok(String::from_utf8(data).expect("rustfmt always writes utf-8 to stdout")) } diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs index ba7b3b19d74c..ae456b7d6545 100644 --- a/cranelift/codegen/src/isa/x64/encoding/rex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs @@ -1,21 +1,19 @@ -//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel -//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module) -//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a +//! Encodes instructions in the standard x86 encoding mode. This is called +//! IA-32E mode in the Intel manuals but corresponds to the addition of the +//! REX-prefix format (hence the name of this module) that allowed encoding +//! instructions in both compatibility mode (32-bit instructions running on a //! 64-bit OS) and in 64-bit mode (using the full 64-bit address space). //! -//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the -//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only -//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following -//! means "hardware register encoding number". - -use crate::machinst::{Reg, RegClass}; -use crate::{ - isa::x64::inst::{ - args::{Amode, OperandSize}, - regs, Inst, LabelUse, - }, - machinst::MachBuffer, -}; +//! For all of the routines that take both a memory-or-reg operand (sometimes +//! called "E" in the Intel documentation, see the Intel Developer's manual, +//! vol. 2, section A.2) and a reg-only operand ("G" in Intel-ese), the order is +//! always G first, then E. 
The term "enc" in the following means "hardware +//! register encoding number". + +use super::ByteSink; +use crate::isa::x64::inst::args::{Amode, OperandSize}; +use crate::isa::x64::inst::{regs, Inst, LabelUse}; +use crate::machinst::{MachBuffer, Reg, RegClass}; pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -81,6 +79,12 @@ impl RexFlags { Self(1) } + /// True if 64-bit operands are used. + #[inline(always)] + pub fn must_clear_w(&self) -> bool { + (self.0 & 1) != 0 + } + /// Require that the REX prefix is emitted. #[inline(always)] pub fn always_emit(&mut self) -> &mut Self { @@ -88,6 +92,12 @@ impl RexFlags { self } + /// True if the REX prefix must always be emitted. + #[inline(always)] + pub fn must_always_emit(&self) -> bool { + (self.0 & 2) != 0 + } + /// Emit the rex prefix if the referenced register would require it for 8-bit operations. #[inline(always)] pub fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self { @@ -98,21 +108,9 @@ impl RexFlags { self } - /// True if 64-bit operands are used. - #[inline(always)] - pub fn must_clear_w(&self) -> bool { - (self.0 & 1) != 0 - } - - /// True if the REX prefix must always be emitted. - #[inline(always)] - pub fn must_always_emit(&self) -> bool { - (self.0 & 2) != 0 - } - /// Emit a unary instruction. #[inline(always)] - pub fn emit_one_op(&self, sink: &mut MachBuffer, enc_e: u8) { + pub fn emit_one_op(&self, sink: &mut BS, enc_e: u8) { // Register Operand coded in Opcode Byte // REX.R and REX.X unused // REX.B == 1 accesses r8-r15 @@ -128,7 +126,7 @@ impl RexFlags { /// Emit a binary instruction. #[inline(always)] - pub fn emit_two_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_e: u8) { + pub fn emit_two_op(&self, sink: &mut BS, enc_g: u8, enc_e: u8) { let w = if self.must_clear_w() { 0 } else { 1 }; let r = (enc_g >> 3) & 1; let x = 0; @@ -141,9 +139,9 @@ impl RexFlags { /// Emit a ternary instruction. #[inline(always)] - pub fn emit_three_op( + pub fn emit_three_op( &self, - sink: &mut MachBuffer, + sink: &mut BS, enc_g: u8, enc_index: u8, enc_base: u8, @@ -232,7 +230,7 @@ pub enum LegacyPrefixes { impl LegacyPrefixes { /// Emit the legacy prefix as bytes (e.g. in REX instructions). #[inline(always)] - pub(crate) fn emit(&self, sink: &mut MachBuffer) { + pub(crate) fn emit(&self, sink: &mut BS) { match self { Self::_66 => sink.put1(0x66), Self::_F0 => sink.put1(0xF0), @@ -501,7 +499,7 @@ impl Imm { } } - fn emit(&self, sink: &mut MachBuffer) { + fn emit(&self, sink: &mut BS) { match self { Imm::None => {} Imm::Imm8(n) => sink.put1(*n as u8), @@ -514,8 +512,8 @@ impl Imm { /// /// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E /// operand is a register rather than memory. Hence it is much simpler. -pub(crate) fn emit_std_enc_enc( - sink: &mut MachBuffer, +pub(crate) fn emit_std_enc_enc( + sink: &mut BS, prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, @@ -571,8 +569,8 @@ pub(crate) fn emit_std_reg_mem( ); } -pub(crate) fn emit_std_reg_reg( - sink: &mut MachBuffer, +pub(crate) fn emit_std_reg_reg( + sink: &mut BS, prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, @@ -586,7 +584,7 @@ pub(crate) fn emit_std_reg_reg( } /// Write a suitable number of bits from an imm64 to the sink. 
-pub(crate) fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { +pub(crate) fn emit_simm(sink: &mut BS, size: u8, simm32: u32) { match size { 8 | 4 => sink.put4(simm32), 2 => sink.put2(simm32 as u16), diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 3d7a72462048..0421ad965337 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -24,7 +24,8 @@ (AluRM (size OperandSize) ;; 1, 2, 4 or 8 (op AluRmiROpcode) (src1_dst SyntheticAmode) - (src2 Gpr)) + (src2 Gpr) + (lock bool)) ;; Integer arithmetic binary op that relies on the VEX prefix. ;; NOTE: we don't currently support emitting VEX instructions with memory @@ -682,6 +683,18 @@ (dst_old_low WritableReg) (dst_old_high WritableReg)) + ;; A standard (native) `lock xadd src, (amode)` + (LockXadd (size OperandSize) + (operand Reg) + (mem SyntheticAmode) + (dst_old WritableReg)) + + ;; A standard (native) `xchg src, (amode)` + (Xchg (size OperandSize) + (operand Reg) + (mem SyntheticAmode) + (dst_old WritableReg)) + ;; A synthetic instruction, based on a loop around a native `lock ;; cmpxchg` instruction. ;; @@ -708,7 +721,7 @@ ;; - %rflags is written. Do not assume anything about it after the ;; instruction. (AtomicRmwSeq (ty Type) ;; I8, I16, I32, or I64 - (op MachAtomicRmwOp) + (op AtomicRmwSeqOp) (mem SyntheticAmode) (operand Reg) (temp WritableReg) @@ -719,7 +732,7 @@ ;; ;; This is the same as `AtomicRmwSeq`, but for 128-bit integers. ;; - ;; For `MachAtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead. + ;; For `AtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead. ;; ;; This instruction sequence has fixed register uses as follows: ;; - %rax (low), %rdx (high) (written) the old value at `mem` @@ -727,7 +740,7 @@ ;; the replacement value ;; - %rflags is written. Do not assume anything about it after the ;; instruction. - (Atomic128RmwSeq (op MachAtomicRmwOp) + (Atomic128RmwSeq (op Atomic128RmwSeqOp) (mem BoxSyntheticAmode) (operand_low Reg) (operand_high Reg) @@ -739,8 +752,8 @@ ;; A synthetic instruction, based on a loop around a native `lock ;; cmpxchg16b` instruction. ;; - ;; This is `Atomic128XchgSeq` but only for `MachAtomicRmwOp::Xchg`. As - ;; the replacement value is the same every time, this instruction doesn't + ;; This is `Atomic128XchgSeq` but only for `AtomicRmwOp::Xchg`. As the + ;; replacement value is the same every time, this instruction doesn't ;; require any temporary registers. 
;; ;; This instruction sequence has fixed register uses as follows: @@ -4902,7 +4915,7 @@ (decl alu_rm (Type AluRmiROpcode Amode Gpr) SideEffectNoResult) (rule (alu_rm ty opcode src1_dst src2) (let ((size OperandSize (operand_size_of_type_32_64 ty))) - (SideEffectNoResult.Inst (MInst.AluRM size opcode src1_dst src2)))) + (SideEffectNoResult.Inst (MInst.AluRM size opcode src1_dst src2 $false)))) (decl x64_add_mem (Type Amode Gpr) SideEffectNoResult) (spec (x64_add_mem ty addr val) @@ -5294,14 +5307,51 @@ (_ Unit (emit (MInst.LockCmpxchg16b replacement_low replacement_high expected_low expected_high addr dst_low dst_high)))) (value_regs dst_low dst_high))) -(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr) +(decl x64_xadd (OperandSize SyntheticAmode Gpr) Gpr) +(rule (x64_xadd size addr operand) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.LockXadd size operand addr dst)))) + dst)) + +(decl x64_xchg (OperandSize SyntheticAmode Gpr) Gpr) +(rule (x64_xchg size addr operand) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.Xchg size operand addr dst)))) + dst)) + +(decl lock_alu_rm (OperandSize AluRmiROpcode SyntheticAmode Gpr) Reg) +(rule (lock_alu_rm size opcode addr operand) + (let ((_ Unit (emit (MInst.AluRM size opcode addr operand $true)))) + (invalid_reg))) + +(decl x64_lock_add (OperandSize SyntheticAmode Gpr) Reg) +(rule (x64_lock_add size addr operand) + (lock_alu_rm size (AluRmiROpcode.Add) addr operand)) + +(decl x64_lock_sub (OperandSize SyntheticAmode Gpr) Reg) +(rule (x64_lock_sub size addr operand) + (lock_alu_rm size (AluRmiROpcode.Sub) addr operand)) + +(decl x64_lock_and (OperandSize SyntheticAmode Gpr) Reg) +(rule (x64_lock_and size addr operand) + (lock_alu_rm size (AluRmiROpcode.And) addr operand)) + +(decl x64_lock_or (OperandSize SyntheticAmode Gpr) Reg) +(rule (x64_lock_or size addr operand) + (lock_alu_rm size (AluRmiROpcode.Or) addr operand)) + +(decl x64_lock_xor (OperandSize SyntheticAmode Gpr) Reg) +(rule (x64_lock_xor size addr operand) + (lock_alu_rm size (AluRmiROpcode.Xor) addr operand)) + +(decl x64_atomic_rmw_seq (Type AtomicRmwSeqOp SyntheticAmode Gpr) Gpr) (rule (x64_atomic_rmw_seq ty op mem input) (let ((dst WritableGpr (temp_writable_gpr)) (tmp WritableGpr (temp_writable_gpr)) (_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst)))) dst)) -(decl x64_atomic_128_rmw_seq (MachAtomicRmwOp SyntheticAmode ValueRegs) ValueRegs) +(decl x64_atomic_128_rmw_seq (AtomicRmwOp SyntheticAmode ValueRegs) ValueRegs) (rule (x64_atomic_128_rmw_seq op mem input) (let ((dst_low WritableGpr (temp_writable_gpr)) (dst_high WritableGpr (temp_writable_gpr)) @@ -5309,10 +5359,10 @@ (tmp_high WritableGpr (temp_writable_gpr)) (input_low Gpr (value_regs_get_gpr input 0)) (input_high Gpr (value_regs_get_gpr input 1)) - (_ Unit (emit (MInst.Atomic128RmwSeq op mem input_low input_high tmp_low tmp_high dst_low dst_high)))) + (_ Unit (emit (MInst.Atomic128RmwSeq (atomic_128_rmw_seq_op op) mem input_low input_high tmp_low tmp_high dst_low dst_high)))) (value_regs dst_low dst_high))) -(rule 1 (x64_atomic_128_rmw_seq (mach_atomic_rmw_op_xchg) mem input) +(rule 1 (x64_atomic_128_rmw_seq (AtomicRmwOp.Xchg) mem input) (let ((dst_low WritableGpr (temp_writable_gpr)) (dst_high WritableGpr (temp_writable_gpr)) (input_low Gpr (value_regs_get_gpr input 0)) @@ -5328,14 +5378,50 @@ (input_high Gpr (value_regs_get_gpr input 1))) (SideEffectNoResult.Inst (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high)))) -(decl 
mach_atomic_rmw_op_xchg () MachAtomicRmwOp) -(extern extractor mach_atomic_rmw_op_xchg mach_atomic_rmw_op_is_xchg) -;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the -;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other. -(type MachAtomicRmwOp extern (enum)) -(decl atomic_rmw_op_to_mach_atomic_rmw_op (AtomicRmwOp) MachAtomicRmwOp) -(extern constructor atomic_rmw_op_to_mach_atomic_rmw_op atomic_rmw_op_to_mach_atomic_rmw_op) +(type AtomicRmwSeqOp + (enum And + Nand + Or + Xor + Umin + Umax + Smin + Smax)) + +(decl atomic_rmw_seq_op (AtomicRmwOp) AtomicRmwSeqOp) +(rule (atomic_rmw_seq_op (AtomicRmwOp.And)) (AtomicRmwSeqOp.And)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Nand)) (AtomicRmwSeqOp.Nand)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Or)) (AtomicRmwSeqOp.Or)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Xor)) (AtomicRmwSeqOp.Xor)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Umin)) (AtomicRmwSeqOp.Umin)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Umax)) (AtomicRmwSeqOp.Umax)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Smin)) (AtomicRmwSeqOp.Smin)) +(rule (atomic_rmw_seq_op (AtomicRmwOp.Smax)) (AtomicRmwSeqOp.Smax)) + +(type Atomic128RmwSeqOp + (enum Add + Sub + And + Nand + Or + Xor + Umin + Umax + Smin + Smax)) + +(decl atomic_128_rmw_seq_op (AtomicRmwOp) Atomic128RmwSeqOp) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Add)) (Atomic128RmwSeqOp.Add)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Sub)) (Atomic128RmwSeqOp.Sub)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.And)) (Atomic128RmwSeqOp.And)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Nand)) (Atomic128RmwSeqOp.Nand)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Or)) (Atomic128RmwSeqOp.Or)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Xor)) (Atomic128RmwSeqOp.Xor)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Umin)) (Atomic128RmwSeqOp.Umin)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Umax)) (Atomic128RmwSeqOp.Umax)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Smin)) (Atomic128RmwSeqOp.Smin)) +(rule (atomic_128_rmw_seq_op (AtomicRmwOp.Smax)) (Atomic128RmwSeqOp.Smax)) ;;;; Casting ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -5562,7 +5648,6 @@ (convert VCodeConstant RegMem const_to_reg_mem) (convert IntCC CC intcc_to_cc) -(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op) (convert SinkableLoad RegMem sink_load_to_reg_mem) (convert SinkableLoad GprMem sink_load_to_gpr_mem) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 7cb22c624909..0e963d516d98 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1,7 +1,6 @@ //! Instruction operand sub-components (aka "parts"): definitions and printing. use super::regs::{self}; -use super::EmitState; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::types::*; use crate::ir::MemFlags; @@ -574,21 +573,20 @@ impl SyntheticAmode { } } - pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &mut MachBuffer) -> Amode { + pub(crate) fn finalize(&self, frame: &FrameLayout, buffer: &mut MachBuffer) -> Amode { match self { SyntheticAmode::Real(addr) => addr.clone(), SyntheticAmode::IncomingArg { offset } => { - // NOTE: this could be made relative to RSP by adding additional offsets from the - // frame_layout. - let args_max_fp_offset = - state.frame_layout().tail_args_size + state.frame_layout().setup_area_size; + // NOTE: this could be made relative to RSP by adding additional + // offsets from the frame_layout. 
+ let args_max_fp_offset = frame.tail_args_size + frame.setup_area_size; Amode::imm_reg( i32::try_from(args_max_fp_offset - offset).unwrap(), regs::rbp(), ) } SyntheticAmode::SlotOffset { simm32 } => { - let off = *simm32 as i64 + i64::from(state.frame_layout().outgoing_args_size); + let off = *simm32 as i64 + i64::from(frame.outgoing_args_size); Amode::imm_reg(off.try_into().expect("invalid sp offset"), regs::rsp()) } SyntheticAmode::ConstantOffset(c) => { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 12f698daa941..e1eb7a0c51c2 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -9,6 +9,7 @@ use crate::isa::x64::encoding::rex::{ use crate::isa::x64::encoding::vex::{VexInstruction, VexVectorLength}; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; +use crate::isa::x64::lower::isle::generated_code::{Atomic128RmwSeqOp, AtomicRmwSeqOp}; /// A small helper to generate a signed conversion instruction. fn emit_signed_cvt( @@ -198,7 +199,7 @@ pub(crate) fn emit( } RegMemImm::Mem { addr } => { - let amode = addr.finalize(state, sink); + let amode = addr.finalize(state.frame_layout(), sink); // Here we revert to the "normal" G-E ordering. emit_std_reg_mem(sink, prefix, opcode_m, 1, reg_g, &amode, rex, 0); } @@ -255,9 +256,10 @@ pub(crate) fn emit( src1_dst, src2, op, + lock, } => { let src2 = src2.to_reg(); - let src1_dst = src1_dst.finalize(state, sink).clone(); + let src1_dst = src1_dst.finalize(state.frame_layout(), sink).clone(); let opcode = match op { AluRmiROpcode::Add => 0x01, @@ -268,10 +270,11 @@ pub(crate) fn emit( _ => panic!("Unsupported read-modify-write ALU opcode"), }; - let prefix = if *size == OperandSize::Size16 { - LegacyPrefixes::_66 - } else { - LegacyPrefixes::None + let prefix = match (size, lock) { + (OperandSize::Size16, false) => LegacyPrefixes::_66, + (OperandSize::Size16, true) => LegacyPrefixes::_66F0, + (_, false) => LegacyPrefixes::None, + (_, true) => LegacyPrefixes::_F0, }; let opcode = if *size == OperandSize::Size8 { opcode - 1 @@ -305,7 +308,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let w = match size { @@ -364,7 +369,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst, src, rex_flags); } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink).clone(); + let amode = src.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem(sink, prefix, opcode, num_opcodes, dst, &amode, rex_flags, 0); } } @@ -376,7 +381,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (opcode, opcode_ext) = match op { @@ -407,7 +414,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let opcode = match op { @@ -528,7 +537,7 @@ pub(crate) fn emit( ) } 
RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state.frame_layout(), sink); emit_std_enc_mem( sink, prefix, @@ -574,7 +583,7 @@ pub(crate) fn emit( emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state.frame_layout(), sink); emit_std_enc_mem(sink, prefix, 0xF7, 1, subopcode, &amode, rex_flags, 0); } } @@ -614,7 +623,7 @@ pub(crate) fn emit( emit_std_enc_enc(sink, prefix, 0xF6, 1, subopcode, src, rex_flags) } RegMem::Mem { addr } => { - let amode = addr.finalize(state, sink); + let amode = addr.finalize(state.frame_layout(), sink); emit_std_enc_mem(sink, prefix, 0xF6, 1, subopcode, &amode, rex_flags, 0); } } @@ -638,7 +647,7 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { - let amode = addr.finalize(state, sink); + let amode = addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, 0x0FAF, 2, dst, &amode, rex, 0); } } @@ -676,7 +685,7 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { - let amode = addr.finalize(state, sink); + let amode = addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, 1, dst, &amode, rex, imm_size); } } @@ -698,7 +707,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let dst_hi = dst_hi.to_real_reg().unwrap().hw_enc(); @@ -878,7 +889,7 @@ pub(crate) fn emit( } Inst::MovImmM { size, simm32, dst } => { - let dst = &dst.finalize(state, sink).clone(); + let dst = &dst.finalize(state.frame_layout(), sink).clone(); let default_rex = RexFlags::clear_w(); let default_opcode = 0xC7; let bytes = size.to_bytes(); @@ -990,7 +1001,7 @@ pub(crate) fn emit( } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink).clone(); + let src = &src.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem( sink, @@ -1008,7 +1019,7 @@ pub(crate) fn emit( Inst::Mov64MR { src, dst } => { let dst = dst.to_reg().to_reg(); - let src = &src.finalize(state, sink).clone(); + let src = &src.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem( sink, @@ -1024,7 +1035,7 @@ pub(crate) fn emit( Inst::LoadEffectiveAddress { addr, dst, size } => { let dst = dst.to_reg().to_reg(); - let amode = addr.finalize(state, sink).clone(); + let amode = addr.finalize(state.frame_layout(), sink).clone(); // If this `lea` can actually get encoded as an `add` then do that // instead. 
Currently all candidate `iadd`s become an `lea` @@ -1148,7 +1159,7 @@ pub(crate) fn emit( } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink).clone(); + let src = &src.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem( sink, @@ -1166,7 +1177,7 @@ pub(crate) fn emit( Inst::MovRM { size, src, dst } => { let src = src.to_reg(); - let dst = &dst.finalize(state, sink).clone(); + let dst = &dst.finalize(state.frame_layout(), sink).clone(); let prefix = match size { OperandSize::Size16 => LegacyPrefixes::_66, @@ -1294,7 +1305,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst, reg, rex); } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink).clone(); + let addr = &addr.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem(sink, prefix, opcode_bytes, 2, dst, addr, rex, 0); } RegMemImm::Imm { .. } => unreachable!(), @@ -1341,7 +1352,7 @@ pub(crate) fn emit( } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink).clone(); + let addr = &addr.finalize(state.frame_layout(), sink).clone(); // Whereas here we revert to the "normal" G-E ordering for CMP. let opcode = match (*size, is_cmp) { (OperandSize::Size8, true) => 0x3A, @@ -1436,7 +1447,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, 2, dst, reg, rex_flags); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink).clone(); + let addr = &addr.finalize(state.frame_layout(), sink).clone(); emit_std_reg_mem(sink, prefix, opcode, 2, dst, addr, rex_flags, 0); } } @@ -1492,7 +1503,7 @@ pub(crate) fn emit( } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_enc_mem( sink, LegacyPrefixes::None, @@ -1704,7 +1715,7 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_enc_mem( sink, LegacyPrefixes::None, @@ -1943,7 +1954,7 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_enc_mem( sink, LegacyPrefixes::None, @@ -2128,7 +2139,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, num_opcodes, reg_g, reg_e, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g, addr, rex, 0); } }; @@ -2154,7 +2165,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); // N.B.: bytes_at_end == 1, because of the `imm` byte below. 
emit_std_reg_mem(sink, prefix, opcode, len, dst, addr, rex, 1); } @@ -2168,7 +2179,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, w, opcode) = match op { @@ -2195,7 +2208,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (opcode, opcode_ext, w) = match op { @@ -2362,7 +2377,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, length, reg_g, addr, rex, 0); } } @@ -2395,7 +2410,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, length, reg_g, addr, rex, 0); } } @@ -2460,7 +2475,9 @@ pub(crate) fn emit( RegMemImm::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMemImm::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMemImm::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ -2606,7 +2623,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (w, prefix, map, opcode) = match op { @@ -2645,7 +2664,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (w, map, opcode) = match op { @@ -2684,7 +2705,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (w, map, opcode) = match op { @@ -2751,7 +2774,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let mask = mask.to_reg(); @@ -2780,7 +2805,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ 
-2844,7 +2871,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ -2880,7 +2909,7 @@ pub(crate) fn emit( Inst::XmmMovRMVex { op, src, dst } => { let src = src.to_reg(); - let dst = dst.clone().finalize(state, sink); + let dst = dst.clone().finalize(state.frame_layout(), sink); let (prefix, map, opcode) = match op { AvxOpcode::Vmovdqu => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x7F), @@ -2902,7 +2931,7 @@ pub(crate) fn emit( Inst::XmmMovRMImmVex { op, src, dst, imm } => { let src = src.to_reg(); - let dst = dst.clone().finalize(state, sink); + let dst = dst.clone().finalize(state.frame_layout(), sink); let (w, prefix, map, opcode) = match op { AvxOpcode::Vpextrb => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x14), @@ -2997,7 +3026,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ -3026,7 +3057,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ -3068,7 +3101,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let dst = dst.to_reg().to_reg(); if let Some(src1) = reused_src { @@ -3233,7 +3268,7 @@ pub(crate) fn emit( } } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); assert!( !regs_swapped, "No existing way to encode a mem argument in the ModRM r/m field." 
@@ -3264,7 +3299,7 @@ pub(crate) fn emit( SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11), _ => unimplemented!("Opcode {:?} not implemented", op), }; - let dst = &dst.finalize(state, sink); + let dst = &dst.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, 2, src, dst, RexFlags::clear_w(), 0); } @@ -3284,7 +3319,7 @@ pub(crate) fn emit( } else { RexFlags::clear_w() }; - let dst = &dst.finalize(state, sink); + let dst = &dst.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, 3, src, dst, rex, 1); sink.put1(*imm); } @@ -3356,7 +3391,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, 2, reg_g, reg_e, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g, addr, rex, 0); } } @@ -3379,7 +3414,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, len, src1, reg, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, len, src1, addr, rex, 0); } } @@ -3408,7 +3443,7 @@ pub(crate) fn emit( emit_std_reg_reg(sink, prefix, opcode, 2, dst, src2, rex); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcode, 2, dst, addr, rex, 0); } } @@ -3427,7 +3462,9 @@ pub(crate) fn emit( RegMem::Reg { reg } => { RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) } - RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + RegMem::Mem { addr } => { + RegisterOrAmode::Amode(addr.finalize(state.frame_layout(), sink)) + } }; let (prefix, map, opcode) = match op { @@ -4034,7 +4071,7 @@ pub(crate) fn emit( _ => unreachable!(), }; let rex = RexFlags::from((OperandSize::from_ty(*ty), replacement)); - let amode = mem.finalize(state, sink); + let amode = mem.finalize(state.frame_layout(), sink); emit_std_reg_mem(sink, prefix, opcodes, 2, replacement, &amode, rex, 0); } @@ -4055,7 +4092,7 @@ pub(crate) fn emit( debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); - let amode = mem.finalize(state, sink); + let amode = mem.finalize(state.frame_layout(), sink); // lock cmpxchg16b (mem) // Note that 0xF0 is the Lock prefix. emit_std_enc_mem( @@ -4070,6 +4107,45 @@ pub(crate) fn emit( ); } + Inst::LockXadd { + size, + operand, + mem, + dst_old, + } => { + debug_assert_eq!(dst_old.to_reg(), *operand); + // lock xadd{b,w,l,q} %operand, (mem) + // Note that 0xF0 is the Lock prefix. 
+ let (prefix, opcodes) = match size { + OperandSize::Size8 => (LegacyPrefixes::_F0, 0x0FC0), + OperandSize::Size16 => (LegacyPrefixes::_66F0, 0x0FC1), + OperandSize::Size32 => (LegacyPrefixes::_F0, 0x0FC1), + OperandSize::Size64 => (LegacyPrefixes::_F0, 0x0FC1), + }; + let rex = RexFlags::from((*size, *operand)); + let amode = mem.finalize(state.frame_layout(), sink); + emit_std_reg_mem(sink, prefix, opcodes, 2, *operand, &amode, rex, 0); + } + + Inst::Xchg { + size, + operand, + mem, + dst_old, + } => { + debug_assert_eq!(dst_old.to_reg(), *operand); + // xchg{b,w,l,q} %operand, (mem) + let (prefix, opcodes) = match size { + OperandSize::Size8 => (LegacyPrefixes::None, 0x86), + OperandSize::Size16 => (LegacyPrefixes::_66, 0x87), + OperandSize::Size32 => (LegacyPrefixes::None, 0x87), + OperandSize::Size64 => (LegacyPrefixes::None, 0x87), + }; + let rex = RexFlags::from((*size, *operand)); + let amode = mem.finalize(state.frame_layout(), sink); + emit_std_reg_mem(sink, prefix, opcodes, 1, *operand, &amode, rex, 0); + } + Inst::AtomicRmwSeq { ty, op, @@ -4082,7 +4158,7 @@ pub(crate) fn emit( let temp = *temp; let dst_old = *dst_old; debug_assert_eq!(dst_old.to_reg(), regs::rax()); - let mem = mem.finalize(state, sink).clone(); + let mem = mem.finalize(state.frame_layout(), sink).clone(); // Emit this: // mov{zbq,zwq,zlq,q} (%r_address), %rax // rax = old value @@ -4094,15 +4170,6 @@ pub(crate) fn emit( // // Operand conventions: IN: %r_address, %r_operand OUT: %rax (old // value), %r_temp (trashed), %rflags (trashed) - // - // In the case where the operation is 'xchg', the "`op`q" - // instruction is instead: movq %r_operand, - // %r_temp so that we simply write in the destination, the "2nd - // arg for `op`". - // - // TODO: this sequence can be significantly improved (e.g., to `lock - // `) when it is known that `dst_old` is not used later, see - // https://github.com/bytecodealliance/wasmtime/issues/2153. let again_label = sink.get_label(); // mov{zbq,zwq,zlq,q} (%r_address), %rax @@ -4118,13 +4185,8 @@ pub(crate) fn emit( i2.emit(sink, info, state); let operand_rmi = RegMemImm::reg(operand); - use inst_common::MachAtomicRmwOp as RmwOp; + use AtomicRmwSeqOp as RmwOp; match op { - RmwOp::Xchg => { - // movq %r_operand, %r_temp - let i3 = Inst::mov_r_r(OperandSize::Size64, operand, temp); - i3.emit(sink, info, state); - } RmwOp::Nand => { // andq %r_operand, %r_temp let i3 = @@ -4155,20 +4217,13 @@ pub(crate) fn emit( let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(operand), temp); i4.emit(sink, info, state); } - _ => { + RmwOp::And | RmwOp::Or | RmwOp::Xor => { // opq %r_operand, %r_temp let alu_op = match op { - RmwOp::Add => AluRmiROpcode::Add, - RmwOp::Sub => AluRmiROpcode::Sub, RmwOp::And => AluRmiROpcode::And, RmwOp::Or => AluRmiROpcode::Or, RmwOp::Xor => AluRmiROpcode::Xor, - RmwOp::Xchg - | RmwOp::Nand - | RmwOp::Umin - | RmwOp::Umax - | RmwOp::Smin - | RmwOp::Smax => unreachable!(), + _ => unreachable!(), }; let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, operand_rmi, temp); i3.emit(sink, info, state); @@ -4210,7 +4265,7 @@ pub(crate) fn emit( debug_assert_eq!(temp_high.to_reg(), regs::rcx()); debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); - let mem = mem.finalize(state, sink).clone(); + let mem = mem.finalize(state.frame_layout(), sink).clone(); let again_label = sink.get_label(); @@ -4232,9 +4287,8 @@ pub(crate) fn emit( // Perform the operation. 
let operand_low_rmi = RegMemImm::reg(operand_low); let operand_high_rmi = RegMemImm::reg(operand_high); - use inst_common::MachAtomicRmwOp as RmwOp; + use Atomic128RmwSeqOp as RmwOp; match op { - RmwOp::Xchg => panic!("use `Atomic128XchgSeq` instead"), RmwOp::Nand => { // temp &= operand Inst::alu_rmi_r( @@ -4284,7 +4338,7 @@ pub(crate) fn emit( Inst::cmove(OperandSize::Size64, cc, operand_high.into(), temp_high) .emit(sink, info, state); } - _ => { + RmwOp::Add | RmwOp::Sub | RmwOp::And | RmwOp::Or | RmwOp::Xor => { // temp op= operand let (op_low, op_high) = match op { RmwOp::Add => (AluRmiROpcode::Add, AluRmiROpcode::Adc), @@ -4292,12 +4346,7 @@ pub(crate) fn emit( RmwOp::And => (AluRmiROpcode::And, AluRmiROpcode::And), RmwOp::Or => (AluRmiROpcode::Or, AluRmiROpcode::Or), RmwOp::Xor => (AluRmiROpcode::Xor, AluRmiROpcode::Xor), - RmwOp::Xchg - | RmwOp::Nand - | RmwOp::Umin - | RmwOp::Umax - | RmwOp::Smin - | RmwOp::Smax => unreachable!(), + _ => unreachable!(), }; Inst::alu_rmi_r(OperandSize::Size64, op_low, operand_low_rmi, temp_low) .emit(sink, info, state); @@ -4337,7 +4386,7 @@ pub(crate) fn emit( debug_assert_eq!(operand_high, regs::rcx()); debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); - let mem = mem.finalize(state, sink).clone(); + let mem = mem.finalize(state.frame_layout(), sink).clone(); let again_label = sink.get_label(); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 1b52d074923a..f6f41d48c0ef 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -15,6 +15,7 @@ use super::*; use crate::ir::{MemFlags, UserExternalNameRef}; use crate::isa::x64; +use crate::isa::x64::lower::isle::generated_code::{Atomic128RmwSeqOp, AtomicRmwSeqOp}; use alloc::vec::Vec; use cranelift_entity::EntityRef as _; @@ -194,7 +195,7 @@ fn test_x64_emit() { let _w_rbp = Writable::::from_reg(rbp); let w_r8 = Writable::::from_reg(r8); let w_r9 = Writable::::from_reg(r9); - let _w_r10 = Writable::::from_reg(r10); + let w_r10 = Writable::::from_reg(r10); let w_r11 = Writable::::from_reg(r11); let w_r12 = Writable::::from_reg(r12); let w_r13 = Writable::::from_reg(r13); @@ -1481,7 +1482,7 @@ fn test_x64_emit() { OperandSize::Size8, AluRmiROpcode::Xor, RegMemImm::imm(10), - _w_r10, + w_r10, ), "4180F20A", "xorb %r10b, $10, %r10b", @@ -1491,7 +1492,7 @@ fn test_x64_emit() { OperandSize::Size8, AluRmiROpcode::Xor, RegMemImm::reg(rcx), - _w_r10, + w_r10, ), "4130CA", "xorb %r10b, %cl, %r10b", @@ -1501,7 +1502,7 @@ fn test_x64_emit() { OperandSize::Size8, AluRmiROpcode::Xor, RegMemImm::reg(rsi), - _w_r10, + w_r10, ), "4130F2", "xorb %r10b, %sil, %r10b", @@ -1511,7 +1512,7 @@ fn test_x64_emit() { OperandSize::Size8, AluRmiROpcode::Xor, RegMemImm::reg(r11), - _w_r10, + w_r10, ), "4530DA", "xorb %r10b, %r11b, %r10b", @@ -1521,7 +1522,7 @@ fn test_x64_emit() { OperandSize::Size8, AluRmiROpcode::Xor, RegMemImm::reg(r15), - _w_r10, + w_r10, ), "4530FA", "xorb %r10b, %r15b, %r10b", @@ -1637,6 +1638,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Add, src1_dst: Amode::imm_reg(99, rdi).into(), src2: Gpr::unwrap_new(r12), + lock: false, }, "44016763", "addl %r12d, 99(%rdi)", @@ -1649,6 +1651,7 @@ fn test_x64_emit() { src1_dst: Amode::imm_reg_reg_shift(0, Gpr::unwrap_new(rbp), Gpr::unwrap_new(rax), 3) .into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "480144C500", "addq %rax, 0(%rbp,%rax,8)", @@ -1660,6 +1663,7 @@ fn 
test_x64_emit() { op: AluRmiROpcode::Sub, src1_dst: Amode::imm_reg(0, rsp).into(), src2: Gpr::unwrap_new(rcx), + lock: false, }, "290C24", "subl %ecx, 0(%rsp)", @@ -1671,6 +1675,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Sub, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "48294500", "subq %rax, 0(%rbp)", @@ -1682,6 +1687,7 @@ fn test_x64_emit() { op: AluRmiROpcode::And, src1_dst: Amode::imm_reg(0, rsp).into(), src2: Gpr::unwrap_new(rcx), + lock: false, }, "210C24", "andl %ecx, 0(%rsp)", @@ -1693,6 +1699,7 @@ fn test_x64_emit() { op: AluRmiROpcode::And, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "48214500", "andq %rax, 0(%rbp)", @@ -1704,6 +1711,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Or, src1_dst: Amode::imm_reg(0, rsp).into(), src2: Gpr::unwrap_new(rcx), + lock: false, }, "090C24", "orl %ecx, 0(%rsp)", @@ -1715,6 +1723,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Or, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "48094500", "orq %rax, 0(%rbp)", @@ -1726,6 +1735,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Xor, src1_dst: Amode::imm_reg(0, rsp).into(), src2: Gpr::unwrap_new(rcx), + lock: false, }, "310C24", "xorl %ecx, 0(%rsp)", @@ -1737,6 +1747,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Xor, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "48314500", "xorq %rax, 0(%rbp)", @@ -1748,6 +1759,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Add, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "66014500", "addw %ax, 0(%rbp)", @@ -1758,6 +1770,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Sub, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(r12), + lock: false, }, "6644296500", "subw %r12w, 0(%rbp)", @@ -1769,6 +1782,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Add, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rax), + lock: false, }, "004500", "addb %al, 0(%rbp)", @@ -1779,6 +1793,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Sub, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(rbp), + lock: false, }, "40286D00", "subb %bpl, 0(%rbp)", @@ -1789,6 +1804,7 @@ fn test_x64_emit() { op: AluRmiROpcode::Xor, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(r10), + lock: false, }, "44305500", "xorb %r10b, 0(%rbp)", @@ -1799,11 +1815,57 @@ fn test_x64_emit() { op: AluRmiROpcode::And, src1_dst: Amode::imm_reg(0, rbp).into(), src2: Gpr::unwrap_new(r15), + lock: false, }, "44207D00", "andb %r15b, 0(%rbp)", )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size64, + op: AluRmiROpcode::And, + src1_dst: Amode::imm_reg(0, rdx).into(), + src2: Gpr::unwrap_new(rax), + lock: true, + }, + "F0482102", + "lock andq %rax, 0(%rdx)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size32, + op: AluRmiROpcode::Or, + src1_dst: Amode::imm_reg(0, rdx).into(), + src2: Gpr::unwrap_new(rax), + lock: true, + }, + "F00902", + "lock orl %eax, 0(%rdx)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size16, + op: AluRmiROpcode::Xor, + src1_dst: Amode::imm_reg(0, rdx).into(), + src2: Gpr::unwrap_new(rax), + lock: true, + }, + "66F03102", + "lock xorw %ax, 0(%rdx)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size8, + op: AluRmiROpcode::Add, + src1_dst: Amode::imm_reg(0, r9).into(), + src2: Gpr::unwrap_new(rax), + lock: true, + }, + "F0410001", + "lock addb %al, 0(%r9)", + )); + // 
======================================================== // UnaryRmR @@ -4998,72 +5060,156 @@ fn test_x64_emit() { "lock cmpxchg16b -12345(%rcx,%rsi,8), replacement=%rcx:%rbx, expected=%rdx:%rax, dst_old=%rdx:%rax", )); + // LockXadd + insns.push(( + Inst::LockXadd { + size: OperandSize::Size64, + operand: r10, + mem: am3.clone(), + dst_old: w_r10, + }, + "F04D0FC111", + "lock xaddq %r10, 0(%r9), dst_old=%r10", + )); + insns.push(( + Inst::LockXadd { + size: OperandSize::Size32, + operand: r11, + mem: am3.clone(), + dst_old: w_r11, + }, + "F0450FC119", + "lock xaddl %r11d, 0(%r9), dst_old=%r11d", + )); + insns.push(( + Inst::LockXadd { + size: OperandSize::Size16, + operand: r12, + mem: am3.clone(), + dst_old: w_r12, + }, + "66F0450FC121", + "lock xaddw %r12w, 0(%r9), dst_old=%r12w", + )); + insns.push(( + Inst::LockXadd { + size: OperandSize::Size8, + operand: r13, + mem: am3.clone(), + dst_old: w_r13, + }, + "F0450FC029", + "lock xaddb %r13b, 0(%r9), dst_old=%r13b", + )); + + // Xchg + insns.push(( + Inst::Xchg { + size: OperandSize::Size64, + operand: r10, + mem: am3.clone(), + dst_old: w_r10, + }, + "4D8711", + "xchgq %r10, 0(%r9), dst_old=%r10", + )); + insns.push(( + Inst::Xchg { + size: OperandSize::Size32, + operand: r11, + mem: am3.clone(), + dst_old: w_r11, + }, + "458719", + "xchgl %r11d, 0(%r9), dst_old=%r11d", + )); + insns.push(( + Inst::Xchg { + size: OperandSize::Size16, + operand: r12, + mem: am3.clone(), + dst_old: w_r12, + }, + "66458721", + "xchgw %r12w, 0(%r9), dst_old=%r12w", + )); + insns.push(( + Inst::Xchg { + size: OperandSize::Size8, + operand: r13, + mem: am3.clone(), + dst_old: w_r13, + }, + "458629", + "xchgb %r13b, 0(%r9), dst_old=%r13b", + )); + // AtomicRmwSeq insns.push(( Inst::AtomicRmwSeq { ty: types::I8, - op: inst_common::MachAtomicRmwOp::Or, + op: AtomicRmwSeqOp::Or, mem: am3.clone(), operand: r10, temp: w_r11, - dst_old: w_rax + dst_old: w_rax, }, "490FB6014989C34D09D3F0450FB0190F85EFFFFFFF", - "atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + "atomically { 8_bits_at_[%r9] Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }", )); insns.push(( Inst::AtomicRmwSeq { ty: types::I16, - op: inst_common::MachAtomicRmwOp::And, + op: AtomicRmwSeqOp::And, mem: am3.clone(), operand: r10, temp: w_r11, dst_old: w_rax }, "490FB7014989C34D21D366F0450FB1190F85EEFFFFFF", - "atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + "atomically { 16_bits_at_[%r9] And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); insns.push(( Inst::AtomicRmwSeq { ty: types::I32, - op: inst_common::MachAtomicRmwOp::Xchg, + op: AtomicRmwSeqOp::Nand, mem: am3.clone(), operand: r10, temp: w_r11, dst_old: w_rax }, - "418B014989C34D89D3F0450FB1190F85EFFFFFFF", - "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + "418B014989C34D21D349F7D3F0450FB1190F85ECFFFFFF", + "atomically { 32_bits_at_[%r9] Nand= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); insns.push(( Inst::AtomicRmwSeq { ty: types::I32, - op: inst_common::MachAtomicRmwOp::Umin, + op: AtomicRmwSeqOp::Umin, mem: am3.clone(), operand: r10, temp: w_r11, dst_old: w_rax }, "418B014989C34539DA4D0F46DAF0450FB1190F85EBFFFFFF", - "atomically { 32_bits_at_[%r9]) Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + "atomically { 32_bits_at_[%r9] Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); insns.push(( Inst::AtomicRmwSeq { ty: types::I64, - 
op: inst_common::MachAtomicRmwOp::Add, + op: AtomicRmwSeqOp::Smax, mem: am3.clone(), operand: r10, temp: w_r11, dst_old: w_rax }, - "498B014989C34D01D3F04D0FB1190F85EFFFFFFF", - "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + "498B014989C34D39DA4D0F4DDAF04D0FB1190F85EBFFFFFF", + "atomically { 64_bits_at_[%r9] Smax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); // Atomic128RmwSeq insns.push(( Inst::Atomic128RmwSeq { - op: inst_common::MachAtomicRmwOp::Or, + op: Atomic128RmwSeqOp::Or, mem: Box::new(am3.clone()), operand_low: r10, operand_high: r11, @@ -5077,7 +5223,7 @@ fn test_x64_emit() { )); insns.push(( Inst::Atomic128RmwSeq { - op: inst_common::MachAtomicRmwOp::And, + op: Atomic128RmwSeqOp::And, mem: Box::new(am3.clone()), operand_low: r10, operand_high: r11, @@ -5091,7 +5237,7 @@ fn test_x64_emit() { )); insns.push(( Inst::Atomic128RmwSeq { - op: inst_common::MachAtomicRmwOp::Umin, + op: Atomic128RmwSeqOp::Umin, mem: Box::new(am3.clone()), operand_low: r10, operand_high: r11, @@ -5105,7 +5251,7 @@ fn test_x64_emit() { )); insns.push(( Inst::Atomic128RmwSeq { - op: inst_common::MachAtomicRmwOp::Add, + op: Atomic128RmwSeqOp::Add, mem: Box::new(am3.clone()), operand_low: r10, operand_high: r11, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 3e8aea72f5e5..ef251d08e776 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -99,6 +99,8 @@ impl Inst { | Inst::LoadEffectiveAddress { .. } | Inst::LoadExtName { .. } | Inst::LockCmpxchg { .. } + | Inst::LockXadd { .. } + | Inst::Xchg { .. } | Inst::Mov64MR { .. } | Inst::MovImmM { .. } | Inst::MovRM { .. } @@ -702,12 +704,14 @@ impl PrettyPrint for Inst { op, src1_dst, src2, + lock, } => { let size_bytes = size.to_bytes(); let src2 = pretty_print_reg(src2.to_reg(), size_bytes); let src1_dst = src1_dst.pretty_print(size_bytes); let op = ljustify2(op.to_string(), suffix_bwlq(*size)); - format!("{op} {src2}, {src1_dst}") + let prefix = if *lock { "lock " } else { "" }; + format!("{prefix}{op} {src2}, {src1_dst}") } Inst::AluRmRVex { size, @@ -1842,10 +1846,36 @@ impl PrettyPrint for Inst { ) } + Inst::LockXadd { + size, + operand, + mem, + dst_old, + } => { + let operand = pretty_print_reg(*operand, size.to_bytes()); + let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes()); + let mem = mem.pretty_print(size.to_bytes()); + let suffix = suffix_bwlq(*size); + format!("lock xadd{suffix} {operand}, {mem}, dst_old={dst_old}") + } + + Inst::Xchg { + size, + operand, + mem, + dst_old, + } => { + let operand = pretty_print_reg(*operand, size.to_bytes()); + let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes()); + let mem = mem.pretty_print(size.to_bytes()); + let suffix = suffix_bwlq(*size); + format!("xchg{suffix} {operand}, {mem}, dst_old={dst_old}") + } + Inst::AtomicRmwSeq { ty, op, .. } => { let ty = ty.bits(); format!( - "atomically {{ {ty}_bits_at_[%r9]) {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}" + "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}" ) } @@ -2547,6 +2577,28 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands(collector); } + Inst::LockXadd { + operand, + mem, + dst_old, + .. 
+ } => { + collector.reg_use(operand); + collector.reg_reuse_def(dst_old, 0); + mem.get_operands(collector); + } + + Inst::Xchg { + operand, + mem, + dst_old, + .. + } => { + collector.reg_use(operand); + collector.reg_reuse_def(dst_old, 0); + mem.get_operands(collector); + } + Inst::AtomicRmwSeq { operand, temp, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index cb15cb725c90..0f27e84a4dd4 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3386,19 +3386,53 @@ ;; Rules for `atomic_rmw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; This is a simple, general-case atomic update, based on a loop involving -;; `cmpxchg`. Note that we could do much better than this in the case where the -;; old value at the location (that is to say, the SSA `Value` computed by this -;; CLIF instruction) is not required. In that case, we could instead implement -;; this using a single `lock`-prefixed x64 read-modify-write instruction. Also, -;; even in the case where the old value is required, for the `add` and `sub` -;; cases, we can use the single instruction `lock xadd`. However, those -;; improvements have been left for another day. TODO: filed as -;; https://github.com/bytecodealliance/wasmtime/issues/2153. - +;; `cmpxchg`. (rule (lower (has_type (and (fits_in_64 ty) (ty_int _)) (atomic_rmw flags op address input))) - (x64_atomic_rmw_seq ty op (to_amode flags address (zero_offset)) input)) -(rule 1 (lower (has_type $I128 (atomic_rmw flags op address input))) + (x64_atomic_rmw_seq ty (atomic_rmw_seq_op op) (to_amode flags address (zero_offset)) input)) + +;; `Add` and `Sub` can use `lock xadd` +(rule 1 (lower (has_type (and (fits_in_64 ty) (ty_int _)) + (atomic_rmw flags (AtomicRmwOp.Add) address input))) + (x64_xadd (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) +(rule 1 (lower (has_type (and (fits_in_64 ty) (ty_int _)) + (atomic_rmw flags (AtomicRmwOp.Sub) address input))) + (x64_xadd (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) (x64_neg ty input))) +;; `Xchg` can use `xchg` +(rule 1 (lower (has_type (and (fits_in_64 ty) (ty_int _)) + (atomic_rmw flags (AtomicRmwOp.Xchg) address input))) + (x64_xchg (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) + +;; `Add`, `Sub`, `And`, `Or` and `Xor` can use `lock`-prefixed instructions if +;; the old value is not required. 
+(rule 2 (lower i @ (has_type (fits_in_64 (ty_int ty)) + (atomic_rmw flags (AtomicRmwOp.Add) address input))) + (if-let (first_result res) i) + (if-let $true (value_is_unused res)) + (x64_lock_add (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) +(rule 2 (lower i @ (has_type (fits_in_64 (ty_int ty)) + (atomic_rmw flags (AtomicRmwOp.Sub) address input))) + (if-let (first_result res) i) + (if-let $true (value_is_unused res)) + (x64_lock_sub (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) +(rule 2 (lower i @ (has_type (fits_in_64 (ty_int ty)) + (atomic_rmw flags (AtomicRmwOp.And) address input))) + (if-let (first_result res) i) + (if-let $true (value_is_unused res)) + (x64_lock_and (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) +(rule 2 (lower i @ (has_type (fits_in_64 (ty_int ty)) + (atomic_rmw flags (AtomicRmwOp.Or) address input))) + (if-let (first_result res) i) + (if-let $true (value_is_unused res)) + (x64_lock_or (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) +(rule 2 (lower i @ (has_type (fits_in_64 (ty_int ty)) + (atomic_rmw flags (AtomicRmwOp.Xor) address input))) + (if-let (first_result res) i) + (if-let $true (value_is_unused res)) + (x64_lock_xor (raw_operand_size_of_type ty) (to_amode flags address (zero_offset)) input)) + +;; 128-bit integers always use a `lock cmpxchg16b` loop. +(rule 3 (lower (has_type $I128 (atomic_rmw flags op address input))) (if-let $true (use_cmpxchg16b)) (x64_atomic_128_rmw_seq op (to_amode flags address (zero_offset)) input)) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index ed61a4d92654..c0229e6eb5bf 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -22,8 +22,8 @@ use crate::{ inst::{args::*, regs, ReturnCallInfo}, }, machinst::{ - isle::*, ArgPair, CallInfo, InsnInput, InstOutput, IsTailCall, MachAtomicRmwOp, MachInst, - VCodeConstant, VCodeConstantData, + isle::*, ArgPair, CallInfo, InsnInput, InstOutput, IsTailCall, MachInst, VCodeConstant, + VCodeConstantData, }, }; use alloc::vec::Vec; @@ -615,20 +615,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { Offset32::new(0) } - #[inline] - fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp { - MachAtomicRmwOp::from(*op) - } - - #[inline] - fn mach_atomic_rmw_op_is_xchg(&mut self, op: &MachAtomicRmwOp) -> Option<()> { - if *op == MachAtomicRmwOp::Xchg { - Some(()) - } else { - None - } - } - #[inline] fn preg_rbp(&mut self) -> PReg { regs::rbp().to_real_reg().unwrap().into() diff --git a/cranelift/codegen/src/isa/x64/pcc.rs b/cranelift/codegen/src/isa/x64/pcc.rs index e29908429815..cd09897f5249 100644 --- a/cranelift/codegen/src/isa/x64/pcc.rs +++ b/cranelift/codegen/src/isa/x64/pcc.rs @@ -163,6 +163,7 @@ pub(crate) fn check( op: _, ref src1_dst, src2: _, + lock: _, } => { check_load(ctx, None, src1_dst, vcode, size.to_type(), 64)?; check_store(ctx, None, src1_dst, vcode, size.to_type()) @@ -898,6 +899,28 @@ pub(crate) fn check( Ok(()) } + Inst::LockXadd { + size, + ref mem, + dst_old, + operand: _, + } => { + ensure_no_fact(vcode, dst_old.to_reg())?; + check_store(ctx, None, mem, vcode, size.to_type())?; + Ok(()) + } + + Inst::Xchg { + size, + ref mem, + dst_old, + operand: _, + } => { + ensure_no_fact(vcode, dst_old.to_reg())?; + check_store(ctx, None, mem, vcode, size.to_type())?; + Ok(()) + } + Inst::AtomicRmwSeq { ref 
mem, temp, diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs index 3fa341ae9fa3..151dd4b9a107 100644 --- a/cranelift/codegen/src/machinst/inst_common.rs +++ b/cranelift/codegen/src/machinst/inst_common.rs @@ -1,6 +1,6 @@ //! A place to park MachInst::Inst fragments which are common across multiple architectures. -use crate::ir::{self, Inst as IRInst}; +use crate::ir::Inst as IRInst; //============================================================================ // Instruction input "slots". @@ -22,54 +22,3 @@ pub(crate) struct InsnOutput { pub(crate) insn: IRInst, pub(crate) output: usize, } - -//============================================================================ -// Atomic instructions. - -/// Atomic memory update operations. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[repr(u8)] -pub enum MachAtomicRmwOp { - /// Add - Add, - /// Sub - Sub, - /// And - And, - /// Nand - Nand, - /// Or - Or, - /// Exclusive Or - Xor, - /// Exchange (swap operands) - Xchg, - /// Unsigned min - Umin, - /// Unsigned max - Umax, - /// Signed min - Smin, - /// Signed max - Smax, -} - -impl MachAtomicRmwOp { - /// Converts an `ir::AtomicRmwOp` to the corresponding - /// `inst_common::AtomicRmwOp`. - pub fn from(ir_op: ir::AtomicRmwOp) -> Self { - match ir_op { - ir::AtomicRmwOp::Add => MachAtomicRmwOp::Add, - ir::AtomicRmwOp::Sub => MachAtomicRmwOp::Sub, - ir::AtomicRmwOp::And => MachAtomicRmwOp::And, - ir::AtomicRmwOp::Nand => MachAtomicRmwOp::Nand, - ir::AtomicRmwOp::Or => MachAtomicRmwOp::Or, - ir::AtomicRmwOp::Xor => MachAtomicRmwOp::Xor, - ir::AtomicRmwOp::Xchg => MachAtomicRmwOp::Xchg, - ir::AtomicRmwOp::Umin => MachAtomicRmwOp::Umin, - ir::AtomicRmwOp::Umax => MachAtomicRmwOp::Umax, - ir::AtomicRmwOp::Smin => MachAtomicRmwOp::Smin, - ir::AtomicRmwOp::Smax => MachAtomicRmwOp::Smax, - } - } -} diff --git a/cranelift/filetests/filetests/egraph/not_a_load.clif b/cranelift/filetests/filetests/egraph/not_a_load.clif index c512a3c7bec7..6fdeed24c316 100644 --- a/cranelift/filetests/filetests/egraph/not_a_load.clif +++ b/cranelift/filetests/filetests/egraph/not_a_load.clif @@ -16,7 +16,7 @@ function u0:1302(i64) -> i64 system_v { ; pushq %rbp ; movq %rsp, %rbp ; block0: -; atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; lock addq %rdi, 0(%rdi) ; movq %rdi, %rax ; movq %rbp, %rsp ; popq %rbp @@ -27,11 +27,7 @@ function u0:1302(i64) -> i64 system_v { ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq (%rdi), %rax ; trap: heap_oob -; movq %rax, %rcx -; addq %rdi, %rcx -; lock cmpxchgq %rcx, (%rdi) ; trap: heap_oob -; jne 7 +; lock addq %rdi, (%rdi) ; trap: heap_oob ; movq %rdi, %rax ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/x64/atomic-rmw.clif new file mode 100644 index 000000000000..628fb64714bb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/atomic-rmw.clif @@ -0,0 +1,1783 @@ +test compile precise-output +target x86_64 + +function %add_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; lock xaddq %rax, 0(%rdi), dst_old=%rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; lock xaddq %rax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + 
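The `%add_i64` expectations above show the value-returning case mapping onto `lock xadd`: the instruction atomically adds its register operand into memory and leaves the previous memory contents in that register. In Rust terms, the distinction the following tests exercise is simply whether the result of `fetch_add` is used; the two functions below are a sketch of that distinction, not a claim about the exact instructions any particular compiler picks for them:

    use std::sync::atomic::{AtomicU64, Ordering};

    /// Result observed: the caller needs the previous value, the shape that
    /// `lock xadd` serves directly.
    fn bump_and_observe(counter: &AtomicU64, delta: u64) -> u64 {
        counter.fetch_add(delta, Ordering::SeqCst)
    }

    /// Result discarded: no old value is needed, so a bare `lock add` (see the
    /// `*_no_res` tests further down) is sufficient.
    fn bump(counter: &AtomicU64, delta: u64) {
        counter.fetch_add(delta, Ordering::SeqCst);
    }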
+function %add_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; lock xaddl %eax, 0(%rdi), dst_old=%eax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; lock xaddl %eax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 add v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; lock xaddw %ax, 0(%rdi), dst_old=%ax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; lock xaddw %ax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 add v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; lock xaddb %al, 0(%rdi), dst_old=%al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; lock xaddb %al, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i64_no_res(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock addq %rsi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock addq %rsi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i32_no_res(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock addl %esi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock addl %esi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i16_no_res(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 add v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock addw %si, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock addw %si, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add_i8_no_res(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 add v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock addb %sil, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock addb %sil, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 sub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; negq %rsi, %rsi +; movq %rsi, %rax +; lock xaddq %rax, 0(%rdi), dst_old=%rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; negq %rsi +; movq %rsi, %rax +; lock xaddq %rax, (%rdi) ; trap: heap_oob +; movq 
%rbp, %rsp +; popq %rbp +; retq + +function %sub_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 sub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; negl %esi, %esi +; movq %rsi, %rax +; lock xaddl %eax, 0(%rdi), dst_old=%eax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; negl %esi +; movq %rsi, %rax +; lock xaddl %eax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 sub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; negw %si, %si +; movq %rsi, %rax +; lock xaddw %ax, 0(%rdi), dst_old=%ax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; negw %si +; movq %rsi, %rax +; lock xaddw %ax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 sub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; negb %sil, %sil +; movq %rsi, %rax +; lock xaddb %al, 0(%rdi), dst_old=%al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; negb %sil +; movq %rsi, %rax +; lock xaddb %al, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i64_no_res(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 sub v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock subq %rsi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock subq %rsi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i32_no_res(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 sub v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock subl %esi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock subl %esi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i16_no_res(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 sub v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock subw %si, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock subw %si, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub_i8_no_res(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 sub v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock subb %sil, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock subb %sil, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 
+; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 and v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 and v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i64_no_res(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock andq %rsi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock andq %rsi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i32_no_res(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock andl %esi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock andl %esi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i16_no_res(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 and v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock andw %si, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock andw %si, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and_i8_no_res(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 and v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock andb %sil, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock andb 
%sil, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %nand_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 nand v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Nand= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; notq %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %nand_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 nand v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Nand= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; notq %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %nand_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 nand v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Nand= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; notq %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %nand_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 nand v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Nand= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; andq %rsi, %rdx +; notq %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; orq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; orq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; 
movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 or v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; orq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 or v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; orq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i64_no_res(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock orq %rsi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock orq %rsi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i32_no_res(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock orl %esi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock orl %esi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i16_no_res(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 or v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock orw %si, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock orw %si, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or_i8_no_res(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 or v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock orb %sil, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock orb %sil, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Xor= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; xorq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 xor v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; 
movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Xor= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; xorq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 xor v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Xor= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; xorq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 xor v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Xor= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; xorq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i64_no_res(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock xorq %rsi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock xorq %rsi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i32_no_res(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 xor v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock xorl %esi, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock xorl %esi, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i16_no_res(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 xor v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock xorw %si, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock xorw %si, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor_i8_no_res(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 xor v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lock xorb %sil, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; lock xorb %sil, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xchg_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xchg v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; xchgq %rax, 0(%rdi), dst_old=%rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; 
offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; xchgq %rax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xchg_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 xchg v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; xchgl %eax, 0(%rdi), dst_old=%eax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; xchgl %eax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xchg_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 xchg v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; xchgw %ax, 0(%rdi), dst_old=%ax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; xchgw %ax, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xchg_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 xchg v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rax +; xchgb %al, 0(%rdi), dst_old=%al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rsi, %rax +; xchgb %al, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umin_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpq %rdx, %rsi +; cmovbeq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umin_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; cmpl %edx, %esi +; cmovbeq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umin_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpw %dx, %si +; cmovbeq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umin_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 
8_bits_at_[%r9] Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpb %dl, %sil +; cmovbeq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umax_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Umax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpq %rdx, %rsi +; cmovaeq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umax_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Umax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; cmpl %edx, %esi +; cmovaeq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umax_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Umax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpw %dx, %si +; cmovaeq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umax_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Umax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpb %dl, %sil +; cmovaeq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smin_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Smin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpq %rdx, %rsi +; cmovleq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smin_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smin v0, v1 + return v2 +} + +; VCode: +; 
pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Smin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; cmpl %edx, %esi +; cmovleq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smin_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Smin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpw %dx, %si +; cmovleq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smin_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 smin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Smin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpb %dl, %sil +; cmovleq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smax_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9] Smax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpq %rdx, %rsi +; cmovgeq %rsi, %rdx +; lock cmpxchgq %rdx, (%rdi) ; trap: heap_oob +; jne 7 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smax_i32(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 32_bits_at_[%r9] Smax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl (%rdi), %eax ; trap: heap_oob +; movq %rax, %rdx +; cmpl %edx, %esi +; cmovgeq %rsi, %rdx +; lock cmpxchgl %edx, (%rdi) ; trap: heap_oob +; jne 6 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smax_i16(i64, i16) -> i16 { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 16_bits_at_[%r9] Smax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzwq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpw %dx, %si +; cmovgeq %rsi, %rdx +; lock cmpxchgw %dx, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smax_i8(i64, i8) -> i8 { +block0(v0: i64, v1: i8): + v2 = 
atomic_rmw.i8 smax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 8_bits_at_[%r9] Smax= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbq (%rdi), %rax ; trap: heap_oob +; movq %rax, %rdx +; cmpb %dl, %sil +; cmovgeq %rsi, %rdx +; lock cmpxchgb %dl, (%rdi) ; trap: heap_oob +; jne 8 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif index cbf9d3e46d63..8bfcc8a82031 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif @@ -11,7 +11,25 @@ target riscv64 has_c has_zcb ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly -function %atomic_rmw_add_i64(i64, i64) -> i64 { +function %atomic_rmw_add_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little add v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_add_i64(0, 0) == [0, 0] +; run: %atomic_rmw_add_i64(1, 0) == [1, 1] +; run: %atomic_rmw_add_i64(0, 1) == [0, 1] +; run: %atomic_rmw_add_i64(1, 1) == [1, 2] +; run: %atomic_rmw_add_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] + +function %atomic_rmw_add_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -23,13 +41,31 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_add_i64(0, 0) == 0 -; run: %atomic_rmw_add_i64(1, 0) == 1 -; run: %atomic_rmw_add_i64(0, 1) == 1 -; run: %atomic_rmw_add_i64(1, 1) == 2 -; run: %atomic_rmw_add_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == 0xDECAFFFF_DECAFFFF +; run: %atomic_rmw_add_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_add_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_add_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_add_i64_no_res(1, 1) == 2 +; run: %atomic_rmw_add_i64_no_res(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == 0xDECAFFFF_DECAFFFF -function %atomic_rmw_add_i32(i32, i32) -> i32 { +function %atomic_rmw_add_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little add v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_add_i32(0, 0) == [0, 0] +; run: %atomic_rmw_add_i32(1, 0) == [1, 1] +; run: %atomic_rmw_add_i32(0, 1) == [0, 1] +; run: %atomic_rmw_add_i32(1, 1) == [1, 2] +; run: %atomic_rmw_add_i32(0xC0FFEEEE, 0x1DCB1111) == [0xC0FFEEEE, 0xDECAFFFF] + +function %atomic_rmw_add_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -41,15 +77,33 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_add_i32(0, 0) == 0 -; run: %atomic_rmw_add_i32(1, 0) == 1 -; run: %atomic_rmw_add_i32(0, 1) == 1 -; run: %atomic_rmw_add_i32(1, 1) == 2 -; run: %atomic_rmw_add_i32(0xC0FFEEEE, 0x1DCB1111) == 0xDECAFFFF +; run: %atomic_rmw_add_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_add_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_add_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_add_i32_no_res(1, 1) == 2 +; run: %atomic_rmw_add_i32_no_res(0xC0FFEEEE, 0x1DCB1111) == 0xDECAFFFF + 
+ + +function %atomic_rmw_sub_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + v3 = atomic_rmw.i64 little sub v2, v1 + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_sub_i64(0, 0) == [0, 0] +; run: %atomic_rmw_sub_i64(1, 0) == [1, 1] +; run: %atomic_rmw_sub_i64(0, 1) == [0, -1] +; run: %atomic_rmw_sub_i64(1, 1) == [1, 0] +; run: %atomic_rmw_sub_i64(0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0xC0FFEEEE_C0FFEEEE] -function %atomic_rmw_sub_i64(i64, i64) -> i64 { +function %atomic_rmw_sub_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -61,13 +115,31 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_sub_i64(0, 0) == 0 -; run: %atomic_rmw_sub_i64(1, 0) == 1 -; run: %atomic_rmw_sub_i64(0, 1) == -1 -; run: %atomic_rmw_sub_i64(1, 1) == 0 -; run: %atomic_rmw_sub_i64(0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111) == 0xC0FFEEEE_C0FFEEEE +; run: %atomic_rmw_sub_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_sub_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_sub_i64_no_res(0, 1) == -1 +; run: %atomic_rmw_sub_i64_no_res(1, 1) == 0 +; run: %atomic_rmw_sub_i64_no_res(0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111) == 0xC0FFEEEE_C0FFEEEE + +function %atomic_rmw_sub_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little sub v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_sub_i32(0, 0) == [0, 0] +; run: %atomic_rmw_sub_i32(1, 0) == [1, 1] +; run: %atomic_rmw_sub_i32(0, 1) == [0, -1] +; run: %atomic_rmw_sub_i32(1, 1) == [1, 0] +; run: %atomic_rmw_sub_i32(0xDECAFFFF, 0x1DCB1111) == [0xDECAFFFF, 0xC0FFEEEE] -function %atomic_rmw_sub_i32(i32, i32) -> i32 { +function %atomic_rmw_sub_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -79,15 +151,33 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_sub_i32(0, 0) == 0 -; run: %atomic_rmw_sub_i32(1, 0) == 1 -; run: %atomic_rmw_sub_i32(0, 1) == -1 -; run: %atomic_rmw_sub_i32(1, 1) == 0 -; run: %atomic_rmw_sub_i32(0xDECAFFFF, 0x1DCB1111) == 0xC0FFEEEE +; run: %atomic_rmw_sub_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_sub_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_sub_i32_no_res(0, 1) == -1 +; run: %atomic_rmw_sub_i32_no_res(1, 1) == 0 +; run: %atomic_rmw_sub_i32_no_res(0xDECAFFFF, 0x1DCB1111) == 0xC0FFEEEE -function %atomic_rmw_and_i64(i64, i64) -> i64 { +function %atomic_rmw_and_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little and v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_and_i64(0, 0) == [0, 0] +; run: %atomic_rmw_and_i64(1, 0) == [1, 0] +; run: %atomic_rmw_and_i64(0, 1) == [0, 0] +; run: %atomic_rmw_and_i64(1, 1) == [1, 1] +; run: %atomic_rmw_and_i64(0xF1FFFEFE_FEEEFFFF, 0xCEFFEFEF_DFDBFFFF) == [0xF1FFFEFE_FEEEFFFF, 0xC0FFEEEE_DECAFFFF] + +function %atomic_rmw_and_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -99,13 +189,32 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_and_i64(0, 0) == 0 -; run: %atomic_rmw_and_i64(1, 0) == 0 -; run: %atomic_rmw_and_i64(0, 1) == 0 -; run: %atomic_rmw_and_i64(1, 1) == 1 -; run: %atomic_rmw_and_i64(0xF1FFFEFE_FEEEFFFF, 0xCEFFEFEF_DFDBFFFF) == 
0xC0FFEEEE_DECAFFFF +; run: %atomic_rmw_and_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_and_i64_no_res(1, 0) == 0 +; run: %atomic_rmw_and_i64_no_res(0, 1) == 0 +; run: %atomic_rmw_and_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_and_i64_no_res(0xF1FFFEFE_FEEEFFFF, 0xCEFFEFEF_DFDBFFFF) == 0xC0FFEEEE_DECAFFFF + +function %atomic_rmw_and_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little and v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} -function %atomic_rmw_and_i32(i32, i32) -> i32 { +; run: %atomic_rmw_and_i32(0, 0) == [0, 0] +; run: %atomic_rmw_and_i32(1, 0) == [1, 0] +; run: %atomic_rmw_and_i32(0, 1) == [0, 0] +; run: %atomic_rmw_and_i32(1, 1) == [1, 1] +; run: %atomic_rmw_and_i32(0xF1FFFEFE, 0xCEFFEFEF) == [0xF1FFFEFE, 0xC0FFEEEE] + +function %atomic_rmw_and_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -118,15 +227,33 @@ block0(v0: i32, v1: i32): return v4 } -; run: %atomic_rmw_and_i32(0, 0) == 0 -; run: %atomic_rmw_and_i32(1, 0) == 0 -; run: %atomic_rmw_and_i32(0, 1) == 0 -; run: %atomic_rmw_and_i32(1, 1) == 1 -; run: %atomic_rmw_and_i32(0xF1FFFEFE, 0xCEFFEFEF) == 0xC0FFEEEE +; run: %atomic_rmw_and_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_and_i32_no_res(1, 0) == 0 +; run: %atomic_rmw_and_i32_no_res(0, 1) == 0 +; run: %atomic_rmw_and_i32_no_res(1, 1) == 1 +; run: %atomic_rmw_and_i32_no_res(0xF1FFFEFE, 0xCEFFEFEF) == 0xC0FFEEEE -function %atomic_rmw_or_i64(i64, i64) -> i64 { +function %atomic_rmw_or_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little or v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_or_i64(0, 0) == [0, 0] +; run: %atomic_rmw_or_i64(1, 0) == [1, 1] +; run: %atomic_rmw_or_i64(0, 1) == [0, 1] +; run: %atomic_rmw_or_i64(1, 1) == [1, 1] +; run: %atomic_rmw_or_i64(0x80AAAAAA_8A8AAAAA, 0x40554444_54405555) == [0x80AAAAAA_8A8AAAAA, 0xC0FFEEEE_DECAFFFF] + +function %atomic_rmw_or_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -138,13 +265,32 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_or_i64(0, 0) == 0 -; run: %atomic_rmw_or_i64(1, 0) == 1 -; run: %atomic_rmw_or_i64(0, 1) == 1 -; run: %atomic_rmw_or_i64(1, 1) == 1 -; run: %atomic_rmw_or_i64(0x80AAAAAA_8A8AAAAA, 0x40554444_54405555) == 0xC0FFEEEE_DECAFFFF +; run: %atomic_rmw_or_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_or_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_or_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_or_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_or_i64_no_res(0x80AAAAAA_8A8AAAAA, 0x40554444_54405555) == 0xC0FFEEEE_DECAFFFF + +function %atomic_rmw_or_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little or v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} -function %atomic_rmw_or_i32(i32, i32) -> i32 { +; run: %atomic_rmw_or_i32(0, 0) == [0, 0] +; run: %atomic_rmw_or_i32(1, 0) == [1, 1] +; run: %atomic_rmw_or_i32(0, 1) == [0, 1] +; run: %atomic_rmw_or_i32(1, 1) == [1, 1] +; run: %atomic_rmw_or_i32(0x80AAAAAA, 0x40554444) == [0x80AAAAAA, 0xC0FFEEEE] + +function %atomic_rmw_or_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -157,15 +303,33 @@ block0(v0: i32, v1: i32): return v4 } -; run: %atomic_rmw_or_i32(0, 0) == 0 -; run: 
%atomic_rmw_or_i32(1, 0) == 1 -; run: %atomic_rmw_or_i32(0, 1) == 1 -; run: %atomic_rmw_or_i32(1, 1) == 1 -; run: %atomic_rmw_or_i32(0x80AAAAAA, 0x40554444) == 0xC0FFEEEE +; run: %atomic_rmw_or_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_or_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_or_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_or_i32_no_res(1, 1) == 1 +; run: %atomic_rmw_or_i32_no_res(0x80AAAAAA, 0x40554444) == 0xC0FFEEEE -function %atomic_rmw_xor_i64(i64, i64) -> i64 { +function %atomic_rmw_xor_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little xor v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_xor_i64(0, 0) == [0, 0] +; run: %atomic_rmw_xor_i64(1, 0) == [1, 1] +; run: %atomic_rmw_xor_i64(0, 1) == [0, 1] +; run: %atomic_rmw_xor_i64(1, 1) == [1, 0] +; run: %atomic_rmw_xor_i64(0x8FA50A64_9440A07D, 0x4F5AE48A_4A8A5F82) == [0x8FA50A64_9440A07D, 0xC0FFEEEE_DECAFFFF] + +function %atomic_rmw_xor_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -177,13 +341,31 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_xor_i64(0, 0) == 0 -; run: %atomic_rmw_xor_i64(1, 0) == 1 -; run: %atomic_rmw_xor_i64(0, 1) == 1 -; run: %atomic_rmw_xor_i64(1, 1) == 0 -; run: %atomic_rmw_xor_i64(0x8FA50A64_9440A07D, 0x4F5AE48A_4A8A5F82) == 0xC0FFEEEE_DECAFFFF +; run: %atomic_rmw_xor_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_xor_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_xor_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_xor_i64_no_res(1, 1) == 0 +; run: %atomic_rmw_xor_i64_no_res(0x8FA50A64_9440A07D, 0x4F5AE48A_4A8A5F82) == 0xC0FFEEEE_DECAFFFF -function %atomic_rmw_xor_i32(i32, i32) -> i32 { +function %atomic_rmw_xor_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little xor v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_xor_i32(0, 0) == [0, 0] +; run: %atomic_rmw_xor_i32(1, 0) == [1, 1] +; run: %atomic_rmw_xor_i32(0, 1) == [0, 1] +; run: %atomic_rmw_xor_i32(1, 1) == [1, 0] +; run: %atomic_rmw_xor_i32(0x8FA50A64, 0x4F5AE48A) == [0x8FA50A64, 0xC0FFEEEE] + +function %atomic_rmw_xor_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -195,15 +377,33 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_xor_i32(0, 0) == 0 -; run: %atomic_rmw_xor_i32(1, 0) == 1 -; run: %atomic_rmw_xor_i32(0, 1) == 1 -; run: %atomic_rmw_xor_i32(1, 1) == 0 -; run: %atomic_rmw_xor_i32(0x8FA50A64, 0x4F5AE48A) == 0xC0FFEEEE +; run: %atomic_rmw_xor_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_xor_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_xor_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_xor_i32_no_res(1, 1) == 0 +; run: %atomic_rmw_xor_i32_no_res(0x8FA50A64, 0x4F5AE48A) == 0xC0FFEEEE + + + +function %atomic_rmw_nand_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little nand v2, v1 + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_nand_i64(0, 0) == [0, -1] +; run: %atomic_rmw_nand_i64(1, 0) == [1, -1] +; run: %atomic_rmw_nand_i64(0, 1) == [0, -1] +; run: %atomic_rmw_nand_i64(1, 1) == [1, -2] +; run: %atomic_rmw_nand_i64(0xC0FFEEEE_DECAFFFF, 0x7DCB5691_7DCB5691) == [0xC0FFEEEE_DECAFFFF, 0xBF34B97F_A335A96E] -function %atomic_rmw_nand_i64(i64, i64) -> 
i64 { +function %atomic_rmw_nand_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -215,13 +415,31 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_nand_i64(0, 0) == -1 -; run: %atomic_rmw_nand_i64(1, 0) == -1 -; run: %atomic_rmw_nand_i64(0, 1) == -1 -; run: %atomic_rmw_nand_i64(1, 1) == -2 -; run: %atomic_rmw_nand_i64(0xC0FFEEEE_DECAFFFF, 0x7DCB5691_7DCB5691) == 0xBF34B97F_A335A96E +; run: %atomic_rmw_nand_i64_no_res(0, 0) == -1 +; run: %atomic_rmw_nand_i64_no_res(1, 0) == -1 +; run: %atomic_rmw_nand_i64_no_res(0, 1) == -1 +; run: %atomic_rmw_nand_i64_no_res(1, 1) == -2 +; run: %atomic_rmw_nand_i64_no_res(0xC0FFEEEE_DECAFFFF, 0x7DCB5691_7DCB5691) == 0xBF34B97F_A335A96E -function %atomic_rmw_nand_i32(i32, i32) -> i32 { +function %atomic_rmw_nand_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little nand v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_nand_i32(0, 0) == [0, -1] +; run: %atomic_rmw_nand_i32(1, 0) == [1, -1] +; run: %atomic_rmw_nand_i32(0, 1) == [0, -1] +; run: %atomic_rmw_nand_i32(1, 1) == [1, -2] +; run: %atomic_rmw_nand_i32(0xC0FFEEEE, 0x7DCB5691) == [0xC0FFEEEE, 0xBF34B97F] + +function %atomic_rmw_nand_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -233,15 +451,34 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_nand_i32(0, 0) == -1 -; run: %atomic_rmw_nand_i32(1, 0) == -1 -; run: %atomic_rmw_nand_i32(0, 1) == -1 -; run: %atomic_rmw_nand_i32(1, 1) == -2 -; run: %atomic_rmw_nand_i32(0xC0FFEEEE, 0x7DCB5691) == 0xBF34B97F +; run: %atomic_rmw_nand_i32_no_res(0, 0) == -1 +; run: %atomic_rmw_nand_i32_no_res(1, 0) == -1 +; run: %atomic_rmw_nand_i32_no_res(0, 1) == -1 +; run: %atomic_rmw_nand_i32_no_res(1, 1) == -2 +; run: %atomic_rmw_nand_i32_no_res(0xC0FFEEEE, 0x7DCB5691) == 0xBF34B97F + + + +function %atomic_rmw_umin_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + v3 = atomic_rmw.i64 little umin v2, v1 -function %atomic_rmw_umin_i64(i64, i64) -> i64 { + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_umin_i64(0, 0) == [0, 0] +; run: %atomic_rmw_umin_i64(1, 0) == [1, 0] +; run: %atomic_rmw_umin_i64(0, 1) == [0, 0] +; run: %atomic_rmw_umin_i64(1, 1) == [1, 1] +; run: %atomic_rmw_umin_i64(-1, 1) == [-1, 1] +; run: %atomic_rmw_umin_i64(-1, -3) == [-1, -3] + +function %atomic_rmw_umin_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -253,14 +490,33 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_umin_i64(0, 0) == 0 -; run: %atomic_rmw_umin_i64(1, 0) == 0 -; run: %atomic_rmw_umin_i64(0, 1) == 0 -; run: %atomic_rmw_umin_i64(1, 1) == 1 -; run: %atomic_rmw_umin_i64(-1, 1) == 1 -; run: %atomic_rmw_umin_i64(-1, -3) == -3 +; run: %atomic_rmw_umin_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_umin_i64_no_res(1, 0) == 0 +; run: %atomic_rmw_umin_i64_no_res(0, 1) == 0 +; run: %atomic_rmw_umin_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_umin_i64_no_res(-1, 1) == 1 +; run: %atomic_rmw_umin_i64_no_res(-1, -3) == -3 + +function %atomic_rmw_umin_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 -function %atomic_rmw_umin_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little umin v2, v1 + + 
v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_umin_i32(0, 0) == [0, 0] +; run: %atomic_rmw_umin_i32(1, 0) == [1, 0] +; run: %atomic_rmw_umin_i32(0, 1) == [0, 0] +; run: %atomic_rmw_umin_i32(1, 1) == [1, 1] +; run: %atomic_rmw_umin_i32(-1, 1) == [-1, 1] +; run: %atomic_rmw_umin_i32(-1, -3) == [-1, -3] + +function %atomic_rmw_umin_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -272,16 +528,35 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_umin_i32(0, 0) == 0 -; run: %atomic_rmw_umin_i32(1, 0) == 0 -; run: %atomic_rmw_umin_i32(0, 1) == 0 -; run: %atomic_rmw_umin_i32(1, 1) == 1 -; run: %atomic_rmw_umin_i32(-1, 1) == 1 -; run: %atomic_rmw_umin_i32(-1, -3) == -3 +; run: %atomic_rmw_umin_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_umin_i32_no_res(1, 0) == 0 +; run: %atomic_rmw_umin_i32_no_res(0, 1) == 0 +; run: %atomic_rmw_umin_i32_no_res(1, 1) == 1 +; run: %atomic_rmw_umin_i32_no_res(-1, 1) == 1 +; run: %atomic_rmw_umin_i32_no_res(-1, -3) == -3 -function %atomic_rmw_umax_i64(i64, i64) -> i64 { +function %atomic_rmw_umax_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little umax v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_umax_i64(0, 0) == [0, 0] +; run: %atomic_rmw_umax_i64(1, 0) == [1, 1] +; run: %atomic_rmw_umax_i64(0, 1) == [0, 1] +; run: %atomic_rmw_umax_i64(1, 1) == [1, 1] +; run: %atomic_rmw_umax_i64(-1, 1) == [-1, -1] +; run: %atomic_rmw_umax_i64(-1, -3) == [-1, -1] + +function %atomic_rmw_umax_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -293,14 +568,33 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_umax_i64(0, 0) == 0 -; run: %atomic_rmw_umax_i64(1, 0) == 1 -; run: %atomic_rmw_umax_i64(0, 1) == 1 -; run: %atomic_rmw_umax_i64(1, 1) == 1 -; run: %atomic_rmw_umax_i64(-1, 1) == -1 -; run: %atomic_rmw_umax_i64(-1, -3) == -1 +; run: %atomic_rmw_umax_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_umax_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_umax_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_umax_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_umax_i64_no_res(-1, 1) == -1 +; run: %atomic_rmw_umax_i64_no_res(-1, -3) == -1 + +function %atomic_rmw_umax_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little umax v2, v1 -function %atomic_rmw_umax_i32(i32, i32) -> i32 { + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_umax_i32(0, 0) == [0, 0] +; run: %atomic_rmw_umax_i32(1, 0) == [1, 1] +; run: %atomic_rmw_umax_i32(0, 1) == [0, 1] +; run: %atomic_rmw_umax_i32(1, 1) == [1, 1] +; run: %atomic_rmw_umax_i32(-1, 1) == [-1, -1] +; run: %atomic_rmw_umax_i32(-1, -3) == [-1, -1] + +function %atomic_rmw_umax_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -312,16 +606,35 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_umax_i32(0, 0) == 0 -; run: %atomic_rmw_umax_i32(1, 0) == 1 -; run: %atomic_rmw_umax_i32(0, 1) == 1 -; run: %atomic_rmw_umax_i32(1, 1) == 1 -; run: %atomic_rmw_umax_i32(-1, 1) == -1 -; run: %atomic_rmw_umax_i32(-1, -3) == -1 +; run: %atomic_rmw_umax_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_umax_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_umax_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_umax_i32_no_res(1, 1) == 1 +; run: 
%atomic_rmw_umax_i32_no_res(-1, 1) == -1 +; run: %atomic_rmw_umax_i32_no_res(-1, -3) == -1 + + +function %atomic_rmw_smin_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + v3 = atomic_rmw.i64 little smin v2, v1 -function %atomic_rmw_smin_i64(i64, i64) -> i64 { + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_smin_i64(0, 0) == [0, 0] +; run: %atomic_rmw_smin_i64(1, 0) == [1, 0] +; run: %atomic_rmw_smin_i64(0, 1) == [0, 0] +; run: %atomic_rmw_smin_i64(1, 1) == [1, 1] +; run: %atomic_rmw_smin_i64(-1, 1) == [-1, -1] +; run: %atomic_rmw_smin_i64(-1, -3) == [-1, -3] + +function %atomic_rmw_smin_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -333,14 +646,33 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_smin_i64(0, 0) == 0 -; run: %atomic_rmw_smin_i64(1, 0) == 0 -; run: %atomic_rmw_smin_i64(0, 1) == 0 -; run: %atomic_rmw_smin_i64(1, 1) == 1 -; run: %atomic_rmw_smin_i64(-1, 1) == -1 -; run: %atomic_rmw_smin_i64(-1, -3) == -3 +; run: %atomic_rmw_smin_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_smin_i64_no_res(1, 0) == 0 +; run: %atomic_rmw_smin_i64_no_res(0, 1) == 0 +; run: %atomic_rmw_smin_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_smin_i64_no_res(-1, 1) == -1 +; run: %atomic_rmw_smin_i64_no_res(-1, -3) == -3 + +function %atomic_rmw_smin_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little smin v2, v1 -function %atomic_rmw_smin_i32(i32, i32) -> i32 { + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_smin_i32(0, 0) == [0, 0] +; run: %atomic_rmw_smin_i32(1, 0) == [1, 0] +; run: %atomic_rmw_smin_i32(0, 1) == [0, 0] +; run: %atomic_rmw_smin_i32(1, 1) == [1, 1] +; run: %atomic_rmw_smin_i32(-1, -1) == [-1, -1] +; run: %atomic_rmw_smin_i32(-1, -3) == [-1, -3] + +function %atomic_rmw_smin_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -352,16 +684,35 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_smin_i32(0, 0) == 0 -; run: %atomic_rmw_smin_i32(1, 0) == 0 -; run: %atomic_rmw_smin_i32(0, 1) == 0 -; run: %atomic_rmw_smin_i32(1, 1) == 1 -; run: %atomic_rmw_smin_i32(-1, -1) == -1 -; run: %atomic_rmw_smin_i32(-1, -3) == -3 +; run: %atomic_rmw_smin_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_smin_i32_no_res(1, 0) == 0 +; run: %atomic_rmw_smin_i32_no_res(0, 1) == 0 +; run: %atomic_rmw_smin_i32_no_res(1, 1) == 1 +; run: %atomic_rmw_smin_i32_no_res(-1, -1) == -1 +; run: %atomic_rmw_smin_i32_no_res(-1, -3) == -3 + + +function %atomic_rmw_smax_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + v3 = atomic_rmw.i64 little smax v2, v1 -function %atomic_rmw_smax_i64(i64, i64) -> i64 { + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_smax_i64(0, 0) == [0, 0] +; run: %atomic_rmw_smax_i64(1, 0) == [1, 1] +; run: %atomic_rmw_smax_i64(0, 1) == [0, 1] +; run: %atomic_rmw_smax_i64(1, 1) == [1, 1] +; run: %atomic_rmw_smax_i64(-1, 1) == [-1, 1] +; run: %atomic_rmw_smax_i64(-1, -3) == [-1, -1] + +function %atomic_rmw_smax_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -373,14 +724,33 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_smax_i64(0, 0) == 0 -; run: %atomic_rmw_smax_i64(1, 0) == 1 -; run: 
%atomic_rmw_smax_i64(0, 1) == 1 -; run: %atomic_rmw_smax_i64(1, 1) == 1 -; run: %atomic_rmw_smax_i64(-1, 1) == 1 -; run: %atomic_rmw_smax_i64(-1, -3) == -1 +; run: %atomic_rmw_smax_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_smax_i64_no_res(1, 0) == 1 +; run: %atomic_rmw_smax_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_smax_i64_no_res(1, 1) == 1 +; run: %atomic_rmw_smax_i64_no_res(-1, 1) == 1 +; run: %atomic_rmw_smax_i64_no_res(-1, -3) == -1 + +function %atomic_rmw_smax_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little smax v2, v1 -function %atomic_rmw_smax_i32(i32, i32) -> i32 { + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_smax_i32(0, 0) == [0, 0] +; run: %atomic_rmw_smax_i32(1, 0) == [1, 1] +; run: %atomic_rmw_smax_i32(0, 1) == [0, 1] +; run: %atomic_rmw_smax_i32(1, 1) == [1, 1] +; run: %atomic_rmw_smax_i32(-1, 1) == [-1, 1] +; run: %atomic_rmw_smax_i32(-1, -3) == [-1, -1] + +function %atomic_rmw_smax_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -392,16 +762,33 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } -; run: %atomic_rmw_smax_i32(0, 0) == 0 -; run: %atomic_rmw_smax_i32(1, 0) == 1 -; run: %atomic_rmw_smax_i32(0, 1) == 1 -; run: %atomic_rmw_smax_i32(1, 1) == 1 -; run: %atomic_rmw_smax_i32(-1, 1) == 1 -; run: %atomic_rmw_smax_i32(-1, -3) == -1 +; run: %atomic_rmw_smax_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_smax_i32_no_res(1, 0) == 1 +; run: %atomic_rmw_smax_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_smax_i32_no_res(1, 1) == 1 +; run: %atomic_rmw_smax_i32_no_res(-1, 1) == 1 +; run: %atomic_rmw_smax_i32_no_res(-1, -3) == -1 -function %atomic_rmw_xchg_i64(i64, i64) -> i64 { +function %atomic_rmw_xchg_i64(i64, i64) -> i64, i64 { + ss0 = explicit_slot 8 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss0 + store.i64 little v0, v2 + + v3 = atomic_rmw.i64 little xchg v2, v1 + + v4 = load.i64 little v2 + return v3, v4 +} +; run: %atomic_rmw_xchg_i64(0, 0) == [0, 0] +; run: %atomic_rmw_xchg_i64(1, 0) == [1, 0] +; run: %atomic_rmw_xchg_i64(0, 1) == [0, 1] +; run: %atomic_rmw_xchg_i64(0, 0xC0FFEEEE_DECAFFFF) == [0, 0xC0FFEEEE_DECAFFFF] + +function %atomic_rmw_xchg_i64_no_res(i64, i64) -> i64 { ss0 = explicit_slot 8 block0(v0: i64, v1: i64): @@ -413,12 +800,29 @@ block0(v0: i64, v1: i64): v4 = load.i64 little v2 return v4 } -; run: %atomic_rmw_xchg_i64(0, 0) == 0 -; run: %atomic_rmw_xchg_i64(1, 0) == 0 -; run: %atomic_rmw_xchg_i64(0, 1) == 1 -; run: %atomic_rmw_xchg_i64(0, 0xC0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF +; run: %atomic_rmw_xchg_i64_no_res(0, 0) == 0 +; run: %atomic_rmw_xchg_i64_no_res(1, 0) == 0 +; run: %atomic_rmw_xchg_i64_no_res(0, 1) == 1 +; run: %atomic_rmw_xchg_i64_no_res(0, 0xC0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF + +function %atomic_rmw_xchg_i32(i32, i32) -> i32, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i32): + v2 = stack_addr.i64 ss0 + store.i32 little v0, v2 + + v3 = atomic_rmw.i32 little xchg v2, v1 + + v4 = load.i32 little v2 + return v3, v4 +} +; run: %atomic_rmw_xchg_i32(0, 0) == [0, 0] +; run: %atomic_rmw_xchg_i32(1, 0) == [1, 0] +; run: %atomic_rmw_xchg_i32(0, 1) == [0, 1] +; run: %atomic_rmw_xchg_i32(0, 0xC0FFEEEE) == [0, 0xC0FFEEEE] -function %atomic_rmw_xchg_i32(i32, i32) -> i32 { +function %atomic_rmw_xchg_i32_no_res(i32, i32) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i32): @@ -430,7 +834,7 @@ block0(v0: i32, v1: i32): v4 = load.i32 little v2 return v4 } 
-; run: %atomic_rmw_xchg_i32(0, 0) == 0 -; run: %atomic_rmw_xchg_i32(1, 0) == 0 -; run: %atomic_rmw_xchg_i32(0, 1) == 1 -; run: %atomic_rmw_xchg_i32(0, 0xC0FFEEEE) == 0xC0FFEEEE +; run: %atomic_rmw_xchg_i32_no_res(0, 0) == 0 +; run: %atomic_rmw_xchg_i32_no_res(1, 0) == 0 +; run: %atomic_rmw_xchg_i32_no_res(0, 1) == 1 +; run: %atomic_rmw_xchg_i32_no_res(0, 0xC0FFEEEE) == 0xC0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-big.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-big.clif index f0f52c12a095..8ab516cb271c 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-big.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-big.clif @@ -6,7 +6,25 @@ target s390x has_mie2 ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly -function %atomic_rmw_add_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_add_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big add v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_add_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x23455678] +; run: %atomic_rmw_add_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0x12335678] +; run: %atomic_rmw_add_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12346789] +; run: %atomic_rmw_add_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x12345677] + +function %atomic_rmw_add_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -19,12 +37,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_add_big_i16(0x12345678, 0, 0x1111) == 0x23455678 -; run: %atomic_rmw_add_big_i16(0x12345678, 0, 0xffff) == 0x12335678 -; run: %atomic_rmw_add_big_i16(0x12345678, 2, 0x1111) == 0x12346789 -; run: %atomic_rmw_add_big_i16(0x12345678, 2, 0xffff) == 0x12345677 +; run: %atomic_rmw_add_big_i16_no_res(0x12345678, 0, 0x1111) == 0x23455678 +; run: %atomic_rmw_add_big_i16_no_res(0x12345678, 0, 0xffff) == 0x12335678 +; run: %atomic_rmw_add_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12346789 +; run: %atomic_rmw_add_big_i16_no_res(0x12345678, 2, 0xffff) == 0x12345677 + +function %atomic_rmw_add_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big add v4, v2 -function %atomic_rmw_add_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_add_big_i8(0x12345678, 0, 0x11) == [0x12, 0x23345678] +; run: %atomic_rmw_add_big_i8(0x12345678, 0, 0xff) == [0x12, 0x11345678] +; run: %atomic_rmw_add_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12455678] +; run: %atomic_rmw_add_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12335678] +; run: %atomic_rmw_add_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12346778] +; run: %atomic_rmw_add_big_i8(0x12345678, 2, 0xff) == [0x56, 0x12345578] +; run: %atomic_rmw_add_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345689] +; run: %atomic_rmw_add_big_i8(0x12345678, 3, 0xff) == [0x78, 0x12345677] + +function %atomic_rmw_add_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -37,16 +77,34 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_add_big_i8(0x12345678, 0, 0x11) == 0x23345678 -; run: 
%atomic_rmw_add_big_i8(0x12345678, 0, 0xff) == 0x11345678 -; run: %atomic_rmw_add_big_i8(0x12345678, 1, 0x11) == 0x12455678 -; run: %atomic_rmw_add_big_i8(0x12345678, 1, 0xff) == 0x12335678 -; run: %atomic_rmw_add_big_i8(0x12345678, 2, 0x11) == 0x12346778 -; run: %atomic_rmw_add_big_i8(0x12345678, 2, 0xff) == 0x12345578 -; run: %atomic_rmw_add_big_i8(0x12345678, 3, 0x11) == 0x12345689 -; run: %atomic_rmw_add_big_i8(0x12345678, 3, 0xff) == 0x12345677 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 0, 0x11) == 0x23345678 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 0, 0xff) == 0x11345678 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 1, 0x11) == 0x12455678 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 1, 0xff) == 0x12335678 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 2, 0x11) == 0x12346778 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 2, 0xff) == 0x12345578 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345689 +; run: %atomic_rmw_add_big_i8_no_res(0x12345678, 3, 0xff) == 0x12345677 + +function %atomic_rmw_sub_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 -function %atomic_rmw_sub_big_i16(i32, i64, i16) -> i32 { +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big sub v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_sub_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x01235678] +; run: %atomic_rmw_sub_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0x12355678] +; run: %atomic_rmw_sub_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12344567] +; run: %atomic_rmw_sub_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x12345679] + +function %atomic_rmw_sub_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -59,13 +117,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_sub_big_i16(0x12345678, 0, 0x1111) == 0x01235678 -; run: %atomic_rmw_sub_big_i16(0x12345678, 0, 0xffff) == 0x12355678 -; run: %atomic_rmw_sub_big_i16(0x12345678, 2, 0x1111) == 0x12344567 -; run: %atomic_rmw_sub_big_i16(0x12345678, 2, 0xffff) == 0x12345679 +; run: %atomic_rmw_sub_big_i16_no_res(0x12345678, 0, 0x1111) == 0x01235678 +; run: %atomic_rmw_sub_big_i16_no_res(0x12345678, 0, 0xffff) == 0x12355678 +; run: %atomic_rmw_sub_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12344567 +; run: %atomic_rmw_sub_big_i16_no_res(0x12345678, 2, 0xffff) == 0x12345679 + + +function %atomic_rmw_sub_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big sub v4, v2 -function %atomic_rmw_sub_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_sub_big_i8(0x12345678, 0, 0x11) == [0x12, 0x01345678] +; run: %atomic_rmw_sub_big_i8(0x12345678, 0, 0xff) == [0x12, 0x13345678] +; run: %atomic_rmw_sub_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12235678] +; run: %atomic_rmw_sub_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12355678] +; run: %atomic_rmw_sub_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12344578] +; run: %atomic_rmw_sub_big_i8(0x12345678, 2, 0xff) == [0x56, 0x12345778] +; run: %atomic_rmw_sub_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345667] +; run: %atomic_rmw_sub_big_i8(0x12345678, 3, 0xff) == [0x78, 0x12345679] + +function %atomic_rmw_sub_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -78,17 +158,35 @@ 
block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_sub_big_i8(0x12345678, 0, 0x11) == 0x01345678 -; run: %atomic_rmw_sub_big_i8(0x12345678, 0, 0xff) == 0x13345678 -; run: %atomic_rmw_sub_big_i8(0x12345678, 1, 0x11) == 0x12235678 -; run: %atomic_rmw_sub_big_i8(0x12345678, 1, 0xff) == 0x12355678 -; run: %atomic_rmw_sub_big_i8(0x12345678, 2, 0x11) == 0x12344578 -; run: %atomic_rmw_sub_big_i8(0x12345678, 2, 0xff) == 0x12345778 -; run: %atomic_rmw_sub_big_i8(0x12345678, 3, 0x11) == 0x12345667 -; run: %atomic_rmw_sub_big_i8(0x12345678, 3, 0xff) == 0x12345679 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 0, 0x11) == 0x01345678 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 0, 0xff) == 0x13345678 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 1, 0x11) == 0x12235678 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 1, 0xff) == 0x12355678 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 2, 0x11) == 0x12344578 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 2, 0xff) == 0x12345778 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345667 +; run: %atomic_rmw_sub_big_i8_no_res(0x12345678, 3, 0xff) == 0x12345679 -function %atomic_rmw_and_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_and_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big and v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_and_big_i16(0x12345678, 0, 0xf000) == [0x1234, 0x10005678] +; run: %atomic_rmw_and_big_i16(0x12345678, 0, 0x000f) == [0x1234, 0x00045678] +; run: %atomic_rmw_and_big_i16(0x12345678, 2, 0xf000) == [0x5678, 0x12345000] +; run: %atomic_rmw_and_big_i16(0x12345678, 2, 0x000f) == [0x5678, 0x12340008] + +function %atomic_rmw_and_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -101,13 +199,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_and_big_i16(0x12345678, 0, 0xf000) == 0x10005678 -; run: %atomic_rmw_and_big_i16(0x12345678, 0, 0x000f) == 0x00045678 -; run: %atomic_rmw_and_big_i16(0x12345678, 2, 0xf000) == 0x12345000 -; run: %atomic_rmw_and_big_i16(0x12345678, 2, 0x000f) == 0x12340008 +; run: %atomic_rmw_and_big_i16_no_res(0x12345678, 0, 0xf000) == 0x10005678 +; run: %atomic_rmw_and_big_i16_no_res(0x12345678, 0, 0x000f) == 0x00045678 +; run: %atomic_rmw_and_big_i16_no_res(0x12345678, 2, 0xf000) == 0x12345000 +; run: %atomic_rmw_and_big_i16_no_res(0x12345678, 2, 0x000f) == 0x12340008 + + +function %atomic_rmw_and_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big and v4, v2 -function %atomic_rmw_and_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_and_big_i8(0x12345678, 0, 0xf0) == [0x12, 0x10345678] +; run: %atomic_rmw_and_big_i8(0x12345678, 0, 0x0f) == [0x12, 0x02345678] +; run: %atomic_rmw_and_big_i8(0x12345678, 1, 0xf0) == [0x34, 0x12305678] +; run: %atomic_rmw_and_big_i8(0x12345678, 1, 0x0f) == [0x34, 0x12045678] +; run: %atomic_rmw_and_big_i8(0x12345678, 2, 0xf0) == [0x56, 0x12345078] +; run: %atomic_rmw_and_big_i8(0x12345678, 2, 0x0f) == [0x56, 0x12340678] +; run: %atomic_rmw_and_big_i8(0x12345678, 3, 0xf0) == [0x78, 0x12345670] +; run: %atomic_rmw_and_big_i8(0x12345678, 3, 0x0f) == [0x78, 0x12345608] + 
+function %atomic_rmw_and_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -120,17 +240,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_and_big_i8(0x12345678, 0, 0xf0) == 0x10345678 -; run: %atomic_rmw_and_big_i8(0x12345678, 0, 0x0f) == 0x02345678 -; run: %atomic_rmw_and_big_i8(0x12345678, 1, 0xf0) == 0x12305678 -; run: %atomic_rmw_and_big_i8(0x12345678, 1, 0x0f) == 0x12045678 -; run: %atomic_rmw_and_big_i8(0x12345678, 2, 0xf0) == 0x12345078 -; run: %atomic_rmw_and_big_i8(0x12345678, 2, 0x0f) == 0x12340678 -; run: %atomic_rmw_and_big_i8(0x12345678, 3, 0xf0) == 0x12345670 -; run: %atomic_rmw_and_big_i8(0x12345678, 3, 0x0f) == 0x12345608 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 0, 0xf0) == 0x10345678 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 0, 0x0f) == 0x02345678 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 1, 0xf0) == 0x12305678 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 1, 0x0f) == 0x12045678 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 2, 0xf0) == 0x12345078 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 2, 0x0f) == 0x12340678 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 3, 0xf0) == 0x12345670 +; run: %atomic_rmw_and_big_i8_no_res(0x12345678, 3, 0x0f) == 0x12345608 + + +function %atomic_rmw_or_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big or v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_or_big_i16(0x12345678, 0, 0xf000) == [0x1234, 0xf2345678] +; run: %atomic_rmw_or_big_i16(0x12345678, 0, 0x000f) == [0x1234, 0x123f5678] +; run: %atomic_rmw_or_big_i16(0x12345678, 2, 0xf000) == [0x5678, 0x1234f678] +; run: %atomic_rmw_or_big_i16(0x12345678, 2, 0x000f) == [0x5678, 0x1234567f] -function %atomic_rmw_or_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_or_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -143,13 +281,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_or_big_i16(0x12345678, 0, 0xf000) == 0xf2345678 -; run: %atomic_rmw_or_big_i16(0x12345678, 0, 0x000f) == 0x123f5678 -; run: %atomic_rmw_or_big_i16(0x12345678, 2, 0xf000) == 0x1234f678 -; run: %atomic_rmw_or_big_i16(0x12345678, 2, 0x000f) == 0x1234567f +; run: %atomic_rmw_or_big_i16_no_res(0x12345678, 0, 0xf000) == 0xf2345678 +; run: %atomic_rmw_or_big_i16_no_res(0x12345678, 0, 0x000f) == 0x123f5678 +; run: %atomic_rmw_or_big_i16_no_res(0x12345678, 2, 0xf000) == 0x1234f678 +; run: %atomic_rmw_or_big_i16_no_res(0x12345678, 2, 0x000f) == 0x1234567f + + +function %atomic_rmw_or_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big or v4, v2 -function %atomic_rmw_or_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_or_big_i8(0x12345678, 0, 0xf0) == [0x12, 0xf2345678] +; run: %atomic_rmw_or_big_i8(0x12345678, 0, 0x0f) == [0x12, 0x1f345678] +; run: %atomic_rmw_or_big_i8(0x12345678, 1, 0xf0) == [0x34, 0x12f45678] +; run: %atomic_rmw_or_big_i8(0x12345678, 1, 0x0f) == [0x34, 0x123f5678] +; run: %atomic_rmw_or_big_i8(0x12345678, 2, 0xf0) == [0x56, 0x1234f678] +; run: %atomic_rmw_or_big_i8(0x12345678, 2, 0x0f) == [0x56, 0x12345f78] +; run: 
%atomic_rmw_or_big_i8(0x12345678, 3, 0xf0) == [0x78, 0x123456f8] +; run: %atomic_rmw_or_big_i8(0x12345678, 3, 0x0f) == [0x78, 0x1234567f] + +function %atomic_rmw_or_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -162,17 +322,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_or_big_i8(0x12345678, 0, 0xf0) == 0xf2345678 -; run: %atomic_rmw_or_big_i8(0x12345678, 0, 0x0f) == 0x1f345678 -; run: %atomic_rmw_or_big_i8(0x12345678, 1, 0xf0) == 0x12f45678 -; run: %atomic_rmw_or_big_i8(0x12345678, 1, 0x0f) == 0x123f5678 -; run: %atomic_rmw_or_big_i8(0x12345678, 2, 0xf0) == 0x1234f678 -; run: %atomic_rmw_or_big_i8(0x12345678, 2, 0x0f) == 0x12345f78 -; run: %atomic_rmw_or_big_i8(0x12345678, 3, 0xf0) == 0x123456f8 -; run: %atomic_rmw_or_big_i8(0x12345678, 3, 0x0f) == 0x1234567f +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 0, 0xf0) == 0xf2345678 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 0, 0x0f) == 0x1f345678 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 1, 0xf0) == 0x12f45678 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 1, 0x0f) == 0x123f5678 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 2, 0xf0) == 0x1234f678 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 2, 0x0f) == 0x12345f78 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 3, 0xf0) == 0x123456f8 +; run: %atomic_rmw_or_big_i8_no_res(0x12345678, 3, 0x0f) == 0x1234567f -function %atomic_rmw_xor_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_xor_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big xor v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_xor_big_i16(0x12345678, 0, 0xf000) == [0x1234, 0xe2345678] +; run: %atomic_rmw_xor_big_i16(0x12345678, 0, 0x000f) == [0x1234, 0x123b5678] +; run: %atomic_rmw_xor_big_i16(0x12345678, 2, 0xf000) == [0x5678, 0x1234a678] +; run: %atomic_rmw_xor_big_i16(0x12345678, 2, 0x000f) == [0x5678, 0x12345677] + +function %atomic_rmw_xor_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -185,13 +363,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_xor_big_i16(0x12345678, 0, 0xf000) == 0xe2345678 -; run: %atomic_rmw_xor_big_i16(0x12345678, 0, 0x000f) == 0x123b5678 -; run: %atomic_rmw_xor_big_i16(0x12345678, 2, 0xf000) == 0x1234a678 -; run: %atomic_rmw_xor_big_i16(0x12345678, 2, 0x000f) == 0x12345677 +; run: %atomic_rmw_xor_big_i16_no_res(0x12345678, 0, 0xf000) == 0xe2345678 +; run: %atomic_rmw_xor_big_i16_no_res(0x12345678, 0, 0x000f) == 0x123b5678 +; run: %atomic_rmw_xor_big_i16_no_res(0x12345678, 2, 0xf000) == 0x1234a678 +; run: %atomic_rmw_xor_big_i16_no_res(0x12345678, 2, 0x000f) == 0x12345677 -function %atomic_rmw_xor_big_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_xor_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big xor v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_xor_big_i8(0x12345678, 0, 0xf0) == [0x12, 0xe2345678] +; run: %atomic_rmw_xor_big_i8(0x12345678, 0, 0x0f) == [0x12, 0x1d345678] +; run: %atomic_rmw_xor_big_i8(0x12345678, 1, 0xf0) == [0x34, 0x12c45678] +; run: %atomic_rmw_xor_big_i8(0x12345678, 1, 0x0f) == [0x34, 0x123b5678] +; run: 
%atomic_rmw_xor_big_i8(0x12345678, 2, 0xf0) == [0x56, 0x1234a678] +; run: %atomic_rmw_xor_big_i8(0x12345678, 2, 0x0f) == [0x56, 0x12345978] +; run: %atomic_rmw_xor_big_i8(0x12345678, 3, 0xf0) == [0x78, 0x12345688] +; run: %atomic_rmw_xor_big_i8(0x12345678, 3, 0x0f) == [0x78, 0x12345677] + +function %atomic_rmw_xor_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -204,16 +404,34 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_xor_big_i8(0x12345678, 0, 0xf0) == 0xe2345678 -; run: %atomic_rmw_xor_big_i8(0x12345678, 0, 0x0f) == 0x1d345678 -; run: %atomic_rmw_xor_big_i8(0x12345678, 1, 0xf0) == 0x12c45678 -; run: %atomic_rmw_xor_big_i8(0x12345678, 1, 0x0f) == 0x123b5678 -; run: %atomic_rmw_xor_big_i8(0x12345678, 2, 0xf0) == 0x1234a678 -; run: %atomic_rmw_xor_big_i8(0x12345678, 2, 0x0f) == 0x12345978 -; run: %atomic_rmw_xor_big_i8(0x12345678, 3, 0xf0) == 0x12345688 -; run: %atomic_rmw_xor_big_i8(0x12345678, 3, 0x0f) == 0x12345677 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 0, 0xf0) == 0xe2345678 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 0, 0x0f) == 0x1d345678 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 1, 0xf0) == 0x12c45678 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 1, 0x0f) == 0x123b5678 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 2, 0xf0) == 0x1234a678 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 2, 0x0f) == 0x12345978 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 3, 0xf0) == 0x12345688 +; run: %atomic_rmw_xor_big_i8_no_res(0x12345678, 3, 0x0f) == 0x12345677 + +function %atomic_rmw_nand_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 -function %atomic_rmw_nand_big_i16(i32, i64, i16) -> i32 { + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big nand v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_nand_big_i16(0x12345678, 0, 0xf000) == [0x1234, 0xefff5678] +; run: %atomic_rmw_nand_big_i16(0x12345678, 0, 0x000f) == [0x1234, 0xfffb5678] +; run: %atomic_rmw_nand_big_i16(0x12345678, 2, 0xf000) == [0x5678, 0x1234afff] +; run: %atomic_rmw_nand_big_i16(0x12345678, 2, 0x000f) == [0x5678, 0x1234fff7] + +function %atomic_rmw_nand_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -226,12 +444,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_nand_big_i16(0x12345678, 0, 0xf000) == 0xefff5678 -; run: %atomic_rmw_nand_big_i16(0x12345678, 0, 0x000f) == 0xfffb5678 -; run: %atomic_rmw_nand_big_i16(0x12345678, 2, 0xf000) == 0x1234afff -; run: %atomic_rmw_nand_big_i16(0x12345678, 2, 0x000f) == 0x1234fff7 +; run: %atomic_rmw_nand_big_i16_no_res(0x12345678, 0, 0xf000) == 0xefff5678 +; run: %atomic_rmw_nand_big_i16_no_res(0x12345678, 0, 0x000f) == 0xfffb5678 +; run: %atomic_rmw_nand_big_i16_no_res(0x12345678, 2, 0xf000) == 0x1234afff +; run: %atomic_rmw_nand_big_i16_no_res(0x12345678, 2, 0x000f) == 0x1234fff7 -function %atomic_rmw_nand_big_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_nand_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big nand v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_nand_big_i8(0x12345678, 0, 0xf0) == [0x12, 0xef345678] +; run: %atomic_rmw_nand_big_i8(0x12345678, 0, 0x0f) == [0x12, 
0xfd345678] +; run: %atomic_rmw_nand_big_i8(0x12345678, 1, 0xf0) == [0x34, 0x12cf5678] +; run: %atomic_rmw_nand_big_i8(0x12345678, 1, 0x0f) == [0x34, 0x12fb5678] +; run: %atomic_rmw_nand_big_i8(0x12345678, 2, 0xf0) == [0x56, 0x1234af78] +; run: %atomic_rmw_nand_big_i8(0x12345678, 2, 0x0f) == [0x56, 0x1234f978] +; run: %atomic_rmw_nand_big_i8(0x12345678, 3, 0xf0) == [0x78, 0x1234568f] +; run: %atomic_rmw_nand_big_i8(0x12345678, 3, 0x0f) == [0x78, 0x123456f7] + +function %atomic_rmw_nand_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -244,17 +484,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_nand_big_i8(0x12345678, 0, 0xf0) == 0xef345678 -; run: %atomic_rmw_nand_big_i8(0x12345678, 0, 0x0f) == 0xfd345678 -; run: %atomic_rmw_nand_big_i8(0x12345678, 1, 0xf0) == 0x12cf5678 -; run: %atomic_rmw_nand_big_i8(0x12345678, 1, 0x0f) == 0x12fb5678 -; run: %atomic_rmw_nand_big_i8(0x12345678, 2, 0xf0) == 0x1234af78 -; run: %atomic_rmw_nand_big_i8(0x12345678, 2, 0x0f) == 0x1234f978 -; run: %atomic_rmw_nand_big_i8(0x12345678, 3, 0xf0) == 0x1234568f -; run: %atomic_rmw_nand_big_i8(0x12345678, 3, 0x0f) == 0x123456f7 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 0, 0xf0) == 0xef345678 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 0, 0x0f) == 0xfd345678 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 1, 0xf0) == 0x12cf5678 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 1, 0x0f) == 0x12fb5678 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 2, 0xf0) == 0x1234af78 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 2, 0x0f) == 0x1234f978 +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 3, 0xf0) == 0x1234568f +; run: %atomic_rmw_nand_big_i8_no_res(0x12345678, 3, 0x0f) == 0x123456f7 + + +function %atomic_rmw_umin_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big umin v4, v2 + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_umin_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_umin_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0x12345678] +; run: %atomic_rmw_umin_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_umin_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x12345678] -function %atomic_rmw_umin_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_umin_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -267,12 +525,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_umin_big_i16(0x12345678, 0, 0x1111) == 0x11115678 -; run: %atomic_rmw_umin_big_i16(0x12345678, 0, 0xffff) == 0x12345678 -; run: %atomic_rmw_umin_big_i16(0x12345678, 2, 0x1111) == 0x12341111 -; run: %atomic_rmw_umin_big_i16(0x12345678, 2, 0xffff) == 0x12345678 +; run: %atomic_rmw_umin_big_i16_no_res(0x12345678, 0, 0x1111) == 0x11115678 +; run: %atomic_rmw_umin_big_i16_no_res(0x12345678, 0, 0xffff) == 0x12345678 +; run: %atomic_rmw_umin_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12341111 +; run: %atomic_rmw_umin_big_i16_no_res(0x12345678, 2, 0xffff) == 0x12345678 -function %atomic_rmw_umin_big_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_umin_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big umin v4, v2 
+ + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_umin_big_i8(0x12345678, 0, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_umin_big_i8(0x12345678, 0, 0xff) == [0x12, 0x12345678] +; run: %atomic_rmw_umin_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_umin_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12345678] +; run: %atomic_rmw_umin_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_umin_big_i8(0x12345678, 2, 0xff) == [0x56, 0x12345678] +; run: %atomic_rmw_umin_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_umin_big_i8(0x12345678, 3, 0xff) == [0x78, 0x12345678] + +function %atomic_rmw_umin_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -285,17 +565,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_umin_big_i8(0x12345678, 0, 0x11) == 0x11345678 -; run: %atomic_rmw_umin_big_i8(0x12345678, 0, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_big_i8(0x12345678, 1, 0x11) == 0x12115678 -; run: %atomic_rmw_umin_big_i8(0x12345678, 1, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_big_i8(0x12345678, 2, 0x11) == 0x12341178 -; run: %atomic_rmw_umin_big_i8(0x12345678, 2, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_big_i8(0x12345678, 3, 0x11) == 0x12345611 -; run: %atomic_rmw_umin_big_i8(0x12345678, 3, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 0, 0x11) == 0x11345678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 0, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 1, 0x11) == 0x12115678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 1, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 2, 0x11) == 0x12341178 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 2, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345611 +; run: %atomic_rmw_umin_big_i8_no_res(0x12345678, 3, 0xff) == 0x12345678 + + +function %atomic_rmw_umax_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big umax v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_umax_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x12345678] +; run: %atomic_rmw_umax_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_umax_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12345678] +; run: %atomic_rmw_umax_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x1234ffff] -function %atomic_rmw_umax_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_umax_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -308,12 +606,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_umax_big_i16(0x12345678, 0, 0x1111) == 0x12345678 -; run: %atomic_rmw_umax_big_i16(0x12345678, 0, 0xffff) == 0xffff5678 -; run: %atomic_rmw_umax_big_i16(0x12345678, 2, 0x1111) == 0x12345678 -; run: %atomic_rmw_umax_big_i16(0x12345678, 2, 0xffff) == 0x1234ffff +; run: %atomic_rmw_umax_big_i16_no_res(0x12345678, 0, 0x1111) == 0x12345678 +; run: %atomic_rmw_umax_big_i16_no_res(0x12345678, 0, 0xffff) == 0xffff5678 +; run: %atomic_rmw_umax_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12345678 +; run: %atomic_rmw_umax_big_i16_no_res(0x12345678, 2, 0xffff) == 0x1234ffff -function %atomic_rmw_umax_big_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_umax_big_i8(i32, i64, i8) -> 
i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big umax v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_umax_big_i8(0x12345678, 0, 0x11) == [0x12, 0x12345678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 0, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12345678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12345678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 2, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_umax_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345678] +; run: %atomic_rmw_umax_big_i8(0x12345678, 3, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_umax_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -326,17 +646,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_umax_big_i8(0x12345678, 0, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 0, 0xff) == 0xff345678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 1, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 1, 0xff) == 0x12ff5678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 2, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 2, 0xff) == 0x1234ff78 -; run: %atomic_rmw_umax_big_i8(0x12345678, 3, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_big_i8(0x12345678, 3, 0xff) == 0x123456ff +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 0, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 0, 0xff) == 0xff345678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 1, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 1, 0xff) == 0x12ff5678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 2, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 2, 0xff) == 0x1234ff78 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_big_i8_no_res(0x12345678, 3, 0xff) == 0x123456ff -function %atomic_rmw_smin_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_smin_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big smin v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_smin_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_smin_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_smin_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_smin_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x1234ffff] + +function %atomic_rmw_smin_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -349,12 +687,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_smin_big_i16(0x12345678, 0, 0x1111) == 0x11115678 -; run: %atomic_rmw_smin_big_i16(0x12345678, 0, 0xffff) == 0xffff5678 -; run: %atomic_rmw_smin_big_i16(0x12345678, 2, 0x1111) == 0x12341111 -; run: %atomic_rmw_smin_big_i16(0x12345678, 2, 0xffff) == 0x1234ffff +; run: %atomic_rmw_smin_big_i16_no_res(0x12345678, 0, 0x1111) == 0x11115678 +; run: %atomic_rmw_smin_big_i16_no_res(0x12345678, 0, 0xffff) == 0xffff5678 +; run: %atomic_rmw_smin_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12341111 +; 
run: %atomic_rmw_smin_big_i16_no_res(0x12345678, 2, 0xffff) == 0x1234ffff + +function %atomic_rmw_smin_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big smin v4, v2 -function %atomic_rmw_smin_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_smin_big_i8(0x12345678, 0, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_smin_big_i8(0x12345678, 0, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_smin_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_smin_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_smin_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_smin_big_i8(0x12345678, 2, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_smin_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_smin_big_i8(0x12345678, 3, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_smin_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -367,17 +727,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_smin_big_i8(0x12345678, 0, 0x11) == 0x11345678 -; run: %atomic_rmw_smin_big_i8(0x12345678, 0, 0xff) == 0xff345678 -; run: %atomic_rmw_smin_big_i8(0x12345678, 1, 0x11) == 0x12115678 -; run: %atomic_rmw_smin_big_i8(0x12345678, 1, 0xff) == 0x12ff5678 -; run: %atomic_rmw_smin_big_i8(0x12345678, 2, 0x11) == 0x12341178 -; run: %atomic_rmw_smin_big_i8(0x12345678, 2, 0xff) == 0x1234ff78 -; run: %atomic_rmw_smin_big_i8(0x12345678, 3, 0x11) == 0x12345611 -; run: %atomic_rmw_smin_big_i8(0x12345678, 3, 0xff) == 0x123456ff +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 0, 0x11) == 0x11345678 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 0, 0xff) == 0xff345678 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 1, 0x11) == 0x12115678 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 1, 0xff) == 0x12ff5678 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 2, 0x11) == 0x12341178 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 2, 0xff) == 0x1234ff78 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345611 +; run: %atomic_rmw_smin_big_i8_no_res(0x12345678, 3, 0xff) == 0x123456ff -function %atomic_rmw_smax_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_smax_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big smax v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_smax_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0x12345678] +; run: %atomic_rmw_smax_big_i16(0x12345678, 0, 0x7fff) == [0x1234, 0x7fff5678] +; run: %atomic_rmw_smax_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x12345678] +; run: %atomic_rmw_smax_big_i16(0x12345678, 2, 0x7fff) == [0x5678, 0x12347fff] + +function %atomic_rmw_smax_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -390,13 +768,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_smax_big_i16(0x12345678, 0, 0xffff) == 0x12345678 -; run: %atomic_rmw_smax_big_i16(0x12345678, 0, 0x7fff) == 0x7fff5678 -; run: %atomic_rmw_smax_big_i16(0x12345678, 2, 0xffff) == 0x12345678 -; run: %atomic_rmw_smax_big_i16(0x12345678, 2, 0x7fff) == 0x12347fff +; run: %atomic_rmw_smax_big_i16_no_res(0x12345678, 
0, 0xffff) == 0x12345678 +; run: %atomic_rmw_smax_big_i16_no_res(0x12345678, 0, 0x7fff) == 0x7fff5678 +; run: %atomic_rmw_smax_big_i16_no_res(0x12345678, 2, 0xffff) == 0x12345678 +; run: %atomic_rmw_smax_big_i16_no_res(0x12345678, 2, 0x7fff) == 0x12347fff + + +function %atomic_rmw_smax_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big smax v4, v2 -function %atomic_rmw_smax_big_i8(i32, i64, i8) -> i32 { + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_smax_big_i8(0x12345678, 0, 0xff) == [0x12, 0x12345678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 0, 0x7f) == [0x12, 0x7f345678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12345678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 1, 0x7f) == [0x34, 0x127f5678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 2, 0xff) == [0x56, 0x12345678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 2, 0x7f) == [0x56, 0x12347f78] +; run: %atomic_rmw_smax_big_i8(0x12345678, 3, 0xff) == [0x78, 0x12345678] +; run: %atomic_rmw_smax_big_i8(0x12345678, 3, 0x7f) == [0x78, 0x1234567f] + +function %atomic_rmw_smax_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -409,17 +809,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_smax_big_i8(0x12345678, 0, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 0, 0x7f) == 0x7f345678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 1, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 1, 0x7f) == 0x127f5678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 2, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 2, 0x7f) == 0x12347f78 -; run: %atomic_rmw_smax_big_i8(0x12345678, 3, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_big_i8(0x12345678, 3, 0x7f) == 0x1234567f +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 0, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 0, 0x7f) == 0x7f345678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 1, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 1, 0x7f) == 0x127f5678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 2, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 2, 0x7f) == 0x12347f78 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 3, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_big_i8_no_res(0x12345678, 3, 0x7f) == 0x1234567f -function %atomic_rmw_xchg_big_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_xchg_big_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 big xchg v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_xchg_big_i16(0x12345678, 0, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_xchg_big_i16(0x12345678, 0, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_xchg_big_i16(0x12345678, 2, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_xchg_big_i16(0x12345678, 2, 0xffff) == [0x5678, 0x1234ffff] + +function %atomic_rmw_xchg_big_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -432,13 +850,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_xchg_big_i16(0x12345678, 0, 0x1111) == 0x11115678 -; run: %atomic_rmw_xchg_big_i16(0x12345678, 0, 0xffff) == 0xffff5678 -; run: 
%atomic_rmw_xchg_big_i16(0x12345678, 2, 0x1111) == 0x12341111 -; run: %atomic_rmw_xchg_big_i16(0x12345678, 2, 0xffff) == 0x1234ffff +; run: %atomic_rmw_xchg_big_i16_no_res(0x12345678, 0, 0x1111) == 0x11115678 +; run: %atomic_rmw_xchg_big_i16_no_res(0x12345678, 0, 0xffff) == 0xffff5678 +; run: %atomic_rmw_xchg_big_i16_no_res(0x12345678, 2, 0x1111) == 0x12341111 +; run: %atomic_rmw_xchg_big_i16_no_res(0x12345678, 2, 0xffff) == 0x1234ffff -function %atomic_rmw_xchg_big_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_xchg_big_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 big v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 big xchg v4, v2 + + v6 = load.i32 big v3 + return v5, v6 +} +; run: %atomic_rmw_xchg_big_i8(0x12345678, 0, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 0, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 1, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 1, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 2, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 2, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 3, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_xchg_big_i8(0x12345678, 3, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_xchg_big_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -451,11 +891,11 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 big v3 return v6 } -; run: %atomic_rmw_xchg_big_i8(0x12345678, 0, 0x11) == 0x11345678 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 0, 0xff) == 0xff345678 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 1, 0x11) == 0x12115678 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 1, 0xff) == 0x12ff5678 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 2, 0x11) == 0x12341178 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 2, 0xff) == 0x1234ff78 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 3, 0x11) == 0x12345611 -; run: %atomic_rmw_xchg_big_i8(0x12345678, 3, 0xff) == 0x123456ff +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 0, 0x11) == 0x11345678 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 0, 0xff) == 0xff345678 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 1, 0x11) == 0x12115678 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 1, 0xff) == 0x12ff5678 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 2, 0x11) == 0x12341178 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 2, 0xff) == 0x1234ff78 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 3, 0x11) == 0x12345611 +; run: %atomic_rmw_xchg_big_i8_no_res(0x12345678, 3, 0xff) == 0x123456ff diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif index c3d3bd0e5dc3..c0ce64e7a1f0 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif @@ -11,7 +11,25 @@ target riscv64 has_c has_zcb ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly -function %atomic_rmw_add_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_add_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little add v4, v2 + + v6 = 
load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_add_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x23455678] +; run: %atomic_rmw_add_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0x12335678] +; run: %atomic_rmw_add_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12346789] +; run: %atomic_rmw_add_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x12345677] + +function %atomic_rmw_add_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -24,12 +42,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_add_little_i16(0x12345678, 2, 0x1111) == 0x23455678 -; run: %atomic_rmw_add_little_i16(0x12345678, 2, 0xffff) == 0x12335678 -; run: %atomic_rmw_add_little_i16(0x12345678, 0, 0x1111) == 0x12346789 -; run: %atomic_rmw_add_little_i16(0x12345678, 0, 0xffff) == 0x12345677 +; run: %atomic_rmw_add_little_i16_no_res(0x12345678, 2, 0x1111) == 0x23455678 +; run: %atomic_rmw_add_little_i16_no_res(0x12345678, 2, 0xffff) == 0x12335678 +; run: %atomic_rmw_add_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12346789 +; run: %atomic_rmw_add_little_i16_no_res(0x12345678, 0, 0xffff) == 0x12345677 + +function %atomic_rmw_add_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little add v4, v2 -function %atomic_rmw_add_little_i8(i32, i64, i8) -> i32 { + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_add_little_i8(0x12345678, 3, 0x11) == [0x12, 0x23345678] +; run: %atomic_rmw_add_little_i8(0x12345678, 3, 0xff) == [0x12, 0x11345678] +; run: %atomic_rmw_add_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12455678] +; run: %atomic_rmw_add_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12335678] +; run: %atomic_rmw_add_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12346778] +; run: %atomic_rmw_add_little_i8(0x12345678, 1, 0xff) == [0x56, 0x12345578] +; run: %atomic_rmw_add_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345689] +; run: %atomic_rmw_add_little_i8(0x12345678, 0, 0xff) == [0x78, 0x12345677] + +function %atomic_rmw_add_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -42,16 +82,34 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_add_little_i8(0x12345678, 3, 0x11) == 0x23345678 -; run: %atomic_rmw_add_little_i8(0x12345678, 3, 0xff) == 0x11345678 -; run: %atomic_rmw_add_little_i8(0x12345678, 2, 0x11) == 0x12455678 -; run: %atomic_rmw_add_little_i8(0x12345678, 2, 0xff) == 0x12335678 -; run: %atomic_rmw_add_little_i8(0x12345678, 1, 0x11) == 0x12346778 -; run: %atomic_rmw_add_little_i8(0x12345678, 1, 0xff) == 0x12345578 -; run: %atomic_rmw_add_little_i8(0x12345678, 0, 0x11) == 0x12345689 -; run: %atomic_rmw_add_little_i8(0x12345678, 0, 0xff) == 0x12345677 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 3, 0x11) == 0x23345678 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 3, 0xff) == 0x11345678 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 2, 0x11) == 0x12455678 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 2, 0xff) == 0x12335678 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 1, 0x11) == 0x12346778 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 1, 0xff) == 0x12345578 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345689 +; run: %atomic_rmw_add_little_i8_no_res(0x12345678, 0, 0xff) == 0x12345677 + +function 
%atomic_rmw_sub_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 -function %atomic_rmw_sub_little_i16(i32, i64, i16) -> i32 { +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little sub v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_sub_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x01235678] +; run: %atomic_rmw_sub_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0x12355678] +; run: %atomic_rmw_sub_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12344567] +; run: %atomic_rmw_sub_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x12345679] + +function %atomic_rmw_sub_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -64,12 +122,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_sub_little_i16(0x12345678, 2, 0x1111) == 0x01235678 -; run: %atomic_rmw_sub_little_i16(0x12345678, 2, 0xffff) == 0x12355678 -; run: %atomic_rmw_sub_little_i16(0x12345678, 0, 0x1111) == 0x12344567 -; run: %atomic_rmw_sub_little_i16(0x12345678, 0, 0xffff) == 0x12345679 +; run: %atomic_rmw_sub_little_i16_no_res(0x12345678, 2, 0x1111) == 0x01235678 +; run: %atomic_rmw_sub_little_i16_no_res(0x12345678, 2, 0xffff) == 0x12355678 +; run: %atomic_rmw_sub_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12344567 +; run: %atomic_rmw_sub_little_i16_no_res(0x12345678, 0, 0xffff) == 0x12345679 + +function %atomic_rmw_sub_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 -function %atomic_rmw_sub_little_i8(i32, i64, i8) -> i32 { + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little sub v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_sub_little_i8(0x12345678, 3, 0x11) == [0x12, 0x01345678] +; run: %atomic_rmw_sub_little_i8(0x12345678, 3, 0xff) == [0x12, 0x13345678] +; run: %atomic_rmw_sub_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12235678] +; run: %atomic_rmw_sub_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12355678] +; run: %atomic_rmw_sub_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12344578] +; run: %atomic_rmw_sub_little_i8(0x12345678, 1, 0xff) == [0x56, 0x12345778] +; run: %atomic_rmw_sub_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345667] +; run: %atomic_rmw_sub_little_i8(0x12345678, 0, 0xff) == [0x78, 0x12345679] + +function %atomic_rmw_sub_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -82,16 +162,34 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_sub_little_i8(0x12345678, 3, 0x11) == 0x01345678 -; run: %atomic_rmw_sub_little_i8(0x12345678, 3, 0xff) == 0x13345678 -; run: %atomic_rmw_sub_little_i8(0x12345678, 2, 0x11) == 0x12235678 -; run: %atomic_rmw_sub_little_i8(0x12345678, 2, 0xff) == 0x12355678 -; run: %atomic_rmw_sub_little_i8(0x12345678, 1, 0x11) == 0x12344578 -; run: %atomic_rmw_sub_little_i8(0x12345678, 1, 0xff) == 0x12345778 -; run: %atomic_rmw_sub_little_i8(0x12345678, 0, 0x11) == 0x12345667 -; run: %atomic_rmw_sub_little_i8(0x12345678, 0, 0xff) == 0x12345679 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 3, 0x11) == 0x01345678 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 3, 0xff) == 0x13345678 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 2, 0x11) == 0x12235678 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 2, 0xff) == 0x12355678 +; run: 
%atomic_rmw_sub_little_i8_no_res(0x12345678, 1, 0x11) == 0x12344578 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 1, 0xff) == 0x12345778 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345667 +; run: %atomic_rmw_sub_little_i8_no_res(0x12345678, 0, 0xff) == 0x12345679 + +function %atomic_rmw_and_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 -function %atomic_rmw_and_little_i16(i32, i64, i16) -> i32 { +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little and v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_and_little_i16(0x12345678, 2, 0xf000) == [0x1234, 0x10005678] +; run: %atomic_rmw_and_little_i16(0x12345678, 2, 0x000f) == [0x1234, 0x00045678] +; run: %atomic_rmw_and_little_i16(0x12345678, 0, 0xf000) == [0x5678, 0x12345000] +; run: %atomic_rmw_and_little_i16(0x12345678, 0, 0x000f) == [0x5678, 0x12340008] + +function %atomic_rmw_and_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -104,12 +202,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_and_little_i16(0x12345678, 2, 0xf000) == 0x10005678 -; run: %atomic_rmw_and_little_i16(0x12345678, 2, 0x000f) == 0x00045678 -; run: %atomic_rmw_and_little_i16(0x12345678, 0, 0xf000) == 0x12345000 -; run: %atomic_rmw_and_little_i16(0x12345678, 0, 0x000f) == 0x12340008 +; run: %atomic_rmw_and_little_i16_no_res(0x12345678, 2, 0xf000) == 0x10005678 +; run: %atomic_rmw_and_little_i16_no_res(0x12345678, 2, 0x000f) == 0x00045678 +; run: %atomic_rmw_and_little_i16_no_res(0x12345678, 0, 0xf000) == 0x12345000 +; run: %atomic_rmw_and_little_i16_no_res(0x12345678, 0, 0x000f) == 0x12340008 -function %atomic_rmw_and_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_and_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little and v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_and_little_i8(0x12345678, 3, 0xf0) == [0x12, 0x10345678] +; run: %atomic_rmw_and_little_i8(0x12345678, 3, 0x0f) == [0x12, 0x02345678] +; run: %atomic_rmw_and_little_i8(0x12345678, 2, 0xf0) == [0x34, 0x12305678] +; run: %atomic_rmw_and_little_i8(0x12345678, 2, 0x0f) == [0x34, 0x12045678] +; run: %atomic_rmw_and_little_i8(0x12345678, 1, 0xf0) == [0x56, 0x12345078] +; run: %atomic_rmw_and_little_i8(0x12345678, 1, 0x0f) == [0x56, 0x12340678] +; run: %atomic_rmw_and_little_i8(0x12345678, 0, 0xf0) == [0x78, 0x12345670] +; run: %atomic_rmw_and_little_i8(0x12345678, 0, 0x0f) == [0x78, 0x12345608] + +function %atomic_rmw_and_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -122,17 +242,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_and_little_i8(0x12345678, 3, 0xf0) == 0x10345678 -; run: %atomic_rmw_and_little_i8(0x12345678, 3, 0x0f) == 0x02345678 -; run: %atomic_rmw_and_little_i8(0x12345678, 2, 0xf0) == 0x12305678 -; run: %atomic_rmw_and_little_i8(0x12345678, 2, 0x0f) == 0x12045678 -; run: %atomic_rmw_and_little_i8(0x12345678, 1, 0xf0) == 0x12345078 -; run: %atomic_rmw_and_little_i8(0x12345678, 1, 0x0f) == 0x12340678 -; run: %atomic_rmw_and_little_i8(0x12345678, 0, 0xf0) == 0x12345670 -; run: %atomic_rmw_and_little_i8(0x12345678, 0, 0x0f) == 0x12345608 +; run: 
%atomic_rmw_and_little_i8_no_res(0x12345678, 3, 0xf0) == 0x10345678 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 3, 0x0f) == 0x02345678 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 2, 0xf0) == 0x12305678 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 2, 0x0f) == 0x12045678 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 1, 0xf0) == 0x12345078 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 1, 0x0f) == 0x12340678 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 0, 0xf0) == 0x12345670 +; run: %atomic_rmw_and_little_i8_no_res(0x12345678, 0, 0x0f) == 0x12345608 + + +function %atomic_rmw_or_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little or v4, v2 + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_or_little_i16(0x12345678, 2, 0xf000) == [0x1234, 0xf2345678] +; run: %atomic_rmw_or_little_i16(0x12345678, 2, 0x000f) == [0x1234, 0x123f5678] +; run: %atomic_rmw_or_little_i16(0x12345678, 0, 0xf000) == [0x5678, 0x1234f678] +; run: %atomic_rmw_or_little_i16(0x12345678, 0, 0x000f) == [0x5678, 0x1234567f] -function %atomic_rmw_or_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_or_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -145,12 +283,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_or_little_i16(0x12345678, 2, 0xf000) == 0xf2345678 -; run: %atomic_rmw_or_little_i16(0x12345678, 2, 0x000f) == 0x123f5678 -; run: %atomic_rmw_or_little_i16(0x12345678, 0, 0xf000) == 0x1234f678 -; run: %atomic_rmw_or_little_i16(0x12345678, 0, 0x000f) == 0x1234567f +; run: %atomic_rmw_or_little_i16_no_res(0x12345678, 2, 0xf000) == 0xf2345678 +; run: %atomic_rmw_or_little_i16_no_res(0x12345678, 2, 0x000f) == 0x123f5678 +; run: %atomic_rmw_or_little_i16_no_res(0x12345678, 0, 0xf000) == 0x1234f678 +; run: %atomic_rmw_or_little_i16_no_res(0x12345678, 0, 0x000f) == 0x1234567f -function %atomic_rmw_or_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_or_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little or v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_or_little_i8(0x12345678, 3, 0xf0) == [0x12, 0xf2345678] +; run: %atomic_rmw_or_little_i8(0x12345678, 3, 0x0f) == [0x12, 0x1f345678] +; run: %atomic_rmw_or_little_i8(0x12345678, 2, 0xf0) == [0x34, 0x12f45678] +; run: %atomic_rmw_or_little_i8(0x12345678, 2, 0x0f) == [0x34, 0x123f5678] +; run: %atomic_rmw_or_little_i8(0x12345678, 1, 0xf0) == [0x56, 0x1234f678] +; run: %atomic_rmw_or_little_i8(0x12345678, 1, 0x0f) == [0x56, 0x12345f78] +; run: %atomic_rmw_or_little_i8(0x12345678, 0, 0xf0) == [0x78, 0x123456f8] +; run: %atomic_rmw_or_little_i8(0x12345678, 0, 0x0f) == [0x78, 0x1234567f] + +function %atomic_rmw_or_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -163,16 +323,34 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_or_little_i8(0x12345678, 3, 0xf0) == 0xf2345678 -; run: %atomic_rmw_or_little_i8(0x12345678, 3, 0x0f) == 0x1f345678 -; run: %atomic_rmw_or_little_i8(0x12345678, 2, 0xf0) == 0x12f45678 -; run: %atomic_rmw_or_little_i8(0x12345678, 2, 0x0f) == 0x123f5678 -; run: 
%atomic_rmw_or_little_i8(0x12345678, 1, 0xf0) == 0x1234f678 -; run: %atomic_rmw_or_little_i8(0x12345678, 1, 0x0f) == 0x12345f78 -; run: %atomic_rmw_or_little_i8(0x12345678, 0, 0xf0) == 0x123456f8 -; run: %atomic_rmw_or_little_i8(0x12345678, 0, 0x0f) == 0x1234567f +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 3, 0xf0) == 0xf2345678 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 3, 0x0f) == 0x1f345678 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 2, 0xf0) == 0x12f45678 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 2, 0x0f) == 0x123f5678 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 1, 0xf0) == 0x1234f678 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 1, 0x0f) == 0x12345f78 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 0, 0xf0) == 0x123456f8 +; run: %atomic_rmw_or_little_i8_no_res(0x12345678, 0, 0x0f) == 0x1234567f + +function %atomic_rmw_xor_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little xor v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_xor_little_i16(0x12345678, 2, 0xf000) == [0x1234, 0xe2345678] +; run: %atomic_rmw_xor_little_i16(0x12345678, 2, 0x000f) == [0x1234, 0x123b5678] +; run: %atomic_rmw_xor_little_i16(0x12345678, 0, 0xf000) == [0x5678, 0x1234a678] +; run: %atomic_rmw_xor_little_i16(0x12345678, 0, 0x000f) == [0x5678, 0x12345677] -function %atomic_rmw_xor_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_xor_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -185,12 +363,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_xor_little_i16(0x12345678, 2, 0xf000) == 0xe2345678 -; run: %atomic_rmw_xor_little_i16(0x12345678, 2, 0x000f) == 0x123b5678 -; run: %atomic_rmw_xor_little_i16(0x12345678, 0, 0xf000) == 0x1234a678 -; run: %atomic_rmw_xor_little_i16(0x12345678, 0, 0x000f) == 0x12345677 +; run: %atomic_rmw_xor_little_i16_no_res(0x12345678, 2, 0xf000) == 0xe2345678 +; run: %atomic_rmw_xor_little_i16_no_res(0x12345678, 2, 0x000f) == 0x123b5678 +; run: %atomic_rmw_xor_little_i16_no_res(0x12345678, 0, 0xf000) == 0x1234a678 +; run: %atomic_rmw_xor_little_i16_no_res(0x12345678, 0, 0x000f) == 0x12345677 -function %atomic_rmw_xor_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_xor_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little xor v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_xor_little_i8(0x12345678, 3, 0xf0) == [0x12, 0xe2345678] +; run: %atomic_rmw_xor_little_i8(0x12345678, 3, 0x0f) == [0x12, 0x1d345678] +; run: %atomic_rmw_xor_little_i8(0x12345678, 2, 0xf0) == [0x34, 0x12c45678] +; run: %atomic_rmw_xor_little_i8(0x12345678, 2, 0x0f) == [0x34, 0x123b5678] +; run: %atomic_rmw_xor_little_i8(0x12345678, 1, 0xf0) == [0x56, 0x1234a678] +; run: %atomic_rmw_xor_little_i8(0x12345678, 1, 0x0f) == [0x56, 0x12345978] +; run: %atomic_rmw_xor_little_i8(0x12345678, 0, 0xf0) == [0x78, 0x12345688] +; run: %atomic_rmw_xor_little_i8(0x12345678, 0, 0x0f) == [0x78, 0x12345677] + +function %atomic_rmw_xor_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -203,17 +403,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: 
%atomic_rmw_xor_little_i8(0x12345678, 3, 0xf0) == 0xe2345678 -; run: %atomic_rmw_xor_little_i8(0x12345678, 3, 0x0f) == 0x1d345678 -; run: %atomic_rmw_xor_little_i8(0x12345678, 2, 0xf0) == 0x12c45678 -; run: %atomic_rmw_xor_little_i8(0x12345678, 2, 0x0f) == 0x123b5678 -; run: %atomic_rmw_xor_little_i8(0x12345678, 1, 0xf0) == 0x1234a678 -; run: %atomic_rmw_xor_little_i8(0x12345678, 1, 0x0f) == 0x12345978 -; run: %atomic_rmw_xor_little_i8(0x12345678, 0, 0xf0) == 0x12345688 -; run: %atomic_rmw_xor_little_i8(0x12345678, 0, 0x0f) == 0x12345677 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 3, 0xf0) == 0xe2345678 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 3, 0x0f) == 0x1d345678 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 2, 0xf0) == 0x12c45678 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 2, 0x0f) == 0x123b5678 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 1, 0xf0) == 0x1234a678 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 1, 0x0f) == 0x12345978 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 0, 0xf0) == 0x12345688 +; run: %atomic_rmw_xor_little_i8_no_res(0x12345678, 0, 0x0f) == 0x12345677 -function %atomic_rmw_nand_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_nand_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little nand v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_nand_little_i16(0x12345678, 2, 0xf000) == [0x1234, 0xefff5678] +; run: %atomic_rmw_nand_little_i16(0x12345678, 2, 0x000f) == [0x1234, 0xfffb5678] +; run: %atomic_rmw_nand_little_i16(0x12345678, 0, 0xf000) == [0x5678, 0x1234afff] +; run: %atomic_rmw_nand_little_i16(0x12345678, 0, 0x000f) == [0x5678, 0x1234fff7] + +function %atomic_rmw_nand_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -226,12 +444,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_nand_little_i16(0x12345678, 2, 0xf000) == 0xefff5678 -; run: %atomic_rmw_nand_little_i16(0x12345678, 2, 0x000f) == 0xfffb5678 -; run: %atomic_rmw_nand_little_i16(0x12345678, 0, 0xf000) == 0x1234afff -; run: %atomic_rmw_nand_little_i16(0x12345678, 0, 0x000f) == 0x1234fff7 +; run: %atomic_rmw_nand_little_i16_no_res(0x12345678, 2, 0xf000) == 0xefff5678 +; run: %atomic_rmw_nand_little_i16_no_res(0x12345678, 2, 0x000f) == 0xfffb5678 +; run: %atomic_rmw_nand_little_i16_no_res(0x12345678, 0, 0xf000) == 0x1234afff +; run: %atomic_rmw_nand_little_i16_no_res(0x12345678, 0, 0x000f) == 0x1234fff7 + +function %atomic_rmw_nand_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little nand v4, v2 -function %atomic_rmw_nand_little_i8(i32, i64, i8) -> i32 { + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_nand_little_i8(0x12345678, 3, 0xf0) == [0x12, 0xef345678] +; run: %atomic_rmw_nand_little_i8(0x12345678, 3, 0x0f) == [0x12, 0xfd345678] +; run: %atomic_rmw_nand_little_i8(0x12345678, 2, 0xf0) == [0x34, 0x12cf5678] +; run: %atomic_rmw_nand_little_i8(0x12345678, 2, 0x0f) == [0x34, 0x12fb5678] +; run: %atomic_rmw_nand_little_i8(0x12345678, 1, 0xf0) == [0x56, 0x1234af78] +; run: %atomic_rmw_nand_little_i8(0x12345678, 1, 0x0f) == [0x56, 0x1234f978] +; run: %atomic_rmw_nand_little_i8(0x12345678, 0, 0xf0) == 
[0x78, 0x1234568f] +; run: %atomic_rmw_nand_little_i8(0x12345678, 0, 0x0f) == [0x78, 0x123456f7] + +function %atomic_rmw_nand_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -244,17 +484,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_nand_little_i8(0x12345678, 3, 0xf0) == 0xef345678 -; run: %atomic_rmw_nand_little_i8(0x12345678, 3, 0x0f) == 0xfd345678 -; run: %atomic_rmw_nand_little_i8(0x12345678, 2, 0xf0) == 0x12cf5678 -; run: %atomic_rmw_nand_little_i8(0x12345678, 2, 0x0f) == 0x12fb5678 -; run: %atomic_rmw_nand_little_i8(0x12345678, 1, 0xf0) == 0x1234af78 -; run: %atomic_rmw_nand_little_i8(0x12345678, 1, 0x0f) == 0x1234f978 -; run: %atomic_rmw_nand_little_i8(0x12345678, 0, 0xf0) == 0x1234568f -; run: %atomic_rmw_nand_little_i8(0x12345678, 0, 0x0f) == 0x123456f7 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 3, 0xf0) == 0xef345678 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 3, 0x0f) == 0xfd345678 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 2, 0xf0) == 0x12cf5678 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 2, 0x0f) == 0x12fb5678 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 1, 0xf0) == 0x1234af78 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 1, 0x0f) == 0x1234f978 +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 0, 0xf0) == 0x1234568f +; run: %atomic_rmw_nand_little_i8_no_res(0x12345678, 0, 0x0f) == 0x123456f7 + + +function %atomic_rmw_umin_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little umin v4, v2 + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_umin_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_umin_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0x12345678] +; run: %atomic_rmw_umin_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_umin_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x12345678] -function %atomic_rmw_umin_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_umin_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -267,12 +525,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_umin_little_i16(0x12345678, 2, 0x1111) == 0x11115678 -; run: %atomic_rmw_umin_little_i16(0x12345678, 2, 0xffff) == 0x12345678 -; run: %atomic_rmw_umin_little_i16(0x12345678, 0, 0x1111) == 0x12341111 -; run: %atomic_rmw_umin_little_i16(0x12345678, 0, 0xffff) == 0x12345678 +; run: %atomic_rmw_umin_little_i16_no_res(0x12345678, 2, 0x1111) == 0x11115678 +; run: %atomic_rmw_umin_little_i16_no_res(0x12345678, 2, 0xffff) == 0x12345678 +; run: %atomic_rmw_umin_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12341111 +; run: %atomic_rmw_umin_little_i16_no_res(0x12345678, 0, 0xffff) == 0x12345678 -function %atomic_rmw_umin_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_umin_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little umin v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_umin_little_i8(0x12345678, 3, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_umin_little_i8(0x12345678, 3, 0xff) == [0x12, 0x12345678] +; run: 
%atomic_rmw_umin_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_umin_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12345678] +; run: %atomic_rmw_umin_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_umin_little_i8(0x12345678, 1, 0xff) == [0x56, 0x12345678] +; run: %atomic_rmw_umin_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_umin_little_i8(0x12345678, 0, 0xff) == [0x78, 0x12345678] + +function %atomic_rmw_umin_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -285,17 +565,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_umin_little_i8(0x12345678, 3, 0x11) == 0x11345678 -; run: %atomic_rmw_umin_little_i8(0x12345678, 3, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_little_i8(0x12345678, 2, 0x11) == 0x12115678 -; run: %atomic_rmw_umin_little_i8(0x12345678, 2, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_little_i8(0x12345678, 1, 0x11) == 0x12341178 -; run: %atomic_rmw_umin_little_i8(0x12345678, 1, 0xff) == 0x12345678 -; run: %atomic_rmw_umin_little_i8(0x12345678, 0, 0x11) == 0x12345611 -; run: %atomic_rmw_umin_little_i8(0x12345678, 0, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 3, 0x11) == 0x11345678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 3, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 2, 0x11) == 0x12115678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 2, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 1, 0x11) == 0x12341178 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 1, 0xff) == 0x12345678 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345611 +; run: %atomic_rmw_umin_little_i8_no_res(0x12345678, 0, 0xff) == 0x12345678 + + +function %atomic_rmw_umax_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little umax v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_umax_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x12345678] +; run: %atomic_rmw_umax_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_umax_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12345678] +; run: %atomic_rmw_umax_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x1234ffff] -function %atomic_rmw_umax_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_umax_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -308,12 +606,34 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_umax_little_i16(0x12345678, 2, 0x1111) == 0x12345678 -; run: %atomic_rmw_umax_little_i16(0x12345678, 2, 0xffff) == 0xffff5678 -; run: %atomic_rmw_umax_little_i16(0x12345678, 0, 0x1111) == 0x12345678 -; run: %atomic_rmw_umax_little_i16(0x12345678, 0, 0xffff) == 0x1234ffff +; run: %atomic_rmw_umax_little_i16_no_res(0x12345678, 2, 0x1111) == 0x12345678 +; run: %atomic_rmw_umax_little_i16_no_res(0x12345678, 2, 0xffff) == 0xffff5678 +; run: %atomic_rmw_umax_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12345678 +; run: %atomic_rmw_umax_little_i16_no_res(0x12345678, 0, 0xffff) == 0x1234ffff -function %atomic_rmw_umax_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_umax_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: 
i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little umax v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_umax_little_i8(0x12345678, 3, 0x11) == [0x12, 0x12345678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 3, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12345678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12345678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 1, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_umax_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345678] +; run: %atomic_rmw_umax_little_i8(0x12345678, 0, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_umax_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -326,17 +646,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_umax_little_i8(0x12345678, 3, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 3, 0xff) == 0xff345678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 2, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 2, 0xff) == 0x12ff5678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 1, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 1, 0xff) == 0x1234ff78 -; run: %atomic_rmw_umax_little_i8(0x12345678, 0, 0x11) == 0x12345678 -; run: %atomic_rmw_umax_little_i8(0x12345678, 0, 0xff) == 0x123456ff +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 3, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 3, 0xff) == 0xff345678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 2, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 2, 0xff) == 0x12ff5678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 1, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 1, 0xff) == 0x1234ff78 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345678 +; run: %atomic_rmw_umax_little_i8_no_res(0x12345678, 0, 0xff) == 0x123456ff -function %atomic_rmw_smin_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_smin_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little smin v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_smin_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_smin_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_smin_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_smin_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x1234ffff] + +function %atomic_rmw_smin_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -349,13 +687,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_smin_little_i16(0x12345678, 2, 0x1111) == 0x11115678 -; run: %atomic_rmw_smin_little_i16(0x12345678, 2, 0xffff) == 0xffff5678 -; run: %atomic_rmw_smin_little_i16(0x12345678, 0, 0x1111) == 0x12341111 -; run: %atomic_rmw_smin_little_i16(0x12345678, 0, 0xffff) == 0x1234ffff +; run: %atomic_rmw_smin_little_i16_no_res(0x12345678, 2, 0x1111) == 0x11115678 +; run: %atomic_rmw_smin_little_i16_no_res(0x12345678, 2, 0xffff) == 0xffff5678 +; run: 
%atomic_rmw_smin_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12341111 +; run: %atomic_rmw_smin_little_i16_no_res(0x12345678, 0, 0xffff) == 0x1234ffff + + +function %atomic_rmw_smin_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little smin v4, v2 -function %atomic_rmw_smin_little_i8(i32, i64, i8) -> i32 { + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_smin_little_i8(0x12345678, 3, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_smin_little_i8(0x12345678, 3, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_smin_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_smin_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_smin_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_smin_little_i8(0x12345678, 1, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_smin_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_smin_little_i8(0x12345678, 0, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_smin_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -368,17 +728,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_smin_little_i8(0x12345678, 3, 0x11) == 0x11345678 -; run: %atomic_rmw_smin_little_i8(0x12345678, 3, 0xff) == 0xff345678 -; run: %atomic_rmw_smin_little_i8(0x12345678, 2, 0x11) == 0x12115678 -; run: %atomic_rmw_smin_little_i8(0x12345678, 2, 0xff) == 0x12ff5678 -; run: %atomic_rmw_smin_little_i8(0x12345678, 1, 0x11) == 0x12341178 -; run: %atomic_rmw_smin_little_i8(0x12345678, 1, 0xff) == 0x1234ff78 -; run: %atomic_rmw_smin_little_i8(0x12345678, 0, 0x11) == 0x12345611 -; run: %atomic_rmw_smin_little_i8(0x12345678, 0, 0xff) == 0x123456ff +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 3, 0x11) == 0x11345678 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 3, 0xff) == 0xff345678 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 2, 0x11) == 0x12115678 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 2, 0xff) == 0x12ff5678 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 1, 0x11) == 0x12341178 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 1, 0xff) == 0x1234ff78 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345611 +; run: %atomic_rmw_smin_little_i8_no_res(0x12345678, 0, 0xff) == 0x123456ff -function %atomic_rmw_smax_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_smax_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little smax v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_smax_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0x12345678] +; run: %atomic_rmw_smax_little_i16(0x12345678, 2, 0x7fff) == [0x1234, 0x7fff5678] +; run: %atomic_rmw_smax_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x12345678] +; run: %atomic_rmw_smax_little_i16(0x12345678, 0, 0x7fff) == [0x5678, 0x12347fff] + +function %atomic_rmw_smax_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -391,13 +769,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_smax_little_i16(0x12345678, 2, 0xffff) == 0x12345678 -; run: %atomic_rmw_smax_little_i16(0x12345678, 2, 
0x7fff) == 0x7fff5678 -; run: %atomic_rmw_smax_little_i16(0x12345678, 0, 0xffff) == 0x12345678 -; run: %atomic_rmw_smax_little_i16(0x12345678, 0, 0x7fff) == 0x12347fff +; run: %atomic_rmw_smax_little_i16_no_res(0x12345678, 2, 0xffff) == 0x12345678 +; run: %atomic_rmw_smax_little_i16_no_res(0x12345678, 2, 0x7fff) == 0x7fff5678 +; run: %atomic_rmw_smax_little_i16_no_res(0x12345678, 0, 0xffff) == 0x12345678 +; run: %atomic_rmw_smax_little_i16_no_res(0x12345678, 0, 0x7fff) == 0x12347fff + + +function %atomic_rmw_smax_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little smax v4, v2 -function %atomic_rmw_smax_little_i8(i32, i64, i8) -> i32 { + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_smax_little_i8(0x12345678, 3, 0xff) == [0x12, 0x12345678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 3, 0x7f) == [0x12, 0x7f345678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12345678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 2, 0x7f) == [0x34, 0x127f5678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 1, 0xff) == [0x56, 0x12345678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 1, 0x7f) == [0x56, 0x12347f78] +; run: %atomic_rmw_smax_little_i8(0x12345678, 0, 0xff) == [0x78, 0x12345678] +; run: %atomic_rmw_smax_little_i8(0x12345678, 0, 0x7f) == [0x78, 0x1234567f] + +function %atomic_rmw_smax_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -410,17 +810,35 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_smax_little_i8(0x12345678, 3, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 3, 0x7f) == 0x7f345678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 2, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 2, 0x7f) == 0x127f5678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 1, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 1, 0x7f) == 0x12347f78 -; run: %atomic_rmw_smax_little_i8(0x12345678, 0, 0xff) == 0x12345678 -; run: %atomic_rmw_smax_little_i8(0x12345678, 0, 0x7f) == 0x1234567f +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 3, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 3, 0x7f) == 0x7f345678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 2, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 2, 0x7f) == 0x127f5678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 1, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 1, 0x7f) == 0x12347f78 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 0, 0xff) == 0x12345678 +; run: %atomic_rmw_smax_little_i8_no_res(0x12345678, 0, 0x7f) == 0x1234567f -function %atomic_rmw_xchg_little_i16(i32, i64, i16) -> i32 { +function %atomic_rmw_xchg_little_i16(i32, i64, i16) -> i16, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i16): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i16 little xchg v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_xchg_little_i16(0x12345678, 2, 0x1111) == [0x1234, 0x11115678] +; run: %atomic_rmw_xchg_little_i16(0x12345678, 2, 0xffff) == [0x1234, 0xffff5678] +; run: %atomic_rmw_xchg_little_i16(0x12345678, 0, 0x1111) == [0x5678, 0x12341111] +; run: %atomic_rmw_xchg_little_i16(0x12345678, 0, 0xffff) == [0x5678, 0x1234ffff] + +function 
%atomic_rmw_xchg_little_i16_no_res(i32, i64, i16) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i16): @@ -433,13 +851,35 @@ block0(v0: i32, v1: i64, v2: i16): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_xchg_little_i16(0x12345678, 2, 0x1111) == 0x11115678 -; run: %atomic_rmw_xchg_little_i16(0x12345678, 2, 0xffff) == 0xffff5678 -; run: %atomic_rmw_xchg_little_i16(0x12345678, 0, 0x1111) == 0x12341111 -; run: %atomic_rmw_xchg_little_i16(0x12345678, 0, 0xffff) == 0x1234ffff +; run: %atomic_rmw_xchg_little_i16_no_res(0x12345678, 2, 0x1111) == 0x11115678 +; run: %atomic_rmw_xchg_little_i16_no_res(0x12345678, 2, 0xffff) == 0xffff5678 +; run: %atomic_rmw_xchg_little_i16_no_res(0x12345678, 0, 0x1111) == 0x12341111 +; run: %atomic_rmw_xchg_little_i16_no_res(0x12345678, 0, 0xffff) == 0x1234ffff -function %atomic_rmw_xchg_little_i8(i32, i64, i8) -> i32 { +function %atomic_rmw_xchg_little_i8(i32, i64, i8) -> i8, i32 { + ss0 = explicit_slot 4 + +block0(v0: i32, v1: i64, v2: i8): + v3 = stack_addr.i64 ss0 + store.i32 little v0, v3 + + v4 = iadd.i64 v3, v1 + v5 = atomic_rmw.i8 little xchg v4, v2 + + v6 = load.i32 little v3 + return v5, v6 +} +; run: %atomic_rmw_xchg_little_i8(0x12345678, 3, 0x11) == [0x12, 0x11345678] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 3, 0xff) == [0x12, 0xff345678] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 2, 0x11) == [0x34, 0x12115678] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 2, 0xff) == [0x34, 0x12ff5678] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 1, 0x11) == [0x56, 0x12341178] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 1, 0xff) == [0x56, 0x1234ff78] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 0, 0x11) == [0x78, 0x12345611] +; run: %atomic_rmw_xchg_little_i8(0x12345678, 0, 0xff) == [0x78, 0x123456ff] + +function %atomic_rmw_xchg_little_i8_no_res(i32, i64, i8) -> i32 { ss0 = explicit_slot 4 block0(v0: i32, v1: i64, v2: i8): @@ -452,11 +892,11 @@ block0(v0: i32, v1: i64, v2: i8): v6 = load.i32 little v3 return v6 } -; run: %atomic_rmw_xchg_little_i8(0x12345678, 3, 0x11) == 0x11345678 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 3, 0xff) == 0xff345678 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 2, 0x11) == 0x12115678 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 2, 0xff) == 0x12ff5678 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 1, 0x11) == 0x12341178 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 1, 0xff) == 0x1234ff78 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 0, 0x11) == 0x12345611 -; run: %atomic_rmw_xchg_little_i8(0x12345678, 0, 0xff) == 0x123456ff +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 3, 0x11) == 0x11345678 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 3, 0xff) == 0xff345678 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 2, 0x11) == 0x12115678 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 2, 0xff) == 0x12ff5678 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 1, 0x11) == 0x12341178 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 1, 0xff) == 0x1234ff78 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 0, 0x11) == 0x12345611 +; run: %atomic_rmw_xchg_little_i8_no_res(0x12345678, 0, 0xff) == 0x123456ff diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index ecfd20c8cdd7..2f9111e3db94 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -192,6 +192,9 @@ wasmtime_option_group! { pub parallel_compilation: Option, /// Whether to enable proof-carrying code (PCC)-based validation. 
pub pcc: Option<bool>,
+ /// Controls whether native unwind information is present in compiled
+ /// object files.
+ pub native_unwind_info: Option<bool>,
 #[prefixed = "cranelift"]
 /// Set a cranelift-specific option. Use `wasmtime settings` to see
@@ -662,6 +665,9 @@ impl CommonOptions {
 if let Some(enable) = self.opts.signals_based_traps {
 config.signals_based_traps(enable);
 }
+ if let Some(enable) = self.codegen.native_unwind_info {
+ config.native_unwind_info(enable);
+ }
 if let Some(wasmfx_stack_size) = self.wasm.wasmfx_stack_size {
 config.wasmfx_stack_size(wasmfx_stack_size);
diff --git a/crates/test-programs/src/bin/preview2_tcp_connect.rs b/crates/test-programs/src/bin/preview2_tcp_connect.rs
index d840cc5934ff..6ffae5983c80 100644
--- a/crates/test-programs/src/bin/preview2_tcp_connect.rs
+++ b/crates/test-programs/src/bin/preview2_tcp_connect.rs
@@ -95,6 +95,30 @@ fn test_tcp_connect_dual_stack(net: &Network) {
 ));
 }
+/// Client sockets can be explicitly bound.
+fn test_tcp_connect_explicit_bind(net: &Network, family: IpAddressFamily) {
+ let ip = IpAddress::new_loopback(family);
+
+ let listener = {
+ let bind_address = IpSocketAddress::new(ip, 0);
+ let listener = TcpSocket::new(family).unwrap();
+ listener.blocking_bind(&net, bind_address).unwrap();
+ listener.blocking_listen().unwrap();
+ listener
+ };
+
+ let listener_address = listener.local_address().unwrap();
+ let client = TcpSocket::new(family).unwrap();
+
+ // Manually bind the client:
+ client
+ .blocking_bind(net, IpSocketAddress::new(ip, 0))
+ .unwrap();
+
+ // Connect should work:
+ client.blocking_connect(net, listener_address).unwrap();
+}
+
 fn main() {
 let net = Network::default();
@@ -110,4 +134,7 @@ fn main() {
 test_tcp_connect_non_unicast(&net);
 test_tcp_connect_dual_stack(&net);
+
+ test_tcp_connect_explicit_bind(&net, IpAddressFamily::Ipv4);
+ test_tcp_connect_explicit_bind(&net, IpAddressFamily::Ipv6);
 }
diff --git a/crates/wasi/src/tcp.rs b/crates/wasi/src/tcp.rs
index 44ba68416c07..5b05ebb25339 100644
--- a/crates/wasi/src/tcp.rs
+++ b/crates/wasi/src/tcp.rs
@@ -238,7 +238,7 @@ impl TcpSocket {
 pub fn start_connect(&mut self, remote_address: SocketAddr) -> SocketResult<()> {
 match self.tcp_state {
- TcpState::Default(..) => {}
+ TcpState::Default(..) | TcpState::Bound(..) => {}
 TcpState::Connecting(..)
 | TcpState::ConnectReady(..)
=> { return Err(ErrorCode::ConcurrencyConflict.into()) @@ -251,7 +251,7 @@ impl TcpSocket { network::util::validate_remote_address(&remote_address)?; network::util::validate_address_family(&remote_address, &self.family)?; - let TcpState::Default(tokio_socket) = + let (TcpState::Default(tokio_socket) | TcpState::Bound(tokio_socket)) = std::mem::replace(&mut self.tcp_state, TcpState::Closed) else { unreachable!(); diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index f4c70cad041c..0fc1bd13ee17 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -644,6 +644,7 @@ fn is_matching_assert_invalid_error_message(expected: &str, actual: &str) -> boo actual.contains(expected) // slight difference in error messages || (expected.contains("unknown elem segment") && actual.contains("unknown element segment")) + || (expected.contains("type mismatch") && actual.contains("indirect calls must go through a table with type <= funcref")) // The same test here is asserted to have one error message in // `memory.wast` and a different error message in // `memory64/memory.wast`, so we equate these two error messages to get diff --git a/tests/spec_testsuite b/tests/spec_testsuite index cbde6d5f26ba..13527d8d4232 160000 --- a/tests/spec_testsuite +++ b/tests/spec_testsuite @@ -1 +1 @@ -Subproject commit cbde6d5f26ba12d4f455b65bd0648cdba4d95f15 +Subproject commit 13527d8d423277004295c4aaacfd2fa199c4f5a4 diff --git a/tests/wast.rs b/tests/wast.rs index 54bd427a7f0c..1f6bb0b07aa1 100644 --- a/tests/wast.rs +++ b/tests/wast.rs @@ -103,6 +103,7 @@ fn should_fail(test: &Path, strategy: Strategy) -> bool { "spec_testsuite/table_set.wast", "spec_testsuite/table_size.wast", "spec_testsuite/unreached-invalid.wast", + "spec_testsuite/call_indirect.wast", // simd-related failures "annotations/simd_lane.wast", "memory64/simd.wast", @@ -266,7 +267,7 @@ fn run_wast(wast: &Path, strategy: Strategy, pooling: bool) -> anyhow::Result<() let function_references = gc || memory64 || stack_switching || feature_found(wast, "function-references"); let reference_types = !(threads && feature_found(wast, "proposals")); - let relaxed_simd = feature_found(wast, "relaxed-simd"); + let relaxed_simd = feature_found(wast, "relaxed-simd") || memory64; let tail_call = function_references || feature_found(wast, "tail-call"); let use_shared_memory = feature_found_src(&wast_bytes, "shared_memory") || feature_found_src(&wast_bytes, "shared)");
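Note (editorial, not part of the patch): the `native_unwind_info` option added in crates/cli-flags above forwards to the existing `wasmtime::Config::native_unwind_info` setting, as the `impl CommonOptions` hunk shows. Below is a minimal sketch of flipping the same switch from an embedding rather than the CLI; it assumes the `wasmtime` and `anyhow` crates as dependencies, and the chosen value is purely illustrative.

// Illustrative sketch: embedder-side equivalent of the new CLI option.
use wasmtime::{Config, Engine};

fn main() -> anyhow::Result<()> {
    let mut config = Config::new();
    // Same Config knob the new CLI option is plumbed into: disabling it
    // omits native unwind information from compiled object files, which
    // shrinks them but may break external profilers/unwinders that need it.
    config.native_unwind_info(false);
    let _engine = Engine::new(&config)?;
    Ok(())
}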