From 1bc36a0827f852994f7bf6813d29de26883801d4 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 26 Sep 2023 09:51:57 -0700 Subject: [PATCH] cranelift(x64): Make xmm{8..15} registers non-preferred These registers require an additional byte to reference when encoded in certain AVX instruction formats (and maybe other situations as well?) so prefer xmm{0..7} when they are available and only fall back to xmm{8..15} when register pressure is higher. --- cranelift/codegen/src/isa/x64/abi.rs | 25 +-- .../filetests/isa/x64/call-conv.clif | 24 +-- .../filetests/isa/x64/cmp-mem-bug.clif | 16 +- .../filetests/filetests/isa/x64/fastcall.clif | 168 +++++++++--------- .../filetests/isa/x64/fcopysign.clif | 16 +- .../filetests/filetests/isa/x64/fcvt.clif | 94 +++++----- .../filetests/isa/x64/float-avx.clif | 36 ++-- .../filetests/filetests/isa/x64/fma-call.clif | 32 ++-- .../filetests/isa/x64/simd-arith-avx.clif | 50 +++--- .../filetests/isa/x64/simd-bitselect.clif | 11 +- .../filetests/isa/x64/simd-bitwise-avx.clif | 9 +- .../isa/x64/simd-bitwise-compile.clif | 99 ++++++----- .../filetests/isa/x64/simd-cmp-avx.clif | 72 ++++---- .../isa/x64/simd-comparison-legalize.clif | 10 +- .../filetests/isa/x64/simd-splat-avx.clif | 8 +- .../filetests/isa/x64/simd-widen-mul.clif | 36 ++-- .../wasm/x64-relaxed-simd-deterministic.wat | 76 ++++---- .../filetests/wasm/x64-relaxed-simd.wat | 34 ++-- 18 files changed, 416 insertions(+), 400 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 51758010162a..be005e102243 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1243,7 +1243,8 @@ fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv { preg(regs::r10()), preg(regs::r11()), ], - // Preferred XMMs: all of them. + // Preferred XMMs: the first 8, which can have smaller encodings + // with AVX instructions. vec![ preg(regs::xmm0()), preg(regs::xmm1()), @@ -1253,14 +1254,6 @@ fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv { preg(regs::xmm5()), preg(regs::xmm6()), preg(regs::xmm7()), - preg(regs::xmm8()), - preg(regs::xmm9()), - preg(regs::xmm10()), - preg(regs::xmm11()), - preg(regs::xmm12()), - preg(regs::xmm13()), - preg(regs::xmm14()), - preg(regs::xmm15()), ], // The Vector Regclass is unused vec![], @@ -1273,8 +1266,18 @@ fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv { preg(regs::r13()), preg(regs::r14()), ], - // Non-preferred XMMs: none. - vec![], + // Non-preferred XMMs: the last 8 registers, which can have larger + // encodings with AVX instructions. 
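+            // For example (an illustrative sketch of one such case, not an
+            // exhaustive accounting): `vaddps %xmm1, %xmm2, %xmm3` fits the
+            // two-byte VEX prefix and encodes as `c5 e8 58 d9` (4 bytes),
+            // while `vaddps %xmm9, %xmm2, %xmm3` must set VEX.B to name
+            // xmm9 in the r/m slot, which only the three-byte prefix can
+            // express: `c4 c1 68 58 d9` (5 bytes).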
+ vec![ + preg(regs::xmm8()), + preg(regs::xmm9()), + preg(regs::xmm10()), + preg(regs::xmm11()), + preg(regs::xmm12()), + preg(regs::xmm13()), + preg(regs::xmm14()), + preg(regs::xmm15()), + ], // The Vector Regclass is unused vec![], ], diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index a4e383e817f0..1e0cede45e3b 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -202,8 +202,8 @@ block0( ; movq %rax, %rdi ; movq 16(%rbp), %r10 ; movq 24(%rbp), %r11 -; movss 32(%rbp), %xmm9 -; movsd 40(%rbp), %xmm8 +; movss 32(%rbp), %xmm11 +; movsd 40(%rbp), %xmm13 ; subq %rsp, $144, %rsp ; virtual_sp_offset_adjust 144 ; movq %r8, 32(%rsp) @@ -218,8 +218,8 @@ block0( ; movsd %xmm7, 104(%rsp) ; movq %r10, 112(%rsp) ; movl %r11d, 120(%rsp) -; movss %xmm9, 128(%rsp) -; movsd %xmm8, 136(%rsp) +; movss %xmm11, 128(%rsp) +; movsd %xmm13, 136(%rsp) ; movq %rdi, %r9 ; movq %rcx, %r8 ; movq %rsi, %rcx @@ -242,8 +242,8 @@ block0( ; movq %rax, %rdi ; movq 0x10(%rbp), %r10 ; movq 0x18(%rbp), %r11 -; movss 0x20(%rbp), %xmm9 -; movsd 0x28(%rbp), %xmm8 +; movss 0x20(%rbp), %xmm11 +; movsd 0x28(%rbp), %xmm13 ; subq $0x90, %rsp ; movq %r8, 0x20(%rsp) ; movq %r9, 0x28(%rsp) @@ -257,8 +257,8 @@ block0( ; movsd %xmm7, 0x68(%rsp) ; movq %r10, 0x70(%rsp) ; movl %r11d, 0x78(%rsp) -; movss %xmm9, 0x80(%rsp) -; movsd %xmm8, 0x88(%rsp) +; movss %xmm11, 0x80(%rsp) +; movsd %xmm13, 0x88(%rsp) ; movq %rdi, %r9 ; movq %rcx, %r8 ; movq %rsi, %rcx @@ -330,7 +330,7 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; block0: ; movq %rsi, %r9 ; movq %rdi, %rsi -; movdqa %xmm1, %xmm12 +; movdqa %xmm1, %xmm6 ; movdqa %xmm0, %xmm1 ; subq %rsp, $96, %rsp ; virtual_sp_offset_adjust 96 @@ -343,7 +343,7 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; movsd %xmm5, 80(%rsp) ; movq %rsi, %rcx ; movq %r9, %r8 -; movdqa %xmm12, %xmm3 +; movdqa %xmm6, %xmm3 ; call *%rcx ; addq %rsp, $96, %rsp ; virtual_sp_offset_adjust -96 @@ -358,7 +358,7 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; block1: ; offset 0x4 ; movq %rsi, %r9 ; movq %rdi, %rsi -; movdqa %xmm1, %xmm12 +; movdqa %xmm1, %xmm6 ; movdqa %xmm0, %xmm1 ; subq $0x60, %rsp ; movl %edx, 0x20(%rsp) @@ -370,7 +370,7 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; movsd %xmm5, 0x50(%rsp) ; movq %rsi, %rcx ; movq %r9, %r8 -; movdqa %xmm12, %xmm3 +; movdqa %xmm6, %xmm3 ; callq *%rcx ; addq $0x60, %rsp ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 331239f54827..2574ddfb3de5 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -54,13 +54,13 @@ block0(v0: f64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movsd 0(%rdi), %xmm9 -; ucomisd %xmm9, %xmm0 +; movsd 0(%rdi), %xmm1 +; ucomisd %xmm1, %xmm0 ; setnp %dil ; setz %al ; andl %edi, %eax, %edi ; movzbq %dil, %rax -; ucomisd %xmm9, %xmm0 +; ucomisd %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movsd %xmm0, %xmm0; jnp $next; movsd %xmm2, %xmm0; $next: ; movsd %xmm0, %xmm0; jz $next; movsd %xmm2, %xmm0; $next: @@ -73,17 +73,17 @@ block0(v0: f64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movsd (%rdi), %xmm9 ; trap: heap_oob -; ucomisd %xmm9, %xmm0 +; movsd 
(%rdi), %xmm1 ; trap: heap_oob +; ucomisd %xmm1, %xmm0 ; setnp %dil ; sete %al ; andl %eax, %edi ; movzbq %dil, %rax -; ucomisd %xmm9, %xmm0 +; ucomisd %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 -; jnp 0x2f +; jnp 0x2c ; movsd %xmm2, %xmm0 -; je 0x39 +; je 0x36 ; movsd %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 0bf874402fdb..86e865b61493 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -356,51 +356,53 @@ block0(v0: i64): ; unwind SaveReg { clobber_offset: 144, reg: p15f } ; block0: ; movsd 0(%rcx), %xmm0 -; movsd 8(%rcx), %xmm10 -; movdqu %xmm10, rsp(48 + virtual offset) -; movsd 16(%rcx), %xmm5 -; movsd 24(%rcx), %xmm14 -; movdqu %xmm14, rsp(32 + virtual offset) -; movsd 32(%rcx), %xmm13 -; movsd 40(%rcx), %xmm15 -; movdqu %xmm15, rsp(16 + virtual offset) +; movsd 8(%rcx), %xmm8 +; movdqu %xmm8, rsp(48 + virtual offset) +; movsd 16(%rcx), %xmm10 +; movdqu %xmm10, rsp(0 + virtual offset) +; movsd 24(%rcx), %xmm9 +; movdqa %xmm9, %xmm10 +; movsd 32(%rcx), %xmm5 +; movsd 40(%rcx), %xmm6 +; movdqu %xmm6, rsp(32 + virtual offset) ; movsd 48(%rcx), %xmm7 -; movsd 56(%rcx), %xmm8 -; movdqu %xmm8, rsp(0 + virtual offset) -; movsd 64(%rcx), %xmm12 -; movsd 72(%rcx), %xmm2 -; movsd 80(%rcx), %xmm9 -; movsd 88(%rcx), %xmm4 +; movsd 56(%rcx), %xmm12 +; movdqu %xmm12, rsp(16 + virtual offset) +; movsd 64(%rcx), %xmm4 +; movsd 72(%rcx), %xmm12 +; movsd 80(%rcx), %xmm1 +; movsd 88(%rcx), %xmm14 ; movsd 96(%rcx), %xmm3 -; movsd 104(%rcx), %xmm8 +; movsd 104(%rcx), %xmm15 ; movsd 112(%rcx), %xmm11 -; movsd 120(%rcx), %xmm10 -; movsd 128(%rcx), %xmm6 -; movsd 136(%rcx), %xmm14 -; movsd 144(%rcx), %xmm1 -; movdqu rsp(48 + virtual offset), %xmm15 -; addsd %xmm0, %xmm15, %xmm0 -; movdqu rsp(32 + virtual offset), %xmm15 -; addsd %xmm5, %xmm15, %xmm5 -; movdqu rsp(16 + virtual offset), %xmm15 -; addsd %xmm13, %xmm15, %xmm13 -; movdqu rsp(0 + virtual offset), %xmm15 -; addsd %xmm7, %xmm15, %xmm7 -; addsd %xmm12, %xmm2, %xmm12 -; addsd %xmm9, %xmm4, %xmm9 -; addsd %xmm3, %xmm8, %xmm3 -; addsd %xmm11, %xmm10, %xmm11 -; addsd %xmm6, %xmm14, %xmm6 -; addsd %xmm1, 152(%rcx), %xmm1 -; addsd %xmm0, %xmm5, %xmm0 -; addsd %xmm13, %xmm7, %xmm13 -; addsd %xmm12, %xmm9, %xmm12 -; addsd %xmm3, %xmm11, %xmm3 -; addsd %xmm6, %xmm1, %xmm6 -; addsd %xmm0, %xmm13, %xmm0 -; addsd %xmm12, %xmm3, %xmm12 -; addsd %xmm0, %xmm12, %xmm0 +; movsd 120(%rcx), %xmm8 +; movsd 128(%rcx), %xmm2 +; movsd 136(%rcx), %xmm9 +; movsd 144(%rcx), %xmm13 +; movdqu rsp(48 + virtual offset), %xmm6 ; addsd %xmm0, %xmm6, %xmm0 +; movdqa %xmm10, %xmm6 +; movdqu rsp(0 + virtual offset), %xmm10 +; addsd %xmm10, %xmm6, %xmm10 +; movdqu rsp(32 + virtual offset), %xmm6 +; addsd %xmm5, %xmm6, %xmm5 +; movdqu rsp(16 + virtual offset), %xmm6 +; addsd %xmm7, %xmm6, %xmm7 +; addsd %xmm4, %xmm12, %xmm4 +; addsd %xmm1, %xmm14, %xmm1 +; addsd %xmm3, %xmm15, %xmm3 +; addsd %xmm11, %xmm8, %xmm11 +; addsd %xmm2, %xmm9, %xmm2 +; addsd %xmm13, 152(%rcx), %xmm13 +; addsd %xmm0, %xmm10, %xmm0 +; addsd %xmm5, %xmm7, %xmm5 +; addsd %xmm4, %xmm1, %xmm4 +; addsd %xmm3, %xmm11, %xmm3 +; addsd %xmm2, %xmm13, %xmm2 +; addsd %xmm0, %xmm5, %xmm0 +; addsd %xmm4, %xmm3, %xmm4 +; addsd %xmm0, %xmm4, %xmm0 +; addsd %xmm0, %xmm2, %xmm0 ; movdqu 64(%rsp), %xmm6 ; movdqu 80(%rsp), %xmm7 ; movdqu 96(%rsp), %xmm8 @@ -433,51 +435,53 @@ block0(v0: i64): ; movdqu %xmm15, 0xd0(%rsp) ; block1: ; offset 0x61 
; movsd (%rcx), %xmm0 ; trap: heap_oob -; movsd 8(%rcx), %xmm10 ; trap: heap_oob -; movdqu %xmm10, 0x30(%rsp) -; movsd 0x10(%rcx), %xmm5 ; trap: heap_oob -; movsd 0x18(%rcx), %xmm14 ; trap: heap_oob -; movdqu %xmm14, 0x20(%rsp) -; movsd 0x20(%rcx), %xmm13 ; trap: heap_oob -; movsd 0x28(%rcx), %xmm15 ; trap: heap_oob -; movdqu %xmm15, 0x10(%rsp) +; movsd 8(%rcx), %xmm8 ; trap: heap_oob +; movdqu %xmm8, 0x30(%rsp) +; movsd 0x10(%rcx), %xmm10 ; trap: heap_oob +; movdqu %xmm10, (%rsp) +; movsd 0x18(%rcx), %xmm9 ; trap: heap_oob +; movdqa %xmm9, %xmm10 +; movsd 0x20(%rcx), %xmm5 ; trap: heap_oob +; movsd 0x28(%rcx), %xmm6 ; trap: heap_oob +; movdqu %xmm6, 0x20(%rsp) ; movsd 0x30(%rcx), %xmm7 ; trap: heap_oob -; movsd 0x38(%rcx), %xmm8 ; trap: heap_oob -; movdqu %xmm8, (%rsp) -; movsd 0x40(%rcx), %xmm12 ; trap: heap_oob -; movsd 0x48(%rcx), %xmm2 ; trap: heap_oob -; movsd 0x50(%rcx), %xmm9 ; trap: heap_oob -; movsd 0x58(%rcx), %xmm4 ; trap: heap_oob +; movsd 0x38(%rcx), %xmm12 ; trap: heap_oob +; movdqu %xmm12, 0x10(%rsp) +; movsd 0x40(%rcx), %xmm4 ; trap: heap_oob +; movsd 0x48(%rcx), %xmm12 ; trap: heap_oob +; movsd 0x50(%rcx), %xmm1 ; trap: heap_oob +; movsd 0x58(%rcx), %xmm14 ; trap: heap_oob ; movsd 0x60(%rcx), %xmm3 ; trap: heap_oob -; movsd 0x68(%rcx), %xmm8 ; trap: heap_oob +; movsd 0x68(%rcx), %xmm15 ; trap: heap_oob ; movsd 0x70(%rcx), %xmm11 ; trap: heap_oob -; movsd 0x78(%rcx), %xmm10 ; trap: heap_oob -; movsd 0x80(%rcx), %xmm6 ; trap: heap_oob -; movsd 0x88(%rcx), %xmm14 ; trap: heap_oob -; movsd 0x90(%rcx), %xmm1 ; trap: heap_oob -; movdqu 0x30(%rsp), %xmm15 -; addsd %xmm15, %xmm0 -; movdqu 0x20(%rsp), %xmm15 -; addsd %xmm15, %xmm5 -; movdqu 0x10(%rsp), %xmm15 -; addsd %xmm15, %xmm13 -; movdqu (%rsp), %xmm15 -; addsd %xmm15, %xmm7 -; addsd %xmm2, %xmm12 -; addsd %xmm4, %xmm9 -; addsd %xmm8, %xmm3 -; addsd %xmm10, %xmm11 -; addsd %xmm14, %xmm6 -; addsd 0x98(%rcx), %xmm1 ; trap: heap_oob -; addsd %xmm5, %xmm0 -; addsd %xmm7, %xmm13 -; addsd %xmm9, %xmm12 -; addsd %xmm11, %xmm3 -; addsd %xmm1, %xmm6 -; addsd %xmm13, %xmm0 -; addsd %xmm3, %xmm12 -; addsd %xmm12, %xmm0 +; movsd 0x78(%rcx), %xmm8 ; trap: heap_oob +; movsd 0x80(%rcx), %xmm2 ; trap: heap_oob +; movsd 0x88(%rcx), %xmm9 ; trap: heap_oob +; movsd 0x90(%rcx), %xmm13 ; trap: heap_oob +; movdqu 0x30(%rsp), %xmm6 ; addsd %xmm6, %xmm0 +; movdqa %xmm10, %xmm6 +; movdqu (%rsp), %xmm10 +; addsd %xmm6, %xmm10 +; movdqu 0x20(%rsp), %xmm6 +; addsd %xmm6, %xmm5 +; movdqu 0x10(%rsp), %xmm6 +; addsd %xmm6, %xmm7 +; addsd %xmm12, %xmm4 +; addsd %xmm14, %xmm1 +; addsd %xmm15, %xmm3 +; addsd %xmm8, %xmm11 +; addsd %xmm9, %xmm2 +; addsd 0x98(%rcx), %xmm13 ; trap: heap_oob +; addsd %xmm10, %xmm0 +; addsd %xmm7, %xmm5 +; addsd %xmm1, %xmm4 +; addsd %xmm11, %xmm3 +; addsd %xmm13, %xmm2 +; addsd %xmm5, %xmm0 +; addsd %xmm3, %xmm4 +; addsd %xmm4, %xmm0 +; addsd %xmm2, %xmm0 ; movdqu 0x40(%rsp), %xmm6 ; movdqu 0x50(%rsp), %xmm7 ; movdqu 0x60(%rsp), %xmm8 diff --git a/cranelift/filetests/filetests/isa/x64/fcopysign.clif b/cranelift/filetests/filetests/isa/x64/fcopysign.clif index a6140711c829..c2f39049ac96 100644 --- a/cranelift/filetests/filetests/isa/x64/fcopysign.clif +++ b/cranelift/filetests/filetests/isa/x64/fcopysign.clif @@ -13,9 +13,9 @@ block0(v0: f32, v1: f32): ; block0: ; movl $-2147483648, %ecx ; movd %ecx, %xmm7 -; movdqa %xmm0, %xmm10 +; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 -; andnps %xmm0, %xmm10, %xmm0 +; andnps %xmm0, %xmm2, %xmm0 ; andps %xmm7, %xmm1, %xmm7 ; orps %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp @@ -29,9 +29,9 @@ 
block0(v0: f32, v1: f32): ; block1: ; offset 0x4 ; movl $0x80000000, %ecx ; movd %ecx, %xmm7 -; movdqa %xmm0, %xmm10 +; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 -; andnps %xmm10, %xmm0 +; andnps %xmm2, %xmm0 ; andps %xmm1, %xmm7 ; orps %xmm7, %xmm0 ; movq %rbp, %rsp @@ -50,9 +50,9 @@ block0(v0: f64, v1: f64): ; block0: ; movabsq $-9223372036854775808, %rcx ; movq %rcx, %xmm7 -; movdqa %xmm0, %xmm10 +; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 -; andnpd %xmm0, %xmm10, %xmm0 +; andnpd %xmm0, %xmm2, %xmm0 ; andpd %xmm7, %xmm1, %xmm7 ; orpd %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp @@ -66,9 +66,9 @@ block0(v0: f64, v1: f64): ; block1: ; offset 0x4 ; movabsq $9223372036854775808, %rcx ; movq %rcx, %xmm7 -; movdqa %xmm0, %xmm10 +; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 -; andnpd %xmm10, %xmm0 +; andnpd %xmm2, %xmm0 ; andpd %xmm1, %xmm7 ; orpd %xmm7, %xmm0 ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index cbe440745cf4..0dfa8637e0cd 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -257,10 +257,10 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64): ; cvtsi2ss %r9, %xmm1 ; movl %edx, %r9d ; cvtsi2ss %r9, %xmm2 -; u64_to_f32_seq %rcx, %xmm14, %r9, %r10 +; u64_to_f32_seq %rcx, %xmm6, %r9, %r10 ; addss %xmm0, %xmm1, %xmm0 ; addss %xmm0, %xmm2, %xmm0 -; addss %xmm0, %xmm14, %xmm0 +; addss %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -278,18 +278,18 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64): ; cvtsi2ssq %r9, %xmm2 ; cmpq $0, %rcx ; jl 0x32 -; cvtsi2ssq %rcx, %xmm14 -; jmp 0x4d +; cvtsi2ssq %rcx, %xmm6 +; jmp 0x4c ; movq %rcx, %r9 ; shrq $1, %r9 ; movq %rcx, %r10 ; andq $1, %r10 ; orq %r9, %r10 -; cvtsi2ssq %r10, %xmm14 -; addss %xmm14, %xmm14 +; cvtsi2ssq %r10, %xmm6 +; addss %xmm6, %xmm6 ; addss %xmm1, %xmm0 ; addss %xmm2, %xmm0 -; addss %xmm14, %xmm0 +; addss %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -353,13 +353,12 @@ block0(v0: i32x4): ; movdqa %xmm0, %xmm3 ; pslld %xmm3, $16, %xmm3 ; psrld %xmm3, $16, %xmm3 -; movdqa %xmm0, %xmm9 -; psubd %xmm9, %xmm3, %xmm9 -; cvtdq2ps %xmm3, %xmm8 -; psrld %xmm9, $1, %xmm9 -; cvtdq2ps %xmm9, %xmm0 +; psubd %xmm0, %xmm3, %xmm0 +; cvtdq2ps %xmm3, %xmm1 +; psrld %xmm0, $1, %xmm0 +; cvtdq2ps %xmm0, %xmm0 ; addps %xmm0, %xmm0, %xmm0 -; addps %xmm0, %xmm8, %xmm0 +; addps %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -372,13 +371,12 @@ block0(v0: i32x4): ; movdqa %xmm0, %xmm3 ; pslld $0x10, %xmm3 ; psrld $0x10, %xmm3 -; movdqa %xmm0, %xmm9 -; psubd %xmm3, %xmm9 -; cvtdq2ps %xmm3, %xmm8 -; psrld $1, %xmm9 -; cvtdq2ps %xmm9, %xmm0 +; psubd %xmm3, %xmm0 +; cvtdq2ps %xmm3, %xmm1 +; psrld $1, %xmm0 +; cvtdq2ps %xmm0, %xmm0 ; addps %xmm0, %xmm0 -; addps %xmm8, %xmm0 +; addps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -1035,20 +1033,20 @@ block0(v0: f32x4): ; block0: ; uninit %xmm6 ; xorps %xmm6, %xmm6, %xmm6 -; movdqa %xmm0, %xmm11 -; maxps %xmm11, %xmm6, %xmm11 +; movdqa %xmm0, %xmm3 +; maxps %xmm3, %xmm6, %xmm3 ; pcmpeqd %xmm6, %xmm6, %xmm6 ; psrld %xmm6, $1, %xmm6 -; cvtdq2ps %xmm6, %xmm15 -; cvttps2dq %xmm11, %xmm14 -; subps %xmm11, %xmm15, %xmm11 -; cmpps $2, %xmm15, %xmm11, %xmm15 -; cvttps2dq %xmm11, %xmm0 -; pxor %xmm0, %xmm15, %xmm0 -; uninit %xmm9 -; pxor %xmm9, %xmm9, %xmm9 -; pmaxsd %xmm0, %xmm9, %xmm0 -; paddd %xmm0, %xmm14, %xmm0 +; cvtdq2ps %xmm6, %xmm7 +; cvttps2dq %xmm3, %xmm6 +; subps %xmm3, %xmm7, %xmm3 +; cmpps $2, %xmm7, %xmm3, %xmm7 +; cvttps2dq %xmm3, %xmm0 
+; pxor %xmm0, %xmm7, %xmm0 +; uninit %xmm1 +; pxor %xmm1, %xmm1, %xmm1 +; pmaxsd %xmm0, %xmm1, %xmm0 +; paddd %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1059,19 +1057,19 @@ block0(v0: f32x4): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; xorps %xmm6, %xmm6 -; movdqa %xmm0, %xmm11 -; maxps %xmm6, %xmm11 +; movdqa %xmm0, %xmm3 +; maxps %xmm6, %xmm3 ; pcmpeqd %xmm6, %xmm6 ; psrld $1, %xmm6 -; cvtdq2ps %xmm6, %xmm15 -; cvttps2dq %xmm11, %xmm14 -; subps %xmm15, %xmm11 -; cmpleps %xmm11, %xmm15 -; cvttps2dq %xmm11, %xmm0 -; pxor %xmm15, %xmm0 -; pxor %xmm9, %xmm9 -; pmaxsd %xmm9, %xmm0 -; paddd %xmm14, %xmm0 +; cvtdq2ps %xmm6, %xmm7 +; cvttps2dq %xmm3, %xmm6 +; subps %xmm7, %xmm3 +; cmpleps %xmm3, %xmm7 +; cvttps2dq %xmm3, %xmm0 +; pxor %xmm7, %xmm0 +; pxor %xmm1, %xmm1 +; pmaxsd %xmm1, %xmm0 +; paddd %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -1091,11 +1089,11 @@ block0(v0: f32x4): ; movdqa %xmm0, %xmm5 ; andps %xmm5, %xmm4, %xmm5 ; pxor %xmm4, %xmm5, %xmm4 -; cvttps2dq %xmm5, %xmm8 -; movdqa %xmm8, %xmm0 +; cvttps2dq %xmm5, %xmm1 +; movdqa %xmm1, %xmm0 ; pand %xmm0, %xmm4, %xmm0 ; psrad %xmm0, $31, %xmm0 -; pxor %xmm0, %xmm8, %xmm0 +; pxor %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1110,11 +1108,11 @@ block0(v0: f32x4): ; movdqa %xmm0, %xmm5 ; andps %xmm4, %xmm5 ; pxor %xmm5, %xmm4 -; cvttps2dq %xmm5, %xmm8 -; movdqa %xmm8, %xmm0 +; cvttps2dq %xmm5, %xmm1 +; movdqa %xmm1, %xmm0 ; pand %xmm4, %xmm0 ; psrad $0x1f, %xmm0 -; pxor %xmm8, %xmm0 +; pxor %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/float-avx.clif b/cranelift/filetests/filetests/isa/x64/float-avx.clif index 948056a03d14..e7ddfa2fb7e4 100644 --- a/cranelift/filetests/filetests/isa/x64/float-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/float-avx.clif @@ -444,11 +444,11 @@ block0(v0: i32x4): ; vpslld %xmm0, $16, %xmm2 ; vpsrld %xmm2, $16, %xmm4 ; vpsubd %xmm0, %xmm4, %xmm6 -; vcvtdq2ps %xmm4, %xmm8 -; vpsrld %xmm6, $1, %xmm10 -; vcvtdq2ps %xmm10, %xmm12 -; vaddps %xmm12, %xmm12, %xmm14 -; vaddps %xmm14, %xmm8, %xmm0 +; vcvtdq2ps %xmm4, %xmm0 +; vpsrld %xmm6, $1, %xmm2 +; vcvtdq2ps %xmm2, %xmm4 +; vaddps %xmm4, %xmm4, %xmm6 +; vaddps %xmm6, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -461,11 +461,11 @@ block0(v0: i32x4): ; vpslld $0x10, %xmm0, %xmm2 ; vpsrld $0x10, %xmm2, %xmm4 ; vpsubd %xmm4, %xmm0, %xmm6 -; vcvtdq2ps %xmm4, %xmm8 -; vpsrld $1, %xmm6, %xmm10 -; vcvtdq2ps %xmm10, %xmm12 -; vaddps %xmm12, %xmm12, %xmm14 -; vaddps %xmm8, %xmm14, %xmm0 +; vcvtdq2ps %xmm4, %xmm0 +; vpsrld $1, %xmm6, %xmm2 +; vcvtdq2ps %xmm2, %xmm4 +; vaddps %xmm4, %xmm4, %xmm6 +; vaddps %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -533,10 +533,10 @@ block0(v0: f32x4): ; vcmpps $0, %xmm0, %xmm0, %xmm2 ; vandps %xmm0, %xmm2, %xmm4 ; vpxor %xmm2, %xmm4, %xmm6 -; vcvttps2dq %xmm4, %xmm8 -; vpand %xmm8, %xmm6, %xmm10 -; vpsrad %xmm10, $31, %xmm12 -; vpxor %xmm12, %xmm8, %xmm0 +; vcvttps2dq %xmm4, %xmm0 +; vpand %xmm0, %xmm6, %xmm2 +; vpsrad %xmm2, $31, %xmm4 +; vpxor %xmm4, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -549,10 +549,10 @@ block0(v0: f32x4): ; vcmpeqps %xmm0, %xmm0, %xmm2 ; vandps %xmm2, %xmm0, %xmm4 ; vpxor %xmm4, %xmm2, %xmm6 -; vcvttps2dq %xmm4, %xmm8 -; vpand %xmm6, %xmm8, %xmm10 -; vpsrad $0x1f, %xmm10, %xmm12 -; vpxor %xmm8, %xmm12, %xmm0 +; vcvttps2dq %xmm4, %xmm0 +; vpand %xmm6, %xmm0, %xmm2 +; vpsrad $0x1f, %xmm2, %xmm4 +; vpxor %xmm0, %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git 
a/cranelift/filetests/filetests/isa/x64/fma-call.clif b/cranelift/filetests/filetests/isa/x64/fma-call.clif index 0b92c3f44e79..3bb595a4fe26 100644 --- a/cranelift/filetests/filetests/isa/x64/fma-call.clif +++ b/cranelift/filetests/filetests/isa/x64/fma-call.clif @@ -77,17 +77,17 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): ; movdqu %xmm0, rsp(48 + virtual offset) ; movdqu rsp(0 + virtual offset), %xmm4 ; pshufd $1, %xmm4, %xmm0 -; movdqu rsp(16 + virtual offset), %xmm8 -; pshufd $1, %xmm8, %xmm1 -; movdqu rsp(32 + virtual offset), %xmm12 -; pshufd $1, %xmm12, %xmm2 +; movdqu rsp(16 + virtual offset), %xmm2 +; pshufd $1, %xmm2, %xmm1 +; movdqu rsp(32 + virtual offset), %xmm4 +; pshufd $1, %xmm4, %xmm2 ; load_ext_name %FmaF32+0, %r9 ; call *%r9 ; movdqu %xmm0, rsp(64 + virtual offset) ; movdqu rsp(0 + virtual offset), %xmm4 ; pshufd $2, %xmm4, %xmm0 -; movdqu rsp(16 + virtual offset), %xmm14 -; pshufd $2, %xmm14, %xmm1 +; movdqu rsp(16 + virtual offset), %xmm6 +; pshufd $2, %xmm6, %xmm1 ; movdqu rsp(32 + virtual offset), %xmm3 ; pshufd $2, %xmm3, %xmm2 ; load_ext_name %FmaF32+0, %r10 @@ -131,17 +131,17 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): ; movdqu %xmm0, 0x30(%rsp) ; movdqu (%rsp), %xmm4 ; pshufd $1, %xmm4, %xmm0 -; movdqu 0x10(%rsp), %xmm8 -; pshufd $1, %xmm8, %xmm1 -; movdqu 0x20(%rsp), %xmm12 -; pshufd $1, %xmm12, %xmm2 +; movdqu 0x10(%rsp), %xmm2 +; pshufd $1, %xmm2, %xmm1 +; movdqu 0x20(%rsp), %xmm4 +; pshufd $1, %xmm4, %xmm2 ; movabsq $0, %r9 ; reloc_external Abs8 %FmaF32 0 ; callq *%r9 ; movdqu %xmm0, 0x40(%rsp) ; movdqu (%rsp), %xmm4 ; pshufd $2, %xmm4, %xmm0 -; movdqu 0x10(%rsp), %xmm14 -; pshufd $2, %xmm14, %xmm1 +; movdqu 0x10(%rsp), %xmm6 +; pshufd $2, %xmm6, %xmm1 ; movdqu 0x20(%rsp), %xmm3 ; pshufd $2, %xmm3, %xmm2 ; movabsq $0, %r10 ; reloc_external Abs8 %FmaF32 0 @@ -196,9 +196,9 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): ; pshufd $238, %xmm2, %xmm2 ; load_ext_name %FmaF64+0, %r9 ; call *%r9 -; movdqa %xmm0, %xmm14 +; movdqa %xmm0, %xmm6 ; movdqu rsp(48 + virtual offset), %xmm0 -; movlhps %xmm0, %xmm14, %xmm0 +; movlhps %xmm0, %xmm6, %xmm0 ; addq %rsp, $64, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -227,9 +227,9 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): ; pshufd $0xee, %xmm2, %xmm2 ; movabsq $0, %r9 ; reloc_external Abs8 %FmaF64 0 ; callq *%r9 -; movdqa %xmm0, %xmm14 +; movdqa %xmm0, %xmm6 ; movdqu 0x30(%rsp), %xmm0 -; movlhps %xmm14, %xmm0 +; movlhps %xmm6, %xmm0 ; addq $0x40, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif index f8c390ba8a17..8cd260a3f536 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif @@ -919,10 +919,10 @@ block0(v0: i8x16, v1: i32): ; vpunpcklbw %xmm0, %xmm0, %xmm5 ; vpunpckhbw %xmm0, %xmm0, %xmm7 ; addl %r9d, $8, %r9d -; vmovd %r9d, %xmm11 -; vpsraw %xmm5, %xmm11, %xmm13 -; vpsraw %xmm7, %xmm11, %xmm15 -; vpacksswb %xmm13, %xmm15, %xmm0 +; vmovd %r9d, %xmm3 +; vpsraw %xmm5, %xmm3, %xmm5 +; vpsraw %xmm7, %xmm3, %xmm7 +; vpacksswb %xmm5, %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -937,10 +937,10 @@ block0(v0: i8x16, v1: i32): ; vpunpcklbw %xmm0, %xmm0, %xmm5 ; vpunpckhbw %xmm0, %xmm0, %xmm7 ; addl $8, %r9d -; vmovd %r9d, %xmm11 -; vpsraw %xmm11, %xmm5, %xmm13 -; vpsraw %xmm11, %xmm7, %xmm15 -; vpacksswb %xmm15, %xmm13, %xmm0 +; vmovd %r9d, %xmm3 +; vpsraw %xmm3, %xmm5, %xmm5 +; vpsraw %xmm3, %xmm7, %xmm7 +; vpacksswb %xmm7, %xmm5, %xmm0 ; 
movq %rbp, %rsp ; popq %rbp ; retq @@ -959,8 +959,8 @@ block0(v0: i8x16): ; vpunpcklbw %xmm0, %xmm0, %xmm2 ; vpunpckhbw %xmm0, %xmm0, %xmm4 ; vpsraw %xmm2, $11, %xmm6 -; vpsraw %xmm4, $11, %xmm8 -; vpacksswb %xmm6, %xmm8, %xmm0 +; vpsraw %xmm4, $11, %xmm0 +; vpacksswb %xmm6, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -973,8 +973,8 @@ block0(v0: i8x16): ; vpunpcklbw %xmm0, %xmm0, %xmm2 ; vpunpckhbw %xmm0, %xmm0, %xmm4 ; vpsraw $0xb, %xmm2, %xmm6 -; vpsraw $0xb, %xmm4, %xmm8 -; vpacksswb %xmm8, %xmm6, %xmm0 +; vpsraw $0xb, %xmm4, %xmm0 +; vpacksswb %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -1350,10 +1350,10 @@ block0(v0: f64x2): ; uninit %xmm2 ; vxorpd %xmm2, %xmm2, %xmm4 ; vmaxpd %xmm0, %xmm4, %xmm6 -; vminpd %xmm6, const(0), %xmm8 -; vroundpd $3, %xmm8, %xmm10 -; vaddpd %xmm10, const(1), %xmm12 -; vshufps $136, %xmm12, %xmm4, %xmm0 +; vminpd %xmm6, const(0), %xmm0 +; vroundpd $3, %xmm0, %xmm2 +; vaddpd %xmm2, const(1), %xmm5 +; vshufps $136, %xmm5, %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1365,10 +1365,10 @@ block0(v0: f64x2): ; block1: ; offset 0x4 ; vxorpd %xmm2, %xmm2, %xmm4 ; vmaxpd %xmm4, %xmm0, %xmm6 -; vminpd 0x1c(%rip), %xmm6, %xmm8 -; vroundpd $3, %xmm8, %xmm10 -; vaddpd 0x1e(%rip), %xmm10, %xmm12 -; vshufps $0x88, %xmm4, %xmm12, %xmm0 +; vminpd 0x1c(%rip), %xmm6, %xmm0 +; vroundpd $3, %xmm0, %xmm2 +; vaddpd 0x1e(%rip), %xmm2, %xmm5 +; vshufps $0x88, %xmm4, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -1393,8 +1393,8 @@ block0(v0: i8x16, v1: i32): ; vpsllw %xmm0, %xmm5, %xmm7 ; lea const(0), %rsi ; shlq $4, %r10, %r10 -; vmovdqu 0(%rsi,%r10,1), %xmm13 -; vpand %xmm7, %xmm13, %xmm0 +; vmovdqu 0(%rsi,%r10,1), %xmm5 +; vpand %xmm7, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1410,12 +1410,12 @@ block0(v0: i8x16, v1: i32): ; vpsllw %xmm5, %xmm0, %xmm7 ; leaq 0x15(%rip), %rsi ; shlq $4, %r10 -; vmovdqu (%rsi, %r10), %xmm13 -; vpand %xmm13, %xmm7, %xmm0 +; vmovdqu (%rsi, %r10), %xmm5 +; vpand %xmm5, %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq -; addb %bh, %bh +; addb %al, (%rax) function %i8x16_shl_imm(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitselect.clif b/cranelift/filetests/filetests/isa/x64/simd-bitselect.clif index 50eb59680e30..6ffc922ee764 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitselect.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitselect.clif @@ -211,9 +211,9 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm8 +; movdqa %xmm0, %xmm2 ; movdqu const(0), %xmm0 -; movdqa %xmm8, %xmm4 +; movdqa %xmm2, %xmm4 ; pand %xmm4, %xmm0, %xmm4 ; pandn %xmm0, %xmm1, %xmm0 ; por %xmm0, %xmm4, %xmm0 @@ -226,9 +226,9 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movdqa %xmm0, %xmm8 -; movdqu 0x1f(%rip), %xmm0 -; movdqa %xmm8, %xmm4 +; movdqa %xmm0, %xmm2 +; movdqu 0x20(%rip), %xmm0 +; movdqa %xmm2, %xmm4 ; pand %xmm0, %xmm4 ; pandn %xmm1, %xmm0 ; por %xmm4, %xmm0 @@ -240,6 +240,7 @@ block0(v0: i8x16, v1: i8x16): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) +; addb %al, (%rax) ; addb %dh, %al ; addb %al, (%rax) ; incl (%rax) diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif index 8b540989c2b1..0de407a7abc7 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif @@ -42,8 +42,8 @@ block0(v0: 
i64): ; movl $-2147483648, %eax ; vmovd %eax, %xmm4 ; vandnps %xmm4, const(0), %xmm6 -; vandps %xmm4, 0(%rdi), %xmm8 -; vorps %xmm6, %xmm8, %xmm0 +; vandps %xmm4, 0(%rdi), %xmm0 +; vorps %xmm6, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -56,8 +56,8 @@ block0(v0: i64): ; movl $0x80000000, %eax ; vmovd %eax, %xmm4 ; vandnps 0x1b(%rip), %xmm4, %xmm6 -; vandps (%rdi), %xmm4, %xmm8 -; vorps %xmm8, %xmm6, %xmm0 +; vandps (%rdi), %xmm4, %xmm0 +; vorps %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -75,6 +75,7 @@ block0(v0: i64): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) +; addb %al, (%rax) function %bor_f32x4(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 817598710f4e..b1ed1466069f 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -337,8 +337,8 @@ block0(v0: i32): ; psllw %xmm0, %xmm5, %xmm0 ; lea const(0), %rsi ; shlq $4, %r10, %r10 -; movdqu 0(%rsi,%r10,1), %xmm13 -; pand %xmm0, %xmm13, %xmm0 +; movdqu 0(%rsi,%r10,1), %xmm5 +; pand %xmm0, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -355,8 +355,8 @@ block0(v0: i32): ; psllw %xmm5, %xmm0 ; leaq 0x2d(%rip), %rsi ; shlq $4, %r10 -; movdqu (%rsi, %r10), %xmm13 -; pand %xmm13, %xmm0 +; movdqu (%rsi, %r10), %xmm5 +; pand %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -365,10 +365,9 @@ block0(v0: i32): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) -; addl %eax, (%rdx) -; addl 0x9080706(, %rax), %eax -; orb (%rbx), %cl -; orb $0xd, %al +; addb %al, (%rcx) +; addb (%rbx), %al +; addb $5, %al function %ishl_i8x16_imm(i8x16) -> i8x16 { block0(v0: i8x16): @@ -605,17 +604,17 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqu const(0), %xmm8 +; movdqu const(0), %xmm1 ; movq %rdi, %r9 ; andq %r9, $7, %r9 -; movdqa %xmm8, %xmm0 -; punpcklbw %xmm0, %xmm8, %xmm0 -; punpckhbw %xmm8, %xmm8, %xmm8 +; movdqa %xmm1, %xmm0 +; punpcklbw %xmm0, %xmm1, %xmm0 +; punpckhbw %xmm1, %xmm1, %xmm1 ; addl %r9d, $8, %r9d -; movd %r9d, %xmm11 -; psraw %xmm0, %xmm11, %xmm0 -; psraw %xmm8, %xmm11, %xmm8 -; packsswb %xmm0, %xmm8, %xmm0 +; movd %r9d, %xmm3 +; psraw %xmm0, %xmm3, %xmm0 +; psraw %xmm1, %xmm3, %xmm1 +; packsswb %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -625,23 +624,28 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movdqu 0x33(%rip), %xmm8 +; movdqu 0x34(%rip), %xmm1 ; movq %rdi, %r9 ; andq $7, %r9 -; movdqa %xmm8, %xmm0 -; punpcklbw %xmm8, %xmm0 -; punpckhbw %xmm8, %xmm8 +; movdqa %xmm1, %xmm0 +; punpcklbw %xmm1, %xmm0 +; punpckhbw %xmm1, %xmm1 ; addl $8, %r9d -; movd %r9d, %xmm11 -; psraw %xmm11, %xmm0 -; psraw %xmm11, %xmm8 -; packsswb %xmm8, %xmm0 +; movd %r9d, %xmm3 +; psraw %xmm3, %xmm0 +; psraw %xmm3, %xmm1 +; packsswb %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq -; addb %al, (%rcx) -; addb (%rbx), %al -; addb $5, %al +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addl %eax, (%rdx) +; addl 0x9080706(, %rax), %eax +; orb (%rbx), %cl +; orb $0xd, %al function %sshr_i8x16_imm(i8x16, i32) -> i8x16 { block0(v0: i8x16, v1: i32): @@ -655,10 +659,10 @@ block0(v0: i8x16, v1: i32): ; block0: ; movdqa %xmm0, %xmm7 ; punpcklbw %xmm7, %xmm0, %xmm7 -; movdqa %xmm7, %xmm8 +; movdqa %xmm7, %xmm1 ; movdqa %xmm0, %xmm7 ; punpckhbw %xmm7, %xmm0, %xmm7 -; movdqa 
%xmm8, %xmm0 +; movdqa %xmm1, %xmm0 ; psraw %xmm0, $11, %xmm0 ; psraw %xmm7, $11, %xmm7 ; packsswb %xmm0, %xmm7, %xmm0 @@ -673,10 +677,10 @@ block0(v0: i8x16, v1: i32): ; block1: ; offset 0x4 ; movdqa %xmm0, %xmm7 ; punpcklbw %xmm0, %xmm7 -; movdqa %xmm7, %xmm8 +; movdqa %xmm7, %xmm1 ; movdqa %xmm0, %xmm7 ; punpckhbw %xmm0, %xmm7 -; movdqa %xmm8, %xmm0 +; movdqa %xmm1, %xmm0 ; psraw $0xb, %xmm0 ; psraw $0xb, %xmm7 ; packsswb %xmm7, %xmm0 @@ -899,13 +903,13 @@ block0(v0: i64x2, v1: i32): ; movq %rdi, %rcx ; andq %rcx, $63, %rcx ; movq %rcx, %xmm5 -; movdqu const(0), %xmm8 -; psrlq %xmm8, %xmm5, %xmm8 -; movdqa %xmm0, %xmm11 -; psrlq %xmm11, %xmm5, %xmm11 -; movdqa %xmm8, %xmm0 -; pxor %xmm0, %xmm11, %xmm0 -; psubq %xmm0, %xmm8, %xmm0 +; movdqu const(0), %xmm1 +; psrlq %xmm1, %xmm5, %xmm1 +; movdqa %xmm0, %xmm3 +; psrlq %xmm3, %xmm5, %xmm3 +; movdqa %xmm1, %xmm0 +; pxor %xmm0, %xmm3, %xmm0 +; psubq %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -918,13 +922,13 @@ block0(v0: i64x2, v1: i32): ; movq %rdi, %rcx ; andq $0x3f, %rcx ; movq %rcx, %xmm5 -; movdqu 0x27(%rip), %xmm8 -; psrlq %xmm5, %xmm8 -; movdqa %xmm0, %xmm11 -; psrlq %xmm5, %xmm11 -; movdqa %xmm8, %xmm0 -; pxor %xmm11, %xmm0 -; psubq %xmm8, %xmm0 +; movdqu 0x28(%rip), %xmm1 +; psrlq %xmm5, %xmm1 +; movdqa %xmm0, %xmm3 +; psrlq %xmm5, %xmm3 +; movdqa %xmm1, %xmm0 +; pxor %xmm3, %xmm0 +; psubq %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -935,4 +939,9 @@ block0(v0: i64x2, v1: i32): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb $0, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) diff --git a/cranelift/filetests/filetests/isa/x64/simd-cmp-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-cmp-avx.clif index bccc8657cb9a..60bd126554ad 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-cmp-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-cmp-avx.clif @@ -214,10 +214,10 @@ block0(v0: f32x4, v1: f32x4): ; vminps %xmm0, %xmm1, %xmm3 ; vminps %xmm1, %xmm0, %xmm5 ; vorps %xmm3, %xmm5, %xmm7 -; vcmpps $3, %xmm7, %xmm5, %xmm9 -; vorps %xmm7, %xmm9, %xmm11 -; vpsrld %xmm9, $10, %xmm13 -; vandnps %xmm13, %xmm11, %xmm0 +; vcmpps $3, %xmm7, %xmm5, %xmm1 +; vorps %xmm7, %xmm1, %xmm3 +; vpsrld %xmm1, $10, %xmm5 +; vandnps %xmm5, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -230,10 +230,10 @@ block0(v0: f32x4, v1: f32x4): ; vminps %xmm1, %xmm0, %xmm3 ; vminps %xmm0, %xmm1, %xmm5 ; vorps %xmm5, %xmm3, %xmm7 -; vcmpunordps %xmm5, %xmm7, %xmm9 -; vorps %xmm9, %xmm7, %xmm11 -; vpsrld $0xa, %xmm9, %xmm13 -; vandnps %xmm11, %xmm13, %xmm0 +; vcmpunordps %xmm5, %xmm7, %xmm1 +; vorps %xmm1, %xmm7, %xmm3 +; vpsrld $0xa, %xmm1, %xmm5 +; vandnps %xmm3, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -251,10 +251,10 @@ block0(v0: f64x2, v1: f64x2): ; vminpd %xmm0, %xmm1, %xmm3 ; vminpd %xmm1, %xmm0, %xmm5 ; vorpd %xmm3, %xmm5, %xmm7 -; vcmppd $3, %xmm3, %xmm5, %xmm9 -; vorpd %xmm7, %xmm9, %xmm11 -; vpsrlq %xmm9, $13, %xmm13 -; vandnpd %xmm13, %xmm11, %xmm0 +; vcmppd $3, %xmm3, %xmm5, %xmm1 +; vorpd %xmm7, %xmm1, %xmm3 +; vpsrlq %xmm1, $13, %xmm5 +; vandnpd %xmm5, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -267,10 +267,10 @@ block0(v0: f64x2, v1: f64x2): ; vminpd %xmm1, %xmm0, %xmm3 ; vminpd %xmm0, %xmm1, %xmm5 ; vorpd %xmm5, %xmm3, %xmm7 -; vcmpunordpd %xmm5, %xmm3, %xmm9 -; vorpd %xmm9, %xmm7, %xmm11 -; vpsrlq $0xd, %xmm9, %xmm13 -; vandnpd %xmm11, %xmm13, %xmm0 +; vcmpunordpd %xmm5, %xmm3, %xmm1 +; vorpd %xmm1, %xmm7, %xmm3 +; vpsrlq $0xd, %xmm1, 
%xmm5 +; vandnpd %xmm3, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -288,11 +288,11 @@ block0(v0: f32x4, v1: f32x4): ; vmaxps %xmm0, %xmm1, %xmm3 ; vmaxps %xmm1, %xmm0, %xmm5 ; vxorps %xmm3, %xmm5, %xmm7 -; vorps %xmm3, %xmm7, %xmm9 -; vsubps %xmm9, %xmm7, %xmm11 -; vcmpps $3, %xmm9, %xmm9, %xmm13 -; vpsrld %xmm13, $10, %xmm15 -; vandnps %xmm15, %xmm11, %xmm0 +; vorps %xmm3, %xmm7, %xmm1 +; vsubps %xmm1, %xmm7, %xmm3 +; vcmpps $3, %xmm1, %xmm1, %xmm5 +; vpsrld %xmm5, $10, %xmm7 +; vandnps %xmm7, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -305,11 +305,11 @@ block0(v0: f32x4, v1: f32x4): ; vmaxps %xmm1, %xmm0, %xmm3 ; vmaxps %xmm0, %xmm1, %xmm5 ; vxorps %xmm5, %xmm3, %xmm7 -; vorps %xmm7, %xmm3, %xmm9 -; vsubps %xmm7, %xmm9, %xmm11 -; vcmpunordps %xmm9, %xmm9, %xmm13 -; vpsrld $0xa, %xmm13, %xmm15 -; vandnps %xmm11, %xmm15, %xmm0 +; vorps %xmm7, %xmm3, %xmm1 +; vsubps %xmm7, %xmm1, %xmm3 +; vcmpunordps %xmm1, %xmm1, %xmm5 +; vpsrld $0xa, %xmm5, %xmm7 +; vandnps %xmm3, %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -327,11 +327,11 @@ block0(v0: f64x2, v1: f64x2): ; vmaxpd %xmm0, %xmm1, %xmm3 ; vmaxpd %xmm1, %xmm0, %xmm5 ; vxorpd %xmm3, %xmm5, %xmm7 -; vorpd %xmm3, %xmm7, %xmm9 -; vsubpd %xmm9, %xmm7, %xmm11 -; vcmppd $3, %xmm9, %xmm9, %xmm13 -; vpsrlq %xmm13, $13, %xmm15 -; vandnpd %xmm15, %xmm11, %xmm0 +; vorpd %xmm3, %xmm7, %xmm1 +; vsubpd %xmm1, %xmm7, %xmm3 +; vcmppd $3, %xmm1, %xmm1, %xmm5 +; vpsrlq %xmm5, $13, %xmm7 +; vandnpd %xmm7, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -344,11 +344,11 @@ block0(v0: f64x2, v1: f64x2): ; vmaxpd %xmm1, %xmm0, %xmm3 ; vmaxpd %xmm0, %xmm1, %xmm5 ; vxorpd %xmm5, %xmm3, %xmm7 -; vorpd %xmm7, %xmm3, %xmm9 -; vsubpd %xmm7, %xmm9, %xmm11 -; vcmpunordpd %xmm9, %xmm9, %xmm13 -; vpsrlq $0xd, %xmm13, %xmm15 -; vandnpd %xmm11, %xmm15, %xmm0 +; vorpd %xmm7, %xmm3, %xmm1 +; vsubpd %xmm7, %xmm1, %xmm3 +; vcmpunordpd %xmm1, %xmm1, %xmm5 +; vpsrlq $0xd, %xmm5, %xmm7 +; vandnpd %xmm3, %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index 805258582eb4..26f10fdcca1a 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -43,9 +43,9 @@ block0(v0: i32x4, v1: i32x4): ; block0: ; pmaxud %xmm0, %xmm1, %xmm0 ; pcmpeqd %xmm0, %xmm1, %xmm0 -; uninit %xmm8 -; pcmpeqd %xmm8, %xmm8, %xmm8 -; pxor %xmm0, %xmm8, %xmm0 +; uninit %xmm1 +; pcmpeqd %xmm1, %xmm1, %xmm1 +; pxor %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -57,8 +57,8 @@ block0(v0: i32x4, v1: i32x4): ; block1: ; offset 0x4 ; pmaxud %xmm1, %xmm0 ; pcmpeqd %xmm1, %xmm0 -; pcmpeqd %xmm8, %xmm8 -; pxor %xmm8, %xmm0 +; pcmpeqd %xmm1, %xmm1 +; pxor %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif index c7a3a92af894..9e994d56593a 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif @@ -178,8 +178,8 @@ block0(v0: i64): ; uninit %xmm2 ; vpinsrb $0, %xmm2, 0(%rdi), %xmm4 ; uninit %xmm6 -; vpxor %xmm6, %xmm6, %xmm8 -; vpshufb %xmm4, %xmm8, %xmm0 +; vpxor %xmm6, %xmm6, %xmm0 +; vpshufb %xmm4, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -190,8 +190,8 @@ block0(v0: i64): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; vpinsrb 
$0, (%rdi), %xmm2, %xmm4 ; trap: heap_oob -; vpxor %xmm6, %xmm6, %xmm8 -; vpshufb %xmm8, %xmm4, %xmm0 +; vpxor %xmm6, %xmm6, %xmm0 +; vpshufb %xmm0, %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif index 4941dda84a70..4890d8bc4f75 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif @@ -18,8 +18,8 @@ block0(v0: i8x16, v1: i8x16): ; pmovsxbw %xmm6, %xmm0 ; movdqa %xmm1, %xmm6 ; palignr $8, %xmm6, %xmm1, %xmm6 -; pmovsxbw %xmm6, %xmm8 -; pmullw %xmm0, %xmm8, %xmm0 +; pmovsxbw %xmm6, %xmm1 +; pmullw %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -34,8 +34,8 @@ block0(v0: i8x16, v1: i8x16): ; pmovsxbw %xmm6, %xmm0 ; movdqa %xmm1, %xmm6 ; palignr $8, %xmm1, %xmm6 -; pmovsxbw %xmm6, %xmm8 -; pmullw %xmm8, %xmm0 +; pmovsxbw %xmm6, %xmm1 +; pmullw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -223,14 +223,14 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; uninit %xmm8 -; pxor %xmm8, %xmm8, %xmm8 -; punpckhbw %xmm0, %xmm8, %xmm0 -; uninit %xmm8 -; pxor %xmm8, %xmm8, %xmm8 -; movdqa %xmm1, %xmm11 -; punpckhbw %xmm11, %xmm8, %xmm11 -; pmullw %xmm0, %xmm11, %xmm0 +; uninit %xmm2 +; pxor %xmm2, %xmm2, %xmm2 +; punpckhbw %xmm0, %xmm2, %xmm0 +; uninit %xmm2 +; pxor %xmm2, %xmm2, %xmm2 +; movdqa %xmm1, %xmm3 +; punpckhbw %xmm3, %xmm2, %xmm3 +; pmullw %xmm0, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -240,12 +240,12 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; pxor %xmm8, %xmm8 -; punpckhbw %xmm8, %xmm0 -; pxor %xmm8, %xmm8 -; movdqa %xmm1, %xmm11 -; punpckhbw %xmm8, %xmm11 -; pmullw %xmm11, %xmm0 +; pxor %xmm2, %xmm2 +; punpckhbw %xmm2, %xmm0 +; pxor %xmm2, %xmm2 +; movdqa %xmm1, %xmm3 +; punpckhbw %xmm2, %xmm3 +; pmullw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat b/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat index 3d5a9fb3363b..381a74fe34c8 100644 --- a/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat +++ b/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat @@ -47,10 +47,10 @@ ;; vcmpps $0, %xmm0, %xmm0, %xmm3 ;; vandps %xmm0, %xmm3, %xmm5 ;; vpxor %xmm3, %xmm5, %xmm7 -;; vcvttps2dq %xmm5, %xmm9 -;; vpand %xmm9, %xmm7, %xmm11 -;; vpsrad %xmm11, $31, %xmm13 -;; vpxor %xmm13, %xmm9, %xmm0 +;; vcvttps2dq %xmm5, %xmm1 +;; vpand %xmm1, %xmm7, %xmm3 +;; vpsrad %xmm3, $31, %xmm5 +;; vpxor %xmm5, %xmm1, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp @@ -65,19 +65,19 @@ ;; block0: ;; uninit %xmm3 ;; vxorps %xmm3, %xmm3, %xmm5 -;; vmaxps %xmm0, %xmm5, %xmm7 -;; vpcmpeqd %xmm5, %xmm5, %xmm9 -;; vpsrld %xmm9, $1, %xmm11 -;; vcvtdq2ps %xmm11, %xmm13 -;; vcvttps2dq %xmm7, %xmm15 -;; vsubps %xmm7, %xmm13, %xmm1 -;; vcmpps $2, %xmm13, %xmm1, %xmm3 +;; vmaxps %xmm0, %xmm5, %xmm0 +;; vpcmpeqd %xmm5, %xmm5, %xmm1 +;; vpsrld %xmm1, $1, %xmm3 +;; vcvtdq2ps %xmm3, %xmm5 +;; vcvttps2dq %xmm0, %xmm7 +;; vsubps %xmm0, %xmm5, %xmm1 +;; vcmpps $2, %xmm5, %xmm1, %xmm3 ;; vcvttps2dq %xmm1, %xmm5 -;; vpxor %xmm5, %xmm3, %xmm7 -;; uninit %xmm9 -;; vpxor %xmm9, %xmm9, %xmm11 -;; vpmaxsd %xmm7, %xmm11, %xmm13 -;; vpaddd %xmm13, %xmm15, %xmm0 +;; vpxor %xmm5, %xmm3, %xmm0 +;; uninit %xmm1 +;; vpxor %xmm1, %xmm1, %xmm3 +;; vpmaxsd %xmm0, %xmm3, %xmm5 +;; vpaddd %xmm5, 
%xmm7, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp @@ -109,10 +109,10 @@ ;; uninit %xmm3 ;; vxorpd %xmm3, %xmm3, %xmm5 ;; vmaxpd %xmm0, %xmm5, %xmm7 -;; vminpd %xmm7, const(0), %xmm9 -;; vroundpd $3, %xmm9, %xmm11 -;; vaddpd %xmm11, const(1), %xmm13 -;; vshufps $136, %xmm13, %xmm5, %xmm0 +;; vminpd %xmm7, const(0), %xmm1 +;; vroundpd $3, %xmm1, %xmm3 +;; vaddpd %xmm3, const(1), %xmm6 +;; vshufps $136, %xmm6, %xmm5, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp @@ -125,15 +125,15 @@ ;; movq %rsp, %rbp ;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ;; block0: -;; vpmovsxbw %xmm0, %xmm12 -;; vpmovsxbw %xmm1, %xmm13 -;; vpmullw %xmm12, %xmm13, %xmm12 -;; vpalignr $8, %xmm0, %xmm0, %xmm11 -;; vpmovsxbw %xmm11, %xmm13 -;; vpalignr $8, %xmm1, %xmm1, %xmm11 -;; vpmovsxbw %xmm11, %xmm14 -;; vpmullw %xmm13, %xmm14, %xmm13 -;; vphaddw %xmm12, %xmm13, %xmm0 +;; vpmovsxbw %xmm0, %xmm4 +;; vpmovsxbw %xmm1, %xmm5 +;; vpmullw %xmm4, %xmm5, %xmm4 +;; vpalignr $8, %xmm0, %xmm0, %xmm3 +;; vpmovsxbw %xmm3, %xmm5 +;; vpalignr $8, %xmm1, %xmm1, %xmm3 +;; vpmovsxbw %xmm3, %xmm6 +;; vpmullw %xmm5, %xmm6, %xmm5 +;; vphaddw %xmm4, %xmm5, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp @@ -146,17 +146,17 @@ ;; movq %rsp, %rbp ;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ;; block0: -;; vpmovsxbw %xmm0, %xmm15 +;; vpmovsxbw %xmm0, %xmm7 ;; vpmovsxbw %xmm1, %xmm3 -;; vpmullw %xmm15, %xmm3, %xmm15 -;; vpalignr $8, %xmm0, %xmm0, %xmm14 -;; vpmovsxbw %xmm14, %xmm0 -;; vpalignr $8, %xmm1, %xmm1, %xmm14 -;; vpmovsxbw %xmm14, %xmm1 +;; vpmullw %xmm7, %xmm3, %xmm7 +;; vpalignr $8, %xmm0, %xmm0, %xmm6 +;; vpmovsxbw %xmm6, %xmm0 +;; vpalignr $8, %xmm1, %xmm1, %xmm6 +;; vpmovsxbw %xmm6, %xmm1 ;; vpmullw %xmm0, %xmm1, %xmm0 -;; vphaddw %xmm15, %xmm0, %xmm15 -;; vpmaddwd %xmm15, const(0), %xmm15 -;; vpaddd %xmm15, %xmm2, %xmm0 +;; vphaddw %xmm7, %xmm0, %xmm7 +;; vpmaddwd %xmm7, const(0), %xmm7 +;; vpaddd %xmm7, %xmm2, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat b/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat index 39ebc2324b95..d3bbfd4441e2 100644 --- a/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat +++ b/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat @@ -58,20 +58,20 @@ ;; block0: ;; uninit %xmm7 ;; xorps %xmm7, %xmm7, %xmm7 -;; movdqa %xmm0, %xmm12 -;; maxps %xmm12, %xmm7, %xmm12 +;; movdqa %xmm0, %xmm4 +;; maxps %xmm4, %xmm7, %xmm4 ;; pcmpeqd %xmm7, %xmm7, %xmm7 ;; psrld %xmm7, $1, %xmm7 ;; cvtdq2ps %xmm7, %xmm1 -;; cvttps2dq %xmm12, %xmm15 -;; subps %xmm12, %xmm1, %xmm12 -;; cmpps $2, %xmm1, %xmm12, %xmm1 -;; cvttps2dq %xmm12, %xmm0 +;; cvttps2dq %xmm4, %xmm7 +;; subps %xmm4, %xmm1, %xmm4 +;; cmpps $2, %xmm1, %xmm4, %xmm1 +;; cvttps2dq %xmm4, %xmm0 ;; pxor %xmm0, %xmm1, %xmm0 -;; uninit %xmm10 -;; pxor %xmm10, %xmm10, %xmm10 -;; pmaxsd %xmm0, %xmm10, %xmm0 -;; paddd %xmm0, %xmm15, %xmm0 +;; uninit %xmm2 +;; pxor %xmm2, %xmm2, %xmm2 +;; pmaxsd %xmm0, %xmm2, %xmm0 +;; paddd %xmm0, %xmm7, %xmm0 ;; jmp label1 ;; block1: ;; movq %rbp, %rsp @@ -99,10 +99,9 @@ ;; block0: ;; uninit %xmm4 ;; xorpd %xmm4, %xmm4, %xmm4 -;; movdqa %xmm0, %xmm8 -;; maxpd %xmm8, %xmm4, %xmm8 -;; minpd %xmm8, const(0), %xmm8 -;; roundpd $3, %xmm8, %xmm0 +;; maxpd %xmm0, %xmm4, %xmm0 +;; minpd %xmm0, const(0), %xmm0 +;; roundpd $3, %xmm0, %xmm0 ;; addpd %xmm0, const(1), %xmm0 ;; shufps $136, %xmm0, %xmm4, %xmm0 ;; jmp label1 @@ -132,9 +131,10 @@ ;; movq 
%rsp, %rbp ;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ;; block0: -;; movdqa %xmm0, %xmm8 -;; movdqa %xmm1, %xmm0 -;; pmaddubsw %xmm0, %xmm8, %xmm0 +;; movdqa %xmm1, %xmm3 +;; movdqa %xmm0, %xmm1 +;; movdqa %xmm3, %xmm0 +;; pmaddubsw %xmm0, %xmm1, %xmm0 ;; pmaddwd %xmm0, const(0), %xmm0 ;; paddd %xmm0, %xmm2, %xmm0 ;; jmp label1