diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3ec6b9b795079..acb3debf80d8b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -596,10 +596,17 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // Op's type. For expedience, just check power-of-2 integer types. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned DemandedSize = DemandedBits.getActiveBits(); + // Types of LHS and RHS may differ before legalization (e.g., shl), so we + // need to check both. + unsigned MinWidth = + std::min(Op.getOperand(0).getValueType().getScalarSizeInBits(), + Op.getOperand(1).getValueType().getScalarSizeInBits()); for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize); - SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + SmallVTBits < MinWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); - if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) { + if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) && + TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, VT)) { // We found a type with free casts. SDValue X = DAG.getNode( Op.getOpcode(), dl, SmallVT, diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll index efd9d1105d975..e110e5c9274db 100644 --- a/llvm/test/CodeGen/X86/btc_bts_btr.ll +++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll @@ -1021,7 +1021,7 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) { ; X64-LABEL: btr_64_mask_zeros: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shll $2, %esi +; X64-NEXT: shlq $2, %rsi ; X64-NEXT: btrq %rsi, %rax ; X64-NEXT: retq ; @@ -1056,7 +1056,7 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) { ; X64-LABEL: bts_64_mask_zeros: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shll $2, %esi +; X64-NEXT: shlq $2, %rsi ; X64-NEXT: btsq %rsi, %rax ; X64-NEXT: retq ; @@ -1088,7 +1088,7 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) { ; X64-LABEL: btc_64_mask_zeros: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shll $2, %esi +; X64-NEXT: shlq $2, %rsi ; X64-NEXT: btcq %rsi, %rax ; X64-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll index 296ef52c3bff9..107f14a0e2d2c 100644 --- a/llvm/test/CodeGen/X86/narrow-shl-cst.ll +++ b/llvm/test/CodeGen/X86/narrow-shl-cst.ll @@ -151,7 +151,7 @@ define i32 @test12(i32 %x, ptr %y) nounwind { define i64 @test13(i64 %x, ptr %y) nounwind { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: -; CHECK-NEXT: addl %edi, %edi +; CHECK-NEXT: addq %rdi, %rdi ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: movq %rax, (%rsi) ; CHECK-NEXT: retq @@ -212,7 +212,7 @@ define i64 @test18(i64 %x) nounwind { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: shll $10, %eax +; CHECK-NEXT: shlq $10, %rax ; CHECK-NEXT: retq %and = shl i64 %x, 10 %shl = and i64 %and, 261120 @@ -234,7 +234,7 @@ define i64 @test20(i64 %x) nounwind { ; CHECK-LABEL: test20: ; CHECK: # %bb.0: ; CHECK-NEXT: movzwl %di, %eax -; CHECK-NEXT: shll $10, %eax +; CHECK-NEXT: shlq $10, %rax ; CHECK-NEXT: retq %and = shl i64 %x, 10 %shl = and i64 %and, 67107840 diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index 3bd3be62fb4c8..9da22f635c266 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -45,8 +45,8 @@ define zeroext i1 @g(i32 %x) optsize { define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { ; CHECK-LABEL: PR46237: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: shll $6, %eax +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: shlq $6, %rax ; CHECK-NEXT: movzbl %al, %ecx ; CHECK-NEXT: andl $7, %esi ; CHECK-NEXT: andl $7, %edx diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll index 0e65e121531bf..db8cec61acd6b 100644 --- a/llvm/test/CodeGen/X86/pr49162.ll +++ b/llvm/test/CodeGen/X86/pr49162.ll @@ -17,10 +17,7 @@ define ptr @PR49162(ptr %base, ptr %ptr160) { ; ; X64-LABEL: PR49162: ; X64: # %bb.0: -; X64-NEXT: movl 8(%rsi), %eax -; X64-NEXT: shll $16, %eax -; X64-NEXT: cltq -; X64-NEXT: sarq $16, %rax +; X64-NEXT: movswq 8(%rsi), %rax ; X64-NEXT: leaq (%rdi,%rax,4), %rax ; X64-NEXT: retq %load160 = load i160, ptr %ptr160, align 4 diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll new file mode 100644 index 0000000000000..b2543c08328c7 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr92720.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s + +; Make sure we don't crash when shrinking the shift amount before legalization. +define i64 @pr92720(i64 %x) { +; CHECK-LABEL: pr92720: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000 +; CHECK-NEXT: retq + %or = or i64 %x, 255 + %sub = sub i64 0, %or + %shl = shl i64 1, %sub + %sext = shl i64 %shl, 32 + ret i64 %sext +} diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index 785b97d8c2402..53d3367cce4d3 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -13,7 +13,7 @@ define i256 @test1(i256 %a) nounwind { ; ILP-LABEL: test1: ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: leal (%rsi,%rsi), %ecx +; ILP-NEXT: leaq (%rsi,%rsi), %rcx ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) @@ -43,7 +43,7 @@ define i256 @test1(i256 %a) nounwind { ; ILP-NEXT: shlq %cl, %rsi ; ILP-NEXT: notb %cl ; ILP-NEXT: shrq %rdx -; ILP-NEXT: # kill: def $cl killed $cl killed $ecx +; ILP-NEXT: # kill: def $cl killed $cl killed $rcx ; ILP-NEXT: shrq %cl, %rdx ; ILP-NEXT: orq %rsi, %rdx ; ILP-NEXT: movq %rdx, 16(%rax) @@ -60,7 +60,7 @@ define i256 @test1(i256 %a) nounwind { ; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) -; HYBRID-NEXT: addl %esi, %esi +; HYBRID-NEXT: addq %rsi, %rsi ; HYBRID-NEXT: addb $3, %sil ; HYBRID-NEXT: movl %esi, %ecx ; HYBRID-NEXT: andb $7, %cl @@ -97,7 +97,7 @@ define i256 @test1(i256 %a) nounwind { ; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) -; BURR-NEXT: addl %esi, %esi +; BURR-NEXT: addq %rsi, %rsi ; BURR-NEXT: addb $3, %sil ; BURR-NEXT: movl %esi, %ecx ; BURR-NEXT: andb $7, %cl @@ -126,7 +126,7 @@ define i256 @test1(i256 %a) nounwind { ; SRC-LABEL: test1: ; SRC: # %bb.0: ; SRC-NEXT: movq %rdi, %rax -; SRC-NEXT: addl %esi, %esi +; SRC-NEXT: addq %rsi, %rsi ; SRC-NEXT: addb $3, %sil ; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) @@ -167,7 +167,7 @@ define i256 @test1(i256 %a) nounwind { ; LIN-LABEL: test1: ; LIN: # %bb.0: ; LIN-NEXT: movq %rdi, %rax -; LIN-NEXT: leal (%rsi,%rsi), %edx +; LIN-NEXT: leaq (%rsi,%rsi), %rdx ; LIN-NEXT: addb $3, %dl ; LIN-NEXT: movl %edx, %ecx ; LIN-NEXT: shrb $3, %cl diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 85c1e25c29ed5..739d5b1d32e86 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -3615,13 +3615,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) { ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movl 8(%rdi), %ecx -; SSE2-NEXT: shll $28, %ecx -; SSE2-NEXT: movq %rax, %rdx -; SSE2-NEXT: shrq $51, %rdx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: sarl $15, %edx -; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: shldq $13, %rax, %rcx +; SSE2-NEXT: shll $15, %ecx +; SSE2-NEXT: sarl $15, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: shrq $34, %rax ; SSE2-NEXT: shll $15, %eax ; SSE2-NEXT: sarl $15, %eax @@ -3644,13 +3641,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) { ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movl 8(%rdi), %ecx -; SSSE3-NEXT: shll $28, %ecx -; SSSE3-NEXT: movq %rax, %rdx -; SSSE3-NEXT: shrq $51, %rdx -; SSSE3-NEXT: shll $15, %edx -; SSSE3-NEXT: orl %ecx, %edx -; SSSE3-NEXT: sarl $15, %edx -; SSSE3-NEXT: movd %edx, %xmm1 +; SSSE3-NEXT: shldq $13, %rax, %rcx +; SSSE3-NEXT: shll $15, %ecx +; SSSE3-NEXT: sarl $15, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: shrq $34, %rax ; SSSE3-NEXT: shll $15, %eax ; SSSE3-NEXT: sarl $15, %eax @@ -3662,53 +3656,47 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) { ; SSE41-LABEL: sext_4i17_to_4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movq (%rdi), %rax -; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shrq $17, %rcx +; SSE41-NEXT: movl %eax, %ecx +; SSE41-NEXT: movq %rax, %rdx +; SSE41-NEXT: movl 8(%rdi), %esi +; SSE41-NEXT: shldq $13, %rax, %rsi +; SSE41-NEXT: shrq $17, %rax +; SSE41-NEXT: shll $15, %eax +; SSE41-NEXT: sarl $15, %eax ; SSE41-NEXT: shll $15, %ecx ; SSE41-NEXT: sarl $15, %ecx -; SSE41-NEXT: movl %eax, %edx +; SSE41-NEXT: movd %ecx, %xmm0 +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: shrq $34, %rdx ; SSE41-NEXT: shll $15, %edx ; SSE41-NEXT: sarl $15, %edx -; SSE41-NEXT: movd %edx, %xmm0 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 -; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shrq $34, %rcx -; SSE41-NEXT: shll $15, %ecx -; SSE41-NEXT: sarl $15, %ecx -; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 -; SSE41-NEXT: movl 8(%rdi), %ecx -; SSE41-NEXT: shll $28, %ecx -; SSE41-NEXT: shrq $51, %rax -; SSE41-NEXT: shll $15, %eax -; SSE41-NEXT: orl %ecx, %eax -; SSE41-NEXT: sarl $15, %eax -; SSE41-NEXT: pinsrd $3, %eax, %xmm0 +; SSE41-NEXT: pinsrd $2, %edx, %xmm0 +; SSE41-NEXT: shll $15, %esi +; SSE41-NEXT: sarl $15, %esi +; SSE41-NEXT: pinsrd $3, %esi, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: sext_4i17_to_4i32: ; AVX: # %bb.0: ; AVX-NEXT: movq (%rdi), %rax -; AVX-NEXT: movq %rax, %rcx -; AVX-NEXT: shrq $17, %rcx +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: movq %rax, %rdx +; AVX-NEXT: movl 8(%rdi), %esi +; AVX-NEXT: shldq $13, %rax, %rsi +; AVX-NEXT: shrq $17, %rax +; AVX-NEXT: shll $15, %eax +; AVX-NEXT: sarl $15, %eax ; AVX-NEXT: shll $15, %ecx ; AVX-NEXT: sarl $15, %ecx -; AVX-NEXT: movl %eax, %edx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: shrq $34, %rdx ; AVX-NEXT: shll $15, %edx ; AVX-NEXT: sarl $15, %edx -; AVX-NEXT: vmovd %edx, %xmm0 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX-NEXT: movq %rax, %rcx -; AVX-NEXT: shrq $34, %rcx -; AVX-NEXT: shll $15, %ecx -; AVX-NEXT: sarl $15, %ecx -; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; AVX-NEXT: movl 8(%rdi), %ecx -; AVX-NEXT: shll $28, %ecx -; AVX-NEXT: shrq $51, %rax -; AVX-NEXT: shll $15, %eax -; AVX-NEXT: orl %ecx, %eax -; AVX-NEXT: sarl $15, %eax -; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 +; AVX-NEXT: shll $15, %esi +; AVX-NEXT: sarl $15, %esi +; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; X86-SSE2-LABEL: sext_4i17_to_4i32: diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 74926f46ffa43..25c438cc4c4fc 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -2332,11 +2332,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movl 8(%rdi), %ecx -; SSE2-NEXT: shll $13, %ecx -; SSE2-NEXT: movq %rax, %rdx -; SSE2-NEXT: shrq $51, %rdx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: shldq $13, %rax, %rcx +; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: shrq $34, %rax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -2353,11 +2350,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movl 8(%rdi), %ecx -; SSSE3-NEXT: shll $13, %ecx -; SSSE3-NEXT: movq %rax, %rdx -; SSSE3-NEXT: shrq $51, %rdx -; SSSE3-NEXT: orl %ecx, %edx -; SSSE3-NEXT: movd %edx, %xmm1 +; SSSE3-NEXT: shldq $13, %rax, %rcx +; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: shrq $34, %rax ; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -2367,15 +2361,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; ; SSE41-LABEL: zext_4i17_to_4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: movl 8(%rdi), %eax -; SSE41-NEXT: shll $13, %eax -; SSE41-NEXT: movq (%rdi), %rcx -; SSE41-NEXT: movq %rcx, %rdx -; SSE41-NEXT: shrq $51, %rdx -; SSE41-NEXT: orl %eax, %edx -; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: movq (%rdi), %rax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: movl 8(%rdi), %edx +; SSE41-NEXT: shldq $13, %rax, %rdx ; SSE41-NEXT: shrq $17, %rax -; SSE41-NEXT: movd %ecx, %xmm0 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0 ; SSE41-NEXT: shrq $34, %rcx ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 @@ -2385,15 +2376,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; ; AVX1-LABEL: zext_4i17_to_4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: movl 8(%rdi), %eax -; AVX1-NEXT: shll $13, %eax -; AVX1-NEXT: movq (%rdi), %rcx -; AVX1-NEXT: movq %rcx, %rdx -; AVX1-NEXT: shrq $51, %rdx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: movq (%rdi), %rax +; AVX1-NEXT: vmovd %eax, %xmm0 +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: movl 8(%rdi), %edx +; AVX1-NEXT: shldq $13, %rax, %rdx ; AVX1-NEXT: shrq $17, %rax -; AVX1-NEXT: vmovd %ecx, %xmm0 ; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX1-NEXT: shrq $34, %rcx ; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 @@ -2403,15 +2391,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; ; AVX2-LABEL: zext_4i17_to_4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: movl 8(%rdi), %eax -; AVX2-NEXT: shll $13, %eax -; AVX2-NEXT: movq (%rdi), %rcx -; AVX2-NEXT: movq %rcx, %rdx -; AVX2-NEXT: shrq $51, %rdx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: movl 8(%rdi), %edx +; AVX2-NEXT: shldq $13, %rax, %rdx ; AVX2-NEXT: shrq $17, %rax -; AVX2-NEXT: vmovd %ecx, %xmm0 ; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX2-NEXT: shrq $34, %rcx ; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 @@ -2422,15 +2407,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) { ; ; AVX512-LABEL: zext_4i17_to_4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: movl 8(%rdi), %eax -; AVX512-NEXT: shll $13, %eax -; AVX512-NEXT: movq (%rdi), %rcx -; AVX512-NEXT: movq %rcx, %rdx -; AVX512-NEXT: shrq $51, %rdx -; AVX512-NEXT: orl %eax, %edx -; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: movq %rax, %rcx +; AVX512-NEXT: movl 8(%rdi), %edx +; AVX512-NEXT: shldq $13, %rax, %rdx ; AVX512-NEXT: shrq $17, %rax -; AVX512-NEXT: vmovd %ecx, %xmm0 ; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX512-NEXT: shrq $34, %rcx ; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll index 9ae1f270e8833..2611399458c27 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -171,16 +171,16 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx ; X64-NO-BMI2-NEXT: movl (%rdi), %eax -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: movl (%rdi), %eax ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax ; X64-BMI2-NEXT: movb %al, (%rdx) @@ -248,16 +248,16 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx ; X64-NO-BMI2-NEXT: movl (%rdi), %eax -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movw %ax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: movl (%rdi), %eax ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax ; X64-BMI2-NEXT: movw %ax, (%rdx) @@ -324,16 +324,16 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx ; X64-NO-BMI2-NEXT: movl (%rdi), %eax -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movl %eax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: movl (%rdi), %eax ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax ; X64-BMI2-NEXT: movl %eax, (%rdx) @@ -402,7 +402,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl @@ -414,7 +414,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-SHLD: # %bb.0: ; X64-SHLD-NEXT: movq %rsi, %rcx ; X64-SHLD-NEXT: movq (%rdi), %rax -; X64-SHLD-NEXT: shll $3, %ecx +; X64-SHLD-NEXT: shlq $3, %rcx ; X64-SHLD-NEXT: xorl %esi, %esi ; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-SHLD-NEXT: testb $64, %cl @@ -424,7 +424,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil @@ -475,7 +475,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl @@ -487,7 +487,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-SHLD: # %bb.0: ; X64-SHLD-NEXT: movq %rsi, %rcx ; X64-SHLD-NEXT: movq (%rdi), %rax -; X64-SHLD-NEXT: shll $3, %ecx +; X64-SHLD-NEXT: shlq $3, %rcx ; X64-SHLD-NEXT: xorl %esi, %esi ; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-SHLD-NEXT: testb $64, %cl @@ -497,7 +497,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil @@ -547,7 +547,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl @@ -559,7 +559,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-SHLD: # %bb.0: ; X64-SHLD-NEXT: movq %rsi, %rcx ; X64-SHLD-NEXT: movq (%rdi), %rax -; X64-SHLD-NEXT: shll $3, %ecx +; X64-SHLD-NEXT: shlq $3, %rcx ; X64-SHLD-NEXT: xorl %esi, %esi ; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-SHLD-NEXT: testb $64, %cl @@ -569,7 +569,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil @@ -619,7 +619,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl @@ -631,7 +631,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-SHLD: # %bb.0: ; X64-SHLD-NEXT: movq %rsi, %rcx ; X64-SHLD-NEXT: movq (%rdi), %rax -; X64-SHLD-NEXT: shll $3, %ecx +; X64-SHLD-NEXT: shlq $3, %rcx ; X64-SHLD-NEXT: xorl %esi, %esi ; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-SHLD-NEXT: testb $64, %cl @@ -641,7 +641,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil @@ -692,7 +692,7 @@ define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -759,7 +759,7 @@ define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -825,7 +825,7 @@ define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -891,7 +891,7 @@ define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6 ; X64-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -959,7 +959,7 @@ define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i ; X64-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll index 929671d674e5e..38a3a0ca7ab05 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll @@ -169,15 +169,15 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movb %al, (%rdx) ; X64-BMI2-NEXT: retq @@ -293,15 +293,15 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movw %ax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movw %ax, (%rdx) ; X64-BMI2-NEXT: retq @@ -414,15 +414,15 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx +; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movl %eax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: shll $3, %esi +; X64-BMI2-NEXT: shlq $3, %rsi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movl %eax, (%rdx) ; X64-BMI2-NEXT: retq @@ -537,7 +537,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi @@ -558,7 +558,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -572,7 +572,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx @@ -592,7 +592,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -645,7 +645,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi @@ -666,7 +666,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -680,7 +680,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx @@ -700,7 +700,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -752,7 +752,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi @@ -773,7 +773,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -787,7 +787,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx @@ -807,7 +807,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -859,7 +859,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi @@ -880,7 +880,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -894,7 +894,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx @@ -914,7 +914,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi @@ -971,7 +971,7 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -1042,7 +1042,7 @@ define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -1112,7 +1112,7 @@ define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -1182,7 +1182,7 @@ define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) @@ -1254,7 +1254,7 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst ; X64: # %bb.0: ; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NEXT: shll $3, %esi +; X64-NEXT: shlq $3, %rsi ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll index 3f64a383abd2c..9d9e13dd8dfe5 100644 --- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll +++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll @@ -15,7 +15,7 @@ define i64 @test1(ptr %data) { ; X64-LABEL: test1: ; X64: # %bb.0: # %entry ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: shll $2, %eax +; X64-NEXT: shlq $2, %rax ; X64-NEXT: andl $60, %eax ; X64-NEXT: retq entry: