From aad725928d767243dff1fc66f26b7afb17e29865 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 23 Dec 2022 20:13:23 +0300 Subject: [PATCH] [NFC][Codegen][X86] Add codegen test coverage for the variably-indexed load of alloca w/zero upper half --- ...ad-of-small-alloca-with-zero-upper-half.ll | 10973 ++++++++++++++++ 1 file changed, 10973 insertions(+) create mode 100644 llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll new file mode 100644 index 00000000000000..d1ad4192a6d2db --- /dev/null +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -0,0 +1,10973 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-NO-SHLD,X32-NO-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-SHLD,X32-NO-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s 
--check-prefixes=ALL,X32,X32-BMI2,X32-NO-SHLD,X32-HAVE-BMI2-NO-SHLD
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-SHLD,X32-HAVE-BMI2-HAVE-SHLD
+
+define void @load_1byte_chunk_of_2byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movzbl (%rdi), %eax
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrl %cl, %eax
+; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movzbl (%rdi), %eax
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    movb %al, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2:       # %bb.0:
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT:    movzbl (%eax), %eax
+; X32-NO-BMI2-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X32-NO-BMI2-NEXT:    shrl %cl, %eax
+; X32-NO-BMI2-NEXT:    movb %al, (%edx)
+; X32-NO-BMI2-NEXT:    retl
+;
+; X32-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca_with_zero_upper_half:
+; X32-BMI2:       # %bb.0:
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT:    movzbl (%edx), %edx
+; X32-BMI2-NEXT:    shll $3, %ecx
+; X32-BMI2-NEXT:    shrxl %ecx, %edx, %ecx
+; X32-BMI2-NEXT:    movb %cl, (%eax)
+; X32-BMI2-NEXT:    retl
+  %init1 = load i8, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.insert = insertelement <2 x i8> <i8 poison, i8 0>, i8 %init1, i64 0 ; lane 0 = loaded byte, lane 1 = zero. NOTE(review): base constant restored - confirm upstream
+  %intermediate.val.frozen = freeze <2 x i8> %intermediate.sroa.0.0.vec.insert
+  %intermediate.val.frozen.bits = bitcast <2 x i8> %intermediate.val.frozen to i16
+  %byteOff.tr = trunc i64 %byteOff to i16
+  %byteOff.numbits.wide = shl i16 %byteOff.tr, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen.bits.positioned = lshr i16 %intermediate.val.frozen.bits, %byteOff.numbits.wide ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i16 %intermediate.val.frozen.bits.positioned to i8
+  %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0
+  store <1 x i8> %1, ptr %dst, align 1
+  ret void
+}
+
+define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movzwl (%rdi), %eax
+; X64-NO-BMI2-NEXT:    movzwl %ax, %eax
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrl %cl, %eax
+; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movzwl (%rdi), %eax
+; X64-BMI2-NEXT:    movzwl %ax, %eax
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    movb %al, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2:       # %bb.0:
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT:    movzwl (%edx), %edx
+; X32-NO-BMI2-NEXT:    movzwl %dx, %edx
+; X32-NO-BMI2-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X32-NO-BMI2-NEXT:    shrl %cl, %edx
+; X32-NO-BMI2-NEXT:    movb %dl, (%eax)
+; X32-NO-BMI2-NEXT:    retl
+;
+; X32-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X32-BMI2:       # %bb.0:
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT:    movzwl (%edx), %edx
+; X32-BMI2-NEXT:    movzwl %dx, %edx
+; X32-BMI2-NEXT:    shll $3, %ecx
+; X32-BMI2-NEXT:    shrxl %ecx, %edx, %ecx
+; X32-BMI2-NEXT:    movb %cl, (%eax)
+; X32-BMI2-NEXT:    retl
+  %init = load <2 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <2 x i8> %init, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; widen to 4 lanes. NOTE(review): mask restored, don't-care lanes assumed undef - confirm upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <4 x i8> %intermediate.sroa.0.0.vec.expand, <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; keep loaded lanes, zero the upper half. NOTE(review): operands restored - confirm upstream
+  %intermediate.val.frozen = freeze <4 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <4 x i8> %intermediate.val.frozen to i32
+  %byteOff.tr = trunc i64 %byteOff to i32
+  %byteOff.numbits.wide = shl i32 %byteOff.tr, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen.bits.positioned = lshr i32 %intermediate.val.frozen.bits, %byteOff.numbits.wide ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i32 %intermediate.val.frozen.bits.positioned to i8
+  %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0
+  store <1 x i8> %1, ptr %dst, align 1
+  ret void
+}
+
+define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movzwl (%rdi), %eax
+; X64-NO-BMI2-NEXT:    movzwl %ax, %eax
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrl %cl, %eax
+; X64-NO-BMI2-NEXT:    movw %ax, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movzwl (%rdi), %eax
+; X64-BMI2-NEXT:    movzwl %ax, %eax
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    movw %ax, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2:       # %bb.0:
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT:    movzwl (%edx), %edx
+; X32-NO-BMI2-NEXT:    movzwl %dx, %edx
+; X32-NO-BMI2-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X32-NO-BMI2-NEXT:    shrl %cl, %edx
+; X32-NO-BMI2-NEXT:    movw %dx, (%eax)
+; X32-NO-BMI2-NEXT:    retl
+;
+; X32-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
+; X32-BMI2:       # %bb.0:
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT:    movzwl (%edx), %edx
+; X32-BMI2-NEXT:    movzwl %dx, %edx
+; X32-BMI2-NEXT:    shll $3, %ecx
+; X32-BMI2-NEXT:    shrxl %ecx, %edx, %ecx
+; X32-BMI2-NEXT:    movw %cx, (%eax)
+; X32-BMI2-NEXT:    retl
+  %init = load <2 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <2 x i8> %init, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; widen to 4 lanes. NOTE(review): mask restored, don't-care lanes assumed undef - confirm upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <4 x i8> %intermediate.sroa.0.0.vec.expand, <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; keep loaded lanes, zero the upper half. NOTE(review): operands restored - confirm upstream
+  %intermediate.val.frozen = freeze <4 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <4 x i8> %intermediate.val.frozen to i32
+  %byteOff.tr = trunc i64 %byteOff to i32
+  %byteOff.numbits.wide = shl i32 %byteOff.tr, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen.bits.positioned = lshr i32 %intermediate.val.frozen.bits, %byteOff.numbits.wide ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i32 %intermediate.val.frozen.bits.positioned to i16
+  store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2
+  ret void
+}
+
+define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL:
load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    movq %xmm0, %rax
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrq %cl, %rax
+; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    movq %xmm0, %rax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movb %al, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-NO-SHLD:       # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll $3, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm1, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT:    cmovel %edi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movb %bl, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movb %dl, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-NO-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm1, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movb %dl, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movb %bl, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    retl
+  %init = load <4 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <4 x i8> %init, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen to 8 lanes. NOTE(review): mask restored, don't-care lanes assumed undef - confirm upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <8 x i8> %intermediate.sroa.0.0.vec.expand, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> ; keep loaded lanes, zero the upper half. NOTE(review): operands restored - confirm upstream
+  %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen = freeze <8 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64
+  %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i8
+  %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0
+  store <1 x i8> %1, ptr %dst, align 1
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    movq %xmm0, %rax
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrq %cl, %rax
+; X64-NO-BMI2-NEXT:    movw %ax, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    movq %xmm0, %rax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movw %ax, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-NO-SHLD:       # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll $3, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm1, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm0, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT:    cmovel %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movw %si, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movw %si, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-NO-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm1, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movw %dx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movw %si, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    retl
+  %init = load <4 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <4 x i8> %init, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen to 8 lanes. NOTE(review): mask restored, don't-care lanes assumed undef - confirm upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <8 x i8> %intermediate.sroa.0.0.vec.expand, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> ; keep loaded lanes, zero the upper half. NOTE(review): operands restored - confirm upstream
+  %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen = freeze <8 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64
+  %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i16
+  store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2
+  ret void
+}
+
+define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2:       # %bb.0:
+; X64-NO-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    movq %xmm0, %rax
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    shrq %cl, %rax
+; X64-NO-BMI2-NEXT:    movl %eax, (%rdx)
+; X64-NO-BMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    movq %xmm0, %rax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movl %eax, (%rdx)
+; X64-BMI2-NEXT:    retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-NO-SHLD:       # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll $3, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+;
X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm1, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movd %xmm0, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT:    cmovel %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    movl %esi, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-NO-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm1, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT:    retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movd %xmm0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT:    retl
+  %init = load <4 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <4 x i8> %init, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen to 8 lanes. NOTE(review): mask restored, don't-care lanes assumed undef - confirm upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <8 x i8> %intermediate.sroa.0.0.vec.expand, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> ; keep loaded lanes, zero the upper half. NOTE(review): operands restored - confirm upstream
+  %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 ; byte offset -> bit offset
+  %intermediate.val.frozen = freeze <8 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64
+  %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits ; bring the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i32
+  store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4
+  ret void
+}
+
+define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2-NO-SHLD:       # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT:    notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rax,%rax), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rdi, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT:    cmoveq %r8, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT:    movb %al, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT:    retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2-HAVE-SHLD:       # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq
%rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, 
%rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx 
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: negb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd 
{{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: 
load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <8 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <16 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast 
<16 x i8> %intermediate.val.frozen to i128 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 + %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i8 + %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 + store <1 x i8> %1, ptr %dst, align 1 + ret void +} + +define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; 
X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: 
+; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: 
subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: negb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 
%ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, 
%eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <8 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <16 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 + %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc 
i128 %intermediate.val.frozen.bits.positioned to i16 + store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2 + ret void +} + +define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; 
X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: negb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <8 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <16 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 + %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i32 + store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4 + ret void +} + +define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; 
X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; 
X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <8 x i8>, ptr %src, align 1 ; the 8 initialized bytes of the 16-byte value + %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; widen to 16 lanes: lanes 0-7 come from %init, lanes 8-15 are don't-care + %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; keep lanes 0-7, take the constant zeros (indices 24-31) for lanes 8-15, i.e. the zero upper half + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 ; byte offset to bit offset + %intermediate.val.frozen = freeze <16 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 + %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide ; move the requested chunk down to bit 0 + %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i64 ; keep the low 8 bytes + store i64 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 8 + ret void +} + +define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +;
X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd 
{{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: negb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al 
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <16 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <32 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <32 x i8> %intermediate.val.frozen to i256 + 
%byteOff.numbits.wide = zext i64 %byteOff.numbits to i256 + %intermediate.val.frozen.bits.positioned = lshr i256 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i256 %intermediate.val.frozen.bits.positioned to i8 + %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 + store <1 x i8> %1, ptr %dst, align 1 + ret void +} + +define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; 
X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: 
shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: negb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movw %si, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; 
X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
orl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd 
%xmm0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+  %init = load <16 x i8>, ptr %src, align 1
+  %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+  %byteOff.numbits = shl nuw nsw i64 %byteOff, 3
+  %intermediate.val.frozen = freeze <32 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <32 x i8> %intermediate.val.frozen to i256
+  %byteOff.numbits.wide = zext i64 %byteOff.numbits to i256
+  %intermediate.val.frozen.bits.positioned = lshr i256 %intermediate.val.frozen.bits, %byteOff.numbits.wide
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i256 %intermediate.val.frozen.bits.positioned to i16
+  store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2
+  ret void
+}
+
+define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
+; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0
+; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+  %init = load <16 x i8>, ptr %src, align 1 ; only 16 bytes are read from %src (unaligned)
+  %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; widen to 32 lanes, lanes 16-31 are don't-care here. NOTE(review): shuffle operands were lost in extraction and are reconstructed - confirm against upstream
+  %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> ; lanes 0-15 keep the loaded bytes, lanes 16-31 take the zero lanes of the constant operand
+  %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 ; byte offset times 8 gives the bit offset
+  %intermediate.val.frozen = freeze <32 x i8> %intermediate.sroa.0.0.vecblend
+  %intermediate.val.frozen.bits = bitcast <32 x i8> %intermediate.val.frozen to i256
+  %byteOff.numbits.wide = zext i64 %byteOff.numbits to i256
+  %intermediate.val.frozen.bits.positioned = lshr i256 %intermediate.val.frozen.bits, %byteOff.numbits.wide ; shift the requested chunk down to bit 0
+  %intermediate.val.frozen.bits.positioned.extracted = trunc i256 %intermediate.val.frozen.bits.positioned to i32 ; keep the low 4 bytes
+  store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4
+  ret void
+}
+
+define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
+; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rcx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rcx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL:
load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 
4(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, 
%dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel 
%ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
%ebx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $24, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, 
%dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $24, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <16 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <32 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <32 x i8> %intermediate.val.frozen to i256 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i256 + %intermediate.val.frozen.bits.positioned = lshr i256 %intermediate.val.frozen.bits, %byteOff.numbits.wide 
+ %intermediate.val.frozen.bits.positioned.extracted = trunc i256 %intermediate.val.frozen.bits.positioned to i64 + store i64 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 8 + ret void +} + +define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; 
X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %rdi +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $44, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; 
X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: 
shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax 
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx) +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $44, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 12(%edx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 8(%edx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%edx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 4(%edx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <16 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <32 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <32 x i8> %intermediate.val.frozen to i256 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i256 + %intermediate.val.frozen.bits.positioned = lshr i256 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i256 %intermediate.val.frozen.bits.positioned to i128 + store i128 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 16 + ret void +} + +define void @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; 
X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 
%xmm2, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 
(%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl 
$256, %esi # imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %r8b, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: 
testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%esi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i8 + %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 + store <1 x i8> %1, ptr %dst, align 1 + ret void +} + +define void @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd 
{{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, 
%edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi 
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, 
%r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %r8w, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; 
X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax +; 
X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael (%esp), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movw %bx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
subl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, 
%esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i16 + store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2 + ret void +} + +define void @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; 
X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, 
%esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq 
%cl, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, 
%r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %r8d, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, 
%ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb 
$32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, 
%edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i32 + store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4 + ret void +} + +define void @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { 
+; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, 
%rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r10, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rbx, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r11, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, 
%rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r8, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil 
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %rdi, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rcx, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r10, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $68, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; 
X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl 
%cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: 
addl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 
4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $68, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $72, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, 
%ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $-128, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $72, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $80, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl 
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $80, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $72, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%eax), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded 
Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $72, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> 
+ %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i64 + store i64 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 8 + ret void +} + +define void @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: 
shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r14, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r11, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 +; 
X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 8(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: 
leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r11, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r8b +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rdi, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r14d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %r9, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r9, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %r12, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r12d, %r12d +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r14d, %r8d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r15, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %rdi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r14b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # 
imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r8, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r15 +; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r11, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r10, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r14, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $112, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; 
X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) 
# 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), 
%ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %edx # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $112, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: subl $88, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%esi,8), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%edx,8), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
orl %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $88, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $124, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ecx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, (%esp) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al def $eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte 
Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, (%esp) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmovel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $124, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $92, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl 
%edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
cmovbl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $92, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl 
%edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i128 + store i128 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 16 + ret void +} + +define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm1 +; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm0 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi +; 
X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-NO-SHLD-NEXT: notb %al +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al +; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: 
testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r13, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdx, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, 24(%r8) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 16(%r8) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, 8(%r8) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, (%r8) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 +; 
X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq 
%cl, %rax, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 +; 
X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rdi, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r13d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r10, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rax, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r12d +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r12d, %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r10, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r12b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rax, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r13, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r15, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq 
%rcx, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %rdi, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq 
%rax, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $168, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 +; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] +; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%eax,8), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 
%edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%edx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; 
X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ebp # imm = 0x100 +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $168, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $140, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) 
# 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, 
%ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, 
%cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, 
%ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb 
$32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, 
%eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 28(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 8(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $140, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $192, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[3,3,3,3] +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, 
%eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%edi), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl 
%ebx, (%esp), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
shlxl %edx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dh +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dh +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 16(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $192, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $140, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, 
%edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $-128, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed 
$edx def $edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx 
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded 
Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 16(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $140, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %init = load <32 x i8>, ptr %src, align 1 + %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> + %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> + %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 + %intermediate.val.frozen = freeze <64 x i8> %intermediate.sroa.0.0.vecblend + %intermediate.val.frozen.bits = bitcast <64 x i8> %intermediate.val.frozen to i512 + %byteOff.numbits.wide = zext i64 %byteOff.numbits to i512 + %intermediate.val.frozen.bits.positioned = lshr i512 %intermediate.val.frozen.bits, %byteOff.numbits.wide + %intermediate.val.frozen.bits.positioned.extracted = trunc i512 %intermediate.val.frozen.bits.positioned to i256 + store i256 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 32 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; ALL: {{.*}} +; X32: {{.*}} +; X32-NO-SHLD: {{.*}} +; X32-SHLD: {{.*}} +; X64: {{.*}} +; X64-NO-SHLD: {{.*}} +; X64-SHLD: {{.*}}