Skip to content

Commit

Permalink
[X86] Allow speculative BSR/BSF instructions on targets with CMOV (ll…
Browse files Browse the repository at this point in the history
…vm#102885)

Currently, targets without LZCNT/TZCNT won't speculate BSR/BSF instructions because the input might be zero, which means we always insert a test+branch to handle the zero-input case.

This patch proposes that we allow speculation if the target has CMOV, and perform a branchless select instead to handle the zero-input case. This will predominantly help x86-64 targets where we haven't set any particular CPU target. We already always emit BSR/BSF instructions when lowering a CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF instruction.
  • Loading branch information
RKSimon authored and cjdb committed Aug 23, 2024
1 parent 6f1e270 commit bda93ee
Show file tree
Hide file tree
Showing 20 changed files with 516 additions and 682 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3239,14 +3239,14 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,

bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
// Speculate cttz if we can directly use TZCNT, if CMOV is usable (so the
// zero-input case of BSF can be handled with a branchless select), or if the
// scalar type is narrower than the native width and can promote to i32/i64.
return Subtarget.hasBMI() ||
return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
(!Ty->isVectorTy() &&
Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
}

bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
// Speculate ctlz if we can directly use LZCNT, or if CMOV is usable so the
// zero-input case of BSR can be handled with a branchless select.
return Subtarget.hasLZCNT();
return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
}

bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4210,9 +4210,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
{ ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
{ ISD::CTLZ, MVT::i64, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTTZ, MVT::i64, { 2, 2, 5, 5 } }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 1, 2 } }, // BSF
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
{ ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
Expand Down Expand Up @@ -4241,9 +4241,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
{ ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
{ ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
{ ISD::CTLZ, MVT::i32, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 3, 2, 7, 7 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 2, 2, 5, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i32,{ 1, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTLZ_ZERO_UNDEF, MVT::i16,{ 2, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTLZ_ZERO_UNDEF, MVT::i8, { 2, 2, 3, 3 } }, // BSR+XOR
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)

define i64 @var_cttz_i64(i64 %a) {
; NOBMI-LABEL: 'var_cttz_i64'
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
;
; BMI-LABEL: 'var_cttz_i64'
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)

define i64 @var_cttz_i64(i64 %a) {
; NOBMI-LABEL: 'var_cttz_i64'
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
;
; BMI-LABEL: 'var_cttz_i64'
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/atomic-bit-test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@ define i32 @split_hoist_and(i32 %0) nounwind {
; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: setb %al
; X64-NEXT: shll $3, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: retq
%2 = atomicrmw or ptr @v32, i32 8 monotonic, align 4
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)
Expand Down
53 changes: 17 additions & 36 deletions llvm/test/CodeGen/X86/bit_ceil.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@
define i32 @bit_ceil_i32(i32 %x) {
; NOBMI-LABEL: bit_ceil_i32:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movl %edi, %eax
; NOBMI-NEXT: decl %eax
; NOBMI-NEXT: je .LBB0_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrl %eax, %ecx
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
; NOBMI-NEXT: leal -1(%rdi), %eax
; NOBMI-NEXT: bsrl %eax, %eax
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: jmp .LBB0_3
; NOBMI-NEXT: .LBB0_1:
; NOBMI-NEXT: movl $32, %ecx
; NOBMI-NEXT: .LBB0_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -51,15 +47,10 @@ define i32 @bit_ceil_i32(i32 %x) {
define i32 @bit_ceil_i32_plus1(i32 noundef %x) {
; NOBMI-LABEL: bit_ceil_i32_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: testl %edi, %edi
; NOBMI-NEXT: je .LBB1_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrl %edi, %ecx
; NOBMI-NEXT: bsrl %edi, %eax
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: jmp .LBB1_3
; NOBMI-NEXT: .LBB1_1:
; NOBMI-NEXT: movl $32, %ecx
; NOBMI-NEXT: .LBB1_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -94,16 +85,11 @@ entry:
define i64 @bit_ceil_i64(i64 %x) {
; NOBMI-LABEL: bit_ceil_i64:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movq %rdi, %rax
; NOBMI-NEXT: decq %rax
; NOBMI-NEXT: je .LBB2_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrq %rax, %rcx
; NOBMI-NEXT: xorq $63, %rcx
; NOBMI-NEXT: jmp .LBB2_3
; NOBMI-NEXT: .LBB2_1:
; NOBMI-NEXT: movl $64, %ecx
; NOBMI-NEXT: .LBB2_3: # %cond.end
; NOBMI-NEXT: leaq -1(%rdi), %rax
; NOBMI-NEXT: bsrq %rax, %rax
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: cmovneq %rax, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -136,15 +122,10 @@ define i64 @bit_ceil_i64(i64 %x) {
define i64 @bit_ceil_i64_plus1(i64 noundef %x) {
; NOBMI-LABEL: bit_ceil_i64_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: testq %rdi, %rdi
; NOBMI-NEXT: je .LBB3_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrq %rdi, %rcx
; NOBMI-NEXT: xorq $63, %rcx
; NOBMI-NEXT: jmp .LBB3_3
; NOBMI-NEXT: .LBB3_1:
; NOBMI-NEXT: movl $64, %ecx
; NOBMI-NEXT: .LBB3_3: # %cond.end
; NOBMI-NEXT: bsrq %rdi, %rax
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: cmovneq %rax, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down
47 changes: 20 additions & 27 deletions llvm/test/CodeGen/X86/combine-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -213,21 +213,18 @@ define i64 @PR89533(<64 x i8> %a0) {
; SSE-NEXT: shll $16, %ecx
; SSE-NEXT: orl %eax, %ecx
; SSE-NEXT: pcmpeqb %xmm4, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %edx
; SSE-NEXT: xorl $65535, %edx # imm = 0xFFFF
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
; SSE-NEXT: pmovmskb %xmm3, %eax
; SSE-NEXT: notl %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %edx, %eax
; SSE-NEXT: shlq $32, %rax
; SSE-NEXT: orq %rcx, %rax
; SSE-NEXT: je .LBB11_2
; SSE-NEXT: # %bb.1: # %cond.false
; SSE-NEXT: rep bsfq %rax, %rax
; SSE-NEXT: retq
; SSE-NEXT: .LBB11_2: # %cond.end
; SSE-NEXT: pmovmskb %xmm3, %edx
; SSE-NEXT: notl %edx
; SSE-NEXT: shll $16, %edx
; SSE-NEXT: orl %eax, %edx
; SSE-NEXT: shlq $32, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: bsfq %rdx, %rcx
; SSE-NEXT: movl $64, %eax
; SSE-NEXT: cmovneq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: PR89533:
Expand All @@ -243,23 +240,19 @@ define i64 @PR89533(<64 x i8> %a0) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: xorl $65535, %edx # imm = 0xFFFF
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: xorl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %edx, %eax
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: je .LBB11_2
; AVX1-NEXT: # %bb.1: # %cond.false
; AVX1-NEXT: rep bsfq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX1-NEXT: .LBB11_2: # %cond.end
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: notl %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: bsfq %rdx, %rcx
; AVX1-NEXT: movl $64, %eax
; AVX1-NEXT: cmovneq %rcx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand Down
Loading

0 comments on commit bda93ee

Please sign in to comment.