From 5d501096ca1fae74f910411cfeb0491d94c635b7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 9 Oct 2020 10:57:37 -0700 Subject: [PATCH 01/17] [lldb] Update docs with new buildbot URLs Buildbot got upgraded and now the (LLDB) builders have different URLs. --- lldb/docs/resources/bots.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lldb/docs/resources/bots.rst b/lldb/docs/resources/bots.rst index d9ddcde41abcc2..926259bd92beef 100644 --- a/lldb/docs/resources/bots.rst +++ b/lldb/docs/resources/bots.rst @@ -7,11 +7,15 @@ Buildbot LLVM Buildbot is the place where volunteers provide build machines. Everyone can `add a buildbot for LLDB `_. -* `lldb-x64-windows-ninja `_ -* `lldb-x86_64-debian `_ -* `lldb-aarch64-ubuntu `_ -* `lldb-arm-ubuntu `_ -* `lldb-x86_64-fedora `_ +* `lldb-x64-windows-ninja `_ +* `lldb-x86_64-debian `_ +* `lldb-aarch64-ubuntu `_ +* `lldb-arm-ubuntu `_ +* `lldb-x86_64-fedora `_ + +An overview of all LLDB builders can be found here: + +`http://lab.llvm.org:8011/#/builders?tags=lldb `_ GreenDragon ----------- From 191fbda5d2a5ceb4b5af894d987a69537b8431b4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 9 Oct 2020 19:19:35 +0100 Subject: [PATCH 02/17] [ARM][MIPS] Add funnel shift test coverage Based on offline discussions regarding D89139 and D88783 - we want to make sure targets aren't doing anything particularly dumb Tests copied from aarch64 which has a mixture of general, legalization and special case tests --- llvm/test/CodeGen/ARM/funnel-shift-rot.ll | 367 +++++++++++++ llvm/test/CodeGen/ARM/funnel-shift.ll | 398 ++++++++++++++ llvm/test/CodeGen/Mips/funnel-shift-rot.ll | 415 ++++++++++++++ llvm/test/CodeGen/Mips/funnel-shift.ll | 601 +++++++++++++++++++++ 4 files changed, 1781 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/funnel-shift-rot.ll create mode 100644 llvm/test/CodeGen/ARM/funnel-shift.ll create mode 100644 llvm/test/CodeGen/Mips/funnel-shift-rot.ll create mode 100644 
llvm/test/CodeGen/Mips/funnel-shift.ll diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll new file mode 100644 index 00000000000000..55157875d355f9 --- /dev/null +++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll @@ -0,0 +1,367 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; When first 2 operands match, it's a rotate. + +define i8 @rotl_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotl_i8_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r1, r0 +; CHECK-NEXT: lsl r0, r0, #3 +; CHECK-NEXT: orr r0, r0, r1, lsr #5 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i64 @rotl_i64_const_shift(i64 %x) { +; CHECK-LABEL: rotl_i64_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r2, r0, #3 +; CHECK-NEXT: orr r2, r2, r1, lsr #29 +; CHECK-NEXT: lsl r1, r1, #3 +; CHECK-NEXT: orr r1, r1, r0, lsr #29 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) + ret i64 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). 
+ +define i16 @rotl_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotl_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r1, #15 +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: lsl r2, r0, r2 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: orr r0, r2, r0, lsr r1 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotl_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotl_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: ror r0, r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotl_i64(i64 %x, i64 %z) { +; SCALAR-LABEL: rotl_i64: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: .save {r4, r5, r11, lr} +; SCALAR-NEXT: push {r4, r5, r11, lr} +; SCALAR-NEXT: rsb r3, r2, #0 +; SCALAR-NEXT: and r4, r2, #63 +; SCALAR-NEXT: and lr, r3, #63 +; SCALAR-NEXT: rsb r3, lr, #32 +; SCALAR-NEXT: lsl r2, r0, r4 +; SCALAR-NEXT: lsr r12, r0, lr +; SCALAR-NEXT: orr r3, r12, r1, lsl r3 +; SCALAR-NEXT: subs r12, lr, #32 +; SCALAR-NEXT: lsrpl r3, r1, r12 +; SCALAR-NEXT: subs r5, r4, #32 +; SCALAR-NEXT: movwpl r2, #0 +; SCALAR-NEXT: cmp r5, #0 +; SCALAR-NEXT: orr r2, r2, r3 +; SCALAR-NEXT: rsb r3, r4, #32 +; SCALAR-NEXT: lsr r3, r0, r3 +; SCALAR-NEXT: orr r3, r3, r1, lsl r4 +; SCALAR-NEXT: lslpl r3, r0, r5 +; SCALAR-NEXT: lsr r0, r1, lr +; SCALAR-NEXT: cmp r12, #0 +; SCALAR-NEXT: movwpl r0, #0 +; SCALAR-NEXT: orr r1, r3, r0 +; SCALAR-NEXT: mov r0, r2 +; SCALAR-NEXT: pop {r4, r5, r11, pc} +; +; NEON-LABEL: rotl_i64: +; NEON: @ %bb.0: +; NEON-NEXT: .save {r4, r5, r11, lr} +; NEON-NEXT: push {r4, r5, r11, lr} +; NEON-NEXT: and r12, r2, #63 +; NEON-NEXT: rsb r2, r2, #0 +; NEON-NEXT: rsb r3, r12, #32 +; NEON-NEXT: and r4, r2, #63 +; NEON-NEXT: subs lr, r12, #32 +; NEON-NEXT: lsr r3, r0, r3 +; NEON-NEXT: lsr r2, r1, r4 +; NEON-NEXT: orr r3, r3, r1, lsl r12 +; NEON-NEXT: lslpl r3, r0, lr +; NEON-NEXT: subs r5, r4, #32 +; NEON-NEXT: movwpl r2, #0 +; NEON-NEXT: cmp r5, #0 +; 
NEON-NEXT: orr r2, r3, r2 +; NEON-NEXT: lsr r3, r0, r4 +; NEON-NEXT: rsb r4, r4, #32 +; NEON-NEXT: lsl r0, r0, r12 +; NEON-NEXT: orr r3, r3, r1, lsl r4 +; NEON-NEXT: lsrpl r3, r1, r5 +; NEON-NEXT: cmp lr, #0 +; NEON-NEXT: movwpl r0, #0 +; NEON-NEXT: mov r1, r2 +; NEON-NEXT: orr r0, r0, r3 +; NEON-NEXT: pop {r4, r5, r11, pc} + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. + +define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { +; SCALAR-LABEL: rotl_v4i32: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldr r12, [sp] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r0, r0, r12 +; SCALAR-NEXT: ldr r12, [sp, #4] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r1, r1, r12 +; SCALAR-NEXT: ldr r12, [sp, #8] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r2, r2, r12 +; SCALAR-NEXT: ldr r12, [sp, #12] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r3, r3, r12 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotl_v4i32: +; NEON: @ %bb.0: +; NEON-NEXT: mov r12, sp +; NEON-NEXT: vld1.64 {d16, d17}, [r12] +; NEON-NEXT: vmov.i32 q10, #0x1f +; NEON-NEXT: vneg.s32 q9, q8 +; NEON-NEXT: vmov d23, r2, r3 +; NEON-NEXT: vand q9, q9, q10 +; NEON-NEXT: vand q8, q8, q10 +; NEON-NEXT: vmov d22, r0, r1 +; NEON-NEXT: vneg.s32 q9, q9 +; NEON-NEXT: vshl.u32 q8, q11, q8 +; NEON-NEXT: vshl.u32 q9, q11, q9 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
+ +define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) { +; SCALAR-LABEL: rotl_v4i32_rotl_const_shift: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ror r0, r0, #29 +; SCALAR-NEXT: ror r1, r1, #29 +; SCALAR-NEXT: ror r2, r2, #29 +; SCALAR-NEXT: ror r3, r3, #29 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotl_v4i32_rotl_const_shift: +; NEON: @ %bb.0: +; NEON-NEXT: vmov d17, r2, r3 +; NEON-NEXT: vmov d16, r0, r1 +; NEON-NEXT: vshr.u32 q9, q8, #29 +; NEON-NEXT: vshl.i32 q8, q8, #3 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +; Repeat everything for funnel shift right. + +; When first 2 operands match, it's a rotate. + +define i8 @rotr_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotr_i8_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r1, r0 +; CHECK-NEXT: lsr r1, r1, #3 +; CHECK-NEXT: orr r0, r1, r0, lsl #5 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i32 @rotr_i32_const_shift(i32 %x) { +; CHECK-LABEL: rotr_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: ror r0, r0, #3 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) + ret i32 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). 
+ +define i16 @rotr_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotr_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r1, #15 +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: uxth r3, r0 +; CHECK-NEXT: lsr r2, r3, r2 +; CHECK-NEXT: orr r0, r2, r0, lsl r1 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotr_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotr_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: ror r0, r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotr_i64(i64 %x, i64 %z) { +; CHECK-LABEL: rotr_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: and lr, r2, #63 +; CHECK-NEXT: rsb r2, r2, #0 +; CHECK-NEXT: rsb r3, lr, #32 +; CHECK-NEXT: and r4, r2, #63 +; CHECK-NEXT: lsr r12, r0, lr +; CHECK-NEXT: orr r3, r12, r1, lsl r3 +; CHECK-NEXT: subs r12, lr, #32 +; CHECK-NEXT: lsl r2, r0, r4 +; CHECK-NEXT: lsrpl r3, r1, r12 +; CHECK-NEXT: subs r5, r4, #32 +; CHECK-NEXT: movwpl r2, #0 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: orr r2, r3, r2 +; CHECK-NEXT: rsb r3, r4, #32 +; CHECK-NEXT: lsr r3, r0, r3 +; CHECK-NEXT: orr r3, r3, r1, lsl r4 +; CHECK-NEXT: lslpl r3, r0, r5 +; CHECK-NEXT: lsr r0, r1, lr +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: movwpl r0, #0 +; CHECK-NEXT: orr r1, r0, r3 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: pop {r4, r5, r11, pc} + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { +; SCALAR-LABEL: rotr_v4i32: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldr r12, [sp] +; SCALAR-NEXT: ror r0, r0, r12 +; SCALAR-NEXT: ldr r12, [sp, #4] +; SCALAR-NEXT: ror r1, r1, r12 +; SCALAR-NEXT: ldr r12, [sp, #8] +; SCALAR-NEXT: ror r2, r2, r12 +; SCALAR-NEXT: ldr r12, [sp, #12] +; SCALAR-NEXT: ror r3, r3, r12 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotr_v4i32: +; NEON: @ %bb.0: +; NEON-NEXT: mov r12, sp +; NEON-NEXT: vld1.64 {d16, d17}, [r12] +; NEON-NEXT: vmov.i32 q9, #0x1f +; NEON-NEXT: vneg.s32 q10, q8 +; NEON-NEXT: vand q8, q8, q9 +; NEON-NEXT: vmov d23, r2, r3 +; NEON-NEXT: vand q9, q10, q9 +; NEON-NEXT: vneg.s32 q8, q8 +; NEON-NEXT: vmov d22, r0, r1 +; NEON-NEXT: vshl.u32 q9, q11, q9 +; NEON-NEXT: vshl.u32 q8, q11, q8 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
+ +define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) { +; SCALAR-LABEL: rotr_v4i32_const_shift: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ror r0, r0, #3 +; SCALAR-NEXT: ror r1, r1, #3 +; SCALAR-NEXT: ror r2, r2, #3 +; SCALAR-NEXT: ror r3, r3, #3 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotr_v4i32_const_shift: +; NEON: @ %bb.0: +; NEON-NEXT: vmov d17, r2, r3 +; NEON-NEXT: vmov d16, r0, r1 +; NEON-NEXT: vshl.i32 q9, q8, #29 +; NEON-NEXT: vshr.u32 q8, q8, #3 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +define i32 @rotl_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotl_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define i32 @rotr_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotr_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll new file mode 100644 index 00000000000000..a8b6aff767a748 --- /dev/null +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR +; RUN: 
llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; General case - all operands can be variables. + +define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshl_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r2, #15 +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: lsl r0, r0, r2 +; CHECK-NEXT: lsr r1, r1, #1 +; CHECK-NEXT: orr r0, r0, r1, lsr r3 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshl_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsr r1, r1, #1 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r0, r0, r2 +; CHECK-NEXT: orr r0, r0, r1, lsr r3 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. 
+declare i37 @llvm.fshl.i37(i37, i37, i37) +define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshl_i37: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r0, [sp, #24] +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: ldr r1, [sp, #28] +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r2, #37 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: mov r0, #63 +; CHECK-NEXT: bic r1, r0, r2 +; CHECK-NEXT: lsl r0, r6, #27 +; CHECK-NEXT: lsl r3, r7, #27 +; CHECK-NEXT: orr r0, r0, r7, lsr #5 +; CHECK-NEXT: and r2, r2, #63 +; CHECK-NEXT: lsrs r7, r0, #1 +; CHECK-NEXT: rrx r0, r3 +; CHECK-NEXT: rsb r3, r1, #32 +; CHECK-NEXT: lsr r0, r0, r1 +; CHECK-NEXT: lsl r6, r4, r2 +; CHECK-NEXT: orr r0, r0, r7, lsl r3 +; CHECK-NEXT: subs r3, r1, #32 +; CHECK-NEXT: lsr r1, r7, r1 +; CHECK-NEXT: lsrpl r0, r7, r3 +; CHECK-NEXT: subs r5, r2, #32 +; CHECK-NEXT: movwpl r6, #0 +; CHECK-NEXT: orr r0, r6, r0 +; CHECK-NEXT: rsb r6, r2, #32 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: lsr r6, r4, r6 +; CHECK-NEXT: orr r2, r6, r8, lsl r2 +; CHECK-NEXT: lslpl r2, r4, r5 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwpl r1, #0 +; CHECK-NEXT: orr r1, r2, r1 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} + %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 + +declare i7 @llvm.fshl.i7(i7, i7, i7) +define i7 @fshl_i7_const_fold() { +; CHECK-LABEL: fshl_i7_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #67 +; CHECK-NEXT: bx lr + %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshl_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #128 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_2() { +; CHECK-LABEL: 
fshl_i8_const_fold_overshift_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #120 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r0, r0, r1, lsr #23 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. + +define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r0, r0, r1, lsr #23 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. + +define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshl_i64_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsr r1, r2, #23 +; CHECK-NEXT: orr r2, r1, r3, lsl #9 +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r1, r0, r3, lsr #23 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshl_i8_const_fold() { +; CHECK-LABEL: fshl_i8_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #128 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +; Repeat everything for funnel shift right. + +; General case - all operands can be variables. 
+ +define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshr_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: lsl r0, r0, #1 +; CHECK-NEXT: bfi r2, r3, #4, #28 +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r0, r0, r1, lsr r2 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshr_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsl r0, r0, #1 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r0, r0, r1, lsr r2 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshr.i37(i37, i37, i37) +define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshr_i37: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: ldr r0, [sp, #32] +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: ldr r1, [sp, #36] +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r2, #37 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: add r0, r2, #27 +; CHECK-NEXT: lsl r6, r6, #27 +; CHECK-NEXT: and r1, r0, #63 +; CHECK-NEXT: lsl r2, r7, #27 +; CHECK-NEXT: orr r7, r6, r7, lsr #5 +; CHECK-NEXT: mov r6, #63 +; CHECK-NEXT: rsb r3, r1, #32 +; CHECK-NEXT: lsr r2, r2, r1 +; CHECK-NEXT: subs r12, r1, #32 +; CHECK-NEXT: bic r6, r6, r0 +; CHECK-NEXT: orr r2, r2, r7, lsl r3 +; CHECK-NEXT: lsl r5, r9, #1 +; CHECK-NEXT: lsrpl r2, r7, r12 +; CHECK-NEXT: lsl r0, r5, r6 +; CHECK-NEXT: subs r4, r6, #32 +; CHECK-NEXT: lsl r3, r8, #1 +; CHECK-NEXT: movwpl r0, #0 +; CHECK-NEXT: orr r3, r3, r9, lsr #31 +; CHECK-NEXT: orr r0, r0, r2 
+; CHECK-NEXT: rsb r2, r6, #32 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: lsr r1, r7, r1 +; CHECK-NEXT: lsr r2, r5, r2 +; CHECK-NEXT: orr r2, r2, r3, lsl r6 +; CHECK-NEXT: lslpl r2, r5, r4 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: movwpl r1, #0 +; CHECK-NEXT: orr r1, r2, r1 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} + %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 + +declare i7 @llvm.fshr.i7(i7, i7, i7) +define i7 @fshr_i7_const_fold() { +; CHECK-LABEL: fshr_i7_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #31 +; CHECK-NEXT: bx lr + %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshr_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #254 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #225 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #255 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #23 +; CHECK-NEXT: orr r0, r0, r1, lsr #9 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. 41-32=9. 
+ +define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #23 +; CHECK-NEXT: orr r0, r0, r1, lsr #9 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. 105-64 = 41. + +define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshr_i64_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r2, r0, #23 +; CHECK-NEXT: lsl r1, r1, #23 +; CHECK-NEXT: orr r2, r2, r3, lsr #9 +; CHECK-NEXT: orr r1, r1, r0, lsr #9 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshr_i8_const_fold() { +; CHECK-LABEL: fshr_i8_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #254 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> ) + ret <4 x i32> %f +} + +define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldm sp, {r0, r1, r2, r3} +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: fshr_v4i32_shift_by_bitwidth: +; NEON: @ %bb.0: +; NEON-NEXT: mov r0, sp +; NEON-NEXT: 
vld1.64 {d16, d17}, [r0] +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> ) + ret <4 x i32> %f +} + diff --git a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll new file mode 100644 index 00000000000000..49532f246838ac --- /dev/null +++ b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -march=mips -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; When first 2 operands match, it's a rotate. 
+ +define i8 @rotl_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotl_i8_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 3 +; CHECK-NEXT: andi $2, $4, 224 +; CHECK-NEXT: srl $2, $2, 5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i64 @rotl_i64_const_shift(i64 %x) { +; CHECK-LABEL: rotl_i64_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 29 +; CHECK-NEXT: sll $2, $4, 3 +; CHECK-NEXT: or $2, $2, $1 +; CHECK-NEXT: srl $1, $4, 29 +; CHECK-NEXT: sll $3, $5, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $3, $3, $1 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) + ret i64 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). + +define i16 @rotl_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotl_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 15 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 15 +; CHECK-NEXT: andi $3, $4, 65535 +; CHECK-NEXT: srlv $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotl_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotl_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 31 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: srlv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotl_i64(i64 %x, i64 %z) { +; CHECK-BE-LABEL: rotl_i64: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: negu $1, $7 +; CHECK-BE-NEXT: andi $3, $1, 63 +; CHECK-BE-NEXT: srlv $6, $4, $3 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: andi $2, $7, 63 +; CHECK-BE-NEXT: move $8, $6 +; CHECK-BE-NEXT: movn $8, $zero, $1 +; CHECK-BE-NEXT: sllv $9, $4, $2 +; CHECK-BE-NEXT: srl $10, $5, 1 +; CHECK-BE-NEXT: not $11, $2 +; CHECK-BE-NEXT: srlv $10, $10, $11 +; CHECK-BE-NEXT: or $9, $9, $10 +; 
CHECK-BE-NEXT: sllv $10, $5, $2 +; CHECK-BE-NEXT: andi $7, $7, 32 +; CHECK-BE-NEXT: movn $9, $10, $7 +; CHECK-BE-NEXT: or $2, $9, $8 +; CHECK-BE-NEXT: srlv $5, $5, $3 +; CHECK-BE-NEXT: not $3, $3 +; CHECK-BE-NEXT: sll $4, $4, 1 +; CHECK-BE-NEXT: sllv $3, $4, $3 +; CHECK-BE-NEXT: or $3, $3, $5 +; CHECK-BE-NEXT: movn $3, $6, $1 +; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $3, $10, $3 +; +; CHECK-LE-LABEL: rotl_i64: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: negu $1, $6 +; CHECK-LE-NEXT: andi $2, $1, 63 +; CHECK-LE-NEXT: srlv $7, $5, $2 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: andi $3, $6, 63 +; CHECK-LE-NEXT: move $8, $7 +; CHECK-LE-NEXT: movn $8, $zero, $1 +; CHECK-LE-NEXT: sllv $9, $5, $3 +; CHECK-LE-NEXT: srl $10, $4, 1 +; CHECK-LE-NEXT: not $11, $3 +; CHECK-LE-NEXT: srlv $10, $10, $11 +; CHECK-LE-NEXT: or $9, $9, $10 +; CHECK-LE-NEXT: sllv $10, $4, $3 +; CHECK-LE-NEXT: andi $6, $6, 32 +; CHECK-LE-NEXT: movn $9, $10, $6 +; CHECK-LE-NEXT: or $3, $9, $8 +; CHECK-LE-NEXT: srlv $4, $4, $2 +; CHECK-LE-NEXT: not $2, $2 +; CHECK-LE-NEXT: sll $5, $5, 1 +; CHECK-LE-NEXT: sllv $2, $5, $2 +; CHECK-LE-NEXT: or $2, $2, $4 +; CHECK-LE-NEXT: movn $2, $7, $1 +; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $2, $10, $2 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lw $1, 24($sp) +; CHECK-NEXT: negu $2, $1 +; CHECK-NEXT: lw $3, 20($sp) +; CHECK-NEXT: negu $8, $3 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: lw $9, 16($sp) +; CHECK-NEXT: sllv $1, $6, $1 +; CHECK-NEXT: srlv $6, $6, $2 +; CHECK-NEXT: sllv $3, $5, $3 +; CHECK-NEXT: srlv $5, $5, $8 +; CHECK-NEXT: andi $2, $9, 31 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: negu $8, $9 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: srlv $4, $4, $8 +; CHECK-NEXT: lw $8, 28($sp) +; CHECK-NEXT: or $2, $2, $4 +; CHECK-NEXT: or $3, $3, $5 +; CHECK-NEXT: or $4, $1, $6 +; CHECK-NEXT: andi $1, $8, 31 +; CHECK-NEXT: sllv $1, $7, $1 +; CHECK-NEXT: negu $5, $8 +; CHECK-NEXT: andi $5, $5, 31 +; CHECK-NEXT: srlv $5, $7, $5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $1, $5 + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. + +define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_rotl_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 29 +; CHECK-NEXT: sll $3, $5, 3 +; CHECK-NEXT: srl $2, $4, 29 +; CHECK-NEXT: sll $4, $4, 3 +; CHECK-NEXT: or $2, $4, $2 +; CHECK-NEXT: or $3, $3, $1 +; CHECK-NEXT: srl $1, $6, 29 +; CHECK-NEXT: sll $4, $6, 3 +; CHECK-NEXT: or $4, $4, $1 +; CHECK-NEXT: srl $1, $7, 29 +; CHECK-NEXT: sll $5, $7, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $5, $1 + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +; Repeat everything for funnel shift right. + +; When first 2 operands match, it's a rotate. 
+ +define i8 @rotr_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotr_i8_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 5 +; CHECK-NEXT: andi $2, $4, 248 +; CHECK-NEXT: srl $2, $2, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i32 @rotr_i32_const_shift(i32 %x) { +; CHECK-LABEL: rotr_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 29 +; CHECK-NEXT: srl $2, $4, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) + ret i32 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). + +define i16 @rotr_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotr_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 15 +; CHECK-NEXT: andi $2, $4, 65535 +; CHECK-NEXT: srlv $1, $2, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 15 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotr_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotr_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 31 +; CHECK-NEXT: srlv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotr_i64(i64 %x, i64 %z) { +; CHECK-BE-LABEL: rotr_i64: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: negu $1, $7 +; CHECK-BE-NEXT: andi $2, $1, 63 +; CHECK-BE-NEXT: sllv $6, $5, $2 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: andi $3, $7, 63 +; CHECK-BE-NEXT: move $8, $6 +; CHECK-BE-NEXT: movn $8, $zero, $1 +; CHECK-BE-NEXT: srlv $9, $5, $3 +; CHECK-BE-NEXT: sll $10, $4, 1 +; CHECK-BE-NEXT: not $11, $3 +; CHECK-BE-NEXT: sllv $10, $10, $11 +; CHECK-BE-NEXT: or $9, $10, $9 +; CHECK-BE-NEXT: srlv $10, $4, $3 +; CHECK-BE-NEXT: andi $7, $7, 32 +; CHECK-BE-NEXT: movn $9, 
$10, $7 +; CHECK-BE-NEXT: or $3, $9, $8 +; CHECK-BE-NEXT: sllv $4, $4, $2 +; CHECK-BE-NEXT: not $2, $2 +; CHECK-BE-NEXT: srl $5, $5, 1 +; CHECK-BE-NEXT: srlv $2, $5, $2 +; CHECK-BE-NEXT: or $2, $4, $2 +; CHECK-BE-NEXT: movn $2, $6, $1 +; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $2, $10, $2 +; +; CHECK-LE-LABEL: rotr_i64: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: negu $1, $6 +; CHECK-LE-NEXT: andi $3, $1, 63 +; CHECK-LE-NEXT: sllv $7, $4, $3 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: andi $2, $6, 63 +; CHECK-LE-NEXT: move $8, $7 +; CHECK-LE-NEXT: movn $8, $zero, $1 +; CHECK-LE-NEXT: srlv $9, $4, $2 +; CHECK-LE-NEXT: sll $10, $5, 1 +; CHECK-LE-NEXT: not $11, $2 +; CHECK-LE-NEXT: sllv $10, $10, $11 +; CHECK-LE-NEXT: or $9, $10, $9 +; CHECK-LE-NEXT: srlv $10, $5, $2 +; CHECK-LE-NEXT: andi $6, $6, 32 +; CHECK-LE-NEXT: movn $9, $10, $6 +; CHECK-LE-NEXT: or $2, $9, $8 +; CHECK-LE-NEXT: sllv $5, $5, $3 +; CHECK-LE-NEXT: not $3, $3 +; CHECK-LE-NEXT: srl $4, $4, 1 +; CHECK-LE-NEXT: srlv $3, $4, $3 +; CHECK-LE-NEXT: or $3, $5, $3 +; CHECK-LE-NEXT: movn $3, $7, $1 +; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $3, $10, $3 + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotr_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lw $1, 24($sp) +; CHECK-NEXT: negu $2, $1 +; CHECK-NEXT: lw $3, 20($sp) +; CHECK-NEXT: negu $8, $3 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: lw $9, 16($sp) +; CHECK-NEXT: srlv $1, $6, $1 +; CHECK-NEXT: sllv $6, $6, $2 +; CHECK-NEXT: srlv $3, $5, $3 +; CHECK-NEXT: sllv $5, $5, $8 +; CHECK-NEXT: andi $2, $9, 31 +; CHECK-NEXT: srlv $2, $4, $2 +; CHECK-NEXT: negu $8, $9 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: sllv $4, $4, $8 +; CHECK-NEXT: lw $8, 28($sp) +; CHECK-NEXT: or $2, $2, $4 +; CHECK-NEXT: or $3, $3, $5 +; CHECK-NEXT: or $4, $1, $6 +; CHECK-NEXT: andi $1, $8, 31 +; CHECK-NEXT: srlv $1, $7, $1 +; CHECK-NEXT: negu $5, $8 +; CHECK-NEXT: andi $5, $5, 31 +; CHECK-NEXT: sllv $5, $7, $5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $1, $5 + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
 +
+define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sll $1, $5, 29
+; CHECK-NEXT: srl $3, $5, 3
+; CHECK-NEXT: sll $2, $4, 29
+; CHECK-NEXT: srl $4, $4, 3
+; CHECK-NEXT: or $2, $4, $2
+; CHECK-NEXT: or $3, $3, $1
+; CHECK-NEXT: sll $1, $6, 29
+; CHECK-NEXT: srl $4, $6, 3
+; CHECK-NEXT: or $4, $4, $1
+; CHECK-NEXT: sll $1, $7, 29
+; CHECK-NEXT: srl $5, $7, 3
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: or $5, $5, $1
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+ ret <4 x i32> %f
+}
+
+define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll
new file mode 100644
index 00000000000000..47d3db18c00370
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/funnel-shift.ll
@@ -0,0 +1,601 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -march=mips -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; General case - all operands can be variables. + +define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshl_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 15 +; CHECK-NEXT: sllv $2, $4, $1 +; CHECK-NEXT: sll $3, $5, 16 +; CHECK-NEXT: srl $3, $3, 1 +; CHECK-NEXT: not $1, $1 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: srlv $1, $3, $1 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshl_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 31 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: srl $2, $5, 1 +; CHECK-NEXT: not $3, $6 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: srlv $2, $2, $3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. 
+declare i37 @llvm.fshl.i37(i37, i37, i37) +define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-BE-LABEL: fshl_i37: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addiu $sp, $sp, -40 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-BE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: .cfi_offset 31, -4 +; CHECK-BE-NEXT: .cfi_offset 19, -8 +; CHECK-BE-NEXT: .cfi_offset 18, -12 +; CHECK-BE-NEXT: .cfi_offset 17, -16 +; CHECK-BE-NEXT: .cfi_offset 16, -20 +; CHECK-BE-NEXT: move $16, $7 +; CHECK-BE-NEXT: move $17, $6 +; CHECK-BE-NEXT: move $18, $5 +; CHECK-BE-NEXT: move $19, $4 +; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $5, 60($sp) +; CHECK-BE-NEXT: addiu $6, $zero, 0 +; CHECK-BE-NEXT: jal __umoddi3 +; CHECK-BE-NEXT: addiu $7, $zero, 37 +; CHECK-BE-NEXT: not $1, $3 +; CHECK-BE-NEXT: andi $2, $3, 63 +; CHECK-BE-NEXT: not $4, $2 +; CHECK-BE-NEXT: srl $5, $18, 1 +; CHECK-BE-NEXT: sllv $6, $19, $2 +; CHECK-BE-NEXT: srlv $4, $5, $4 +; CHECK-BE-NEXT: andi $5, $1, 63 +; CHECK-BE-NEXT: srl $7, $16, 5 +; CHECK-BE-NEXT: sll $8, $17, 27 +; CHECK-BE-NEXT: or $7, $8, $7 +; CHECK-BE-NEXT: srl $8, $7, 1 +; CHECK-BE-NEXT: srlv $9, $8, $5 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: move $10, $9 +; CHECK-BE-NEXT: movn $10, $zero, $1 +; CHECK-BE-NEXT: or $4, $6, $4 +; CHECK-BE-NEXT: sllv $6, $18, $2 +; CHECK-BE-NEXT: andi $3, $3, 32 +; CHECK-BE-NEXT: movn $4, $6, $3 +; CHECK-BE-NEXT: sll $7, $7, 31 +; CHECK-BE-NEXT: sll $2, $16, 27 +; CHECK-BE-NEXT: srl $11, $2, 1 +; CHECK-BE-NEXT: or $2, $4, $10 +; CHECK-BE-NEXT: movn $6, $zero, $3 +; CHECK-BE-NEXT: or $3, $11, $7 +; CHECK-BE-NEXT: srlv $3, $3, $5 +; CHECK-BE-NEXT: not $4, $5 +; CHECK-BE-NEXT: sll $5, $8, 1 +; CHECK-BE-NEXT: sllv $4, $5, $4 +; CHECK-BE-NEXT: or $3, $4, $3 +; CHECK-BE-NEXT: 
movn $3, $9, $1 +; CHECK-BE-NEXT: or $3, $6, $3 +; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: addiu $sp, $sp, 40 +; +; CHECK-LE-LABEL: fshl_i37: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: addiu $sp, $sp, -40 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-LE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: .cfi_offset 31, -4 +; CHECK-LE-NEXT: .cfi_offset 19, -8 +; CHECK-LE-NEXT: .cfi_offset 18, -12 +; CHECK-LE-NEXT: .cfi_offset 17, -16 +; CHECK-LE-NEXT: .cfi_offset 16, -20 +; CHECK-LE-NEXT: move $16, $7 +; CHECK-LE-NEXT: move $17, $6 +; CHECK-LE-NEXT: move $18, $5 +; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $4, 56($sp) +; CHECK-LE-NEXT: lw $5, 60($sp) +; CHECK-LE-NEXT: addiu $6, $zero, 37 +; CHECK-LE-NEXT: jal __umoddi3 +; CHECK-LE-NEXT: addiu $7, $zero, 0 +; CHECK-LE-NEXT: not $1, $2 +; CHECK-LE-NEXT: andi $3, $2, 63 +; CHECK-LE-NEXT: not $4, $3 +; CHECK-LE-NEXT: srl $5, $19, 1 +; CHECK-LE-NEXT: sllv $6, $18, $3 +; CHECK-LE-NEXT: srlv $4, $5, $4 +; CHECK-LE-NEXT: andi $5, $1, 63 +; CHECK-LE-NEXT: srl $7, $17, 5 +; CHECK-LE-NEXT: sll $8, $16, 27 +; CHECK-LE-NEXT: or $7, $8, $7 +; CHECK-LE-NEXT: srl $8, $7, 1 +; CHECK-LE-NEXT: srlv $9, $8, $5 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: move $10, $9 +; CHECK-LE-NEXT: movn $10, $zero, $1 +; CHECK-LE-NEXT: or $4, $6, $4 +; CHECK-LE-NEXT: sllv $6, $19, $3 +; CHECK-LE-NEXT: andi $2, $2, 32 +; CHECK-LE-NEXT: movn $4, $6, $2 +; CHECK-LE-NEXT: sll $7, $7, 31 +; CHECK-LE-NEXT: sll $3, $17, 27 +; 
CHECK-LE-NEXT: srl $11, $3, 1 +; CHECK-LE-NEXT: or $3, $4, $10 +; CHECK-LE-NEXT: movn $6, $zero, $2 +; CHECK-LE-NEXT: or $2, $11, $7 +; CHECK-LE-NEXT: srlv $2, $2, $5 +; CHECK-LE-NEXT: not $4, $5 +; CHECK-LE-NEXT: sll $5, $8, 1 +; CHECK-LE-NEXT: sllv $4, $5, $4 +; CHECK-LE-NEXT: or $2, $4, $2 +; CHECK-LE-NEXT: movn $2, $9, $1 +; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: addiu $sp, $sp, 40 + %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 + +declare i7 @llvm.fshl.i7(i7, i7, i7) +define i7 @fshl_i7_const_fold() { +; CHECK-LABEL: fshl_i7_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 67 + %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshl_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 128 + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_2: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 120 + %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_3: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 0 + %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. 
+ +define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 23 +; CHECK-NEXT: sll $2, $4, 9 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. + +define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_overshift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 23 +; CHECK-NEXT: sll $2, $4, 9 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. + +define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-BE-LABEL: fshl_i64_const_overshift: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: srl $1, $6, 23 +; CHECK-BE-NEXT: sll $2, $5, 9 +; CHECK-BE-NEXT: or $2, $2, $1 +; CHECK-BE-NEXT: sll $1, $6, 9 +; CHECK-BE-NEXT: srl $3, $7, 23 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $3, $3, $1 +; +; CHECK-LE-LABEL: fshl_i64_const_overshift: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: sll $1, $7, 9 +; CHECK-LE-NEXT: srl $2, $6, 23 +; CHECK-LE-NEXT: or $2, $2, $1 +; CHECK-LE-NEXT: srl $1, $7, 23 +; CHECK-LE-NEXT: sll $3, $4, 9 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $3, $3, $1 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshl_i8_const_fold() { +; CHECK-LABEL: fshl_i8_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 128 + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +; Repeat everything for funnel shift right. + +; General case - all operands can be variables. 
+ +define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshr_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $5, 16 +; CHECK-NEXT: andi $2, $6, 15 +; CHECK-NEXT: ori $3, $2, 16 +; CHECK-NEXT: srlv $1, $1, $3 +; CHECK-NEXT: sll $3, $4, 1 +; CHECK-NEXT: xori $2, $2, 15 +; CHECK-NEXT: sllv $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshr_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 31 +; CHECK-NEXT: srlv $1, $5, $1 +; CHECK-NEXT: sll $2, $4, 1 +; CHECK-NEXT: not $3, $6 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: sllv $2, $2, $3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshr.i37(i37, i37, i37) +define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-BE-LABEL: fshr_i37: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addiu $sp, $sp, -40 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-BE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: .cfi_offset 31, -4 +; CHECK-BE-NEXT: .cfi_offset 19, -8 +; CHECK-BE-NEXT: .cfi_offset 18, -12 +; CHECK-BE-NEXT: .cfi_offset 17, -16 +; CHECK-BE-NEXT: .cfi_offset 16, -20 +; CHECK-BE-NEXT: move $16, $7 +; CHECK-BE-NEXT: move $17, $6 +; CHECK-BE-NEXT: move $18, $5 +; CHECK-BE-NEXT: move $19, $4 +; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $5, 60($sp) +; CHECK-BE-NEXT: addiu $6, $zero, 0 +; CHECK-BE-NEXT: jal __umoddi3 +; CHECK-BE-NEXT: addiu $7, $zero, 37 +; CHECK-BE-NEXT: addiu $1, $3, 27 +; CHECK-BE-NEXT: andi $2, $1, 63 +; CHECK-BE-NEXT: not $3, $2 +; CHECK-BE-NEXT: srl $4, $16, 5 +; 
CHECK-BE-NEXT: sll $5, $17, 27 +; CHECK-BE-NEXT: or $4, $5, $4 +; CHECK-BE-NEXT: sll $5, $4, 1 +; CHECK-BE-NEXT: sll $6, $16, 27 +; CHECK-BE-NEXT: srlv $6, $6, $2 +; CHECK-BE-NEXT: sllv $3, $5, $3 +; CHECK-BE-NEXT: not $5, $1 +; CHECK-BE-NEXT: andi $7, $5, 63 +; CHECK-BE-NEXT: sll $8, $18, 1 +; CHECK-BE-NEXT: sllv $8, $8, $7 +; CHECK-BE-NEXT: andi $5, $5, 32 +; CHECK-BE-NEXT: move $9, $8 +; CHECK-BE-NEXT: movn $9, $zero, $5 +; CHECK-BE-NEXT: or $3, $3, $6 +; CHECK-BE-NEXT: srlv $2, $4, $2 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: movn $3, $2, $1 +; CHECK-BE-NEXT: srl $4, $18, 31 +; CHECK-BE-NEXT: sll $6, $19, 1 +; CHECK-BE-NEXT: or $4, $6, $4 +; CHECK-BE-NEXT: or $3, $9, $3 +; CHECK-BE-NEXT: movn $2, $zero, $1 +; CHECK-BE-NEXT: sllv $1, $4, $7 +; CHECK-BE-NEXT: not $4, $7 +; CHECK-BE-NEXT: lui $6, 32767 +; CHECK-BE-NEXT: ori $6, $6, 65535 +; CHECK-BE-NEXT: and $6, $18, $6 +; CHECK-BE-NEXT: srlv $4, $6, $4 +; CHECK-BE-NEXT: or $1, $1, $4 +; CHECK-BE-NEXT: movn $1, $8, $5 +; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: addiu $sp, $sp, 40 +; +; CHECK-LE-LABEL: fshr_i37: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: addiu $sp, $sp, -40 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-LE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: .cfi_offset 31, -4 +; CHECK-LE-NEXT: .cfi_offset 19, -8 +; CHECK-LE-NEXT: .cfi_offset 18, -12 +; CHECK-LE-NEXT: .cfi_offset 17, -16 +; CHECK-LE-NEXT: .cfi_offset 16, -20 +; CHECK-LE-NEXT: move 
$16, $7 +; CHECK-LE-NEXT: move $17, $6 +; CHECK-LE-NEXT: move $18, $5 +; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $4, 56($sp) +; CHECK-LE-NEXT: lw $5, 60($sp) +; CHECK-LE-NEXT: addiu $6, $zero, 37 +; CHECK-LE-NEXT: jal __umoddi3 +; CHECK-LE-NEXT: addiu $7, $zero, 0 +; CHECK-LE-NEXT: addiu $1, $2, 27 +; CHECK-LE-NEXT: andi $2, $1, 63 +; CHECK-LE-NEXT: not $3, $2 +; CHECK-LE-NEXT: srl $4, $17, 5 +; CHECK-LE-NEXT: sll $5, $16, 27 +; CHECK-LE-NEXT: or $4, $5, $4 +; CHECK-LE-NEXT: sll $5, $4, 1 +; CHECK-LE-NEXT: sll $6, $17, 27 +; CHECK-LE-NEXT: srlv $6, $6, $2 +; CHECK-LE-NEXT: sllv $3, $5, $3 +; CHECK-LE-NEXT: not $5, $1 +; CHECK-LE-NEXT: andi $7, $5, 63 +; CHECK-LE-NEXT: sll $8, $19, 1 +; CHECK-LE-NEXT: sllv $8, $8, $7 +; CHECK-LE-NEXT: andi $5, $5, 32 +; CHECK-LE-NEXT: move $9, $8 +; CHECK-LE-NEXT: movn $9, $zero, $5 +; CHECK-LE-NEXT: or $3, $3, $6 +; CHECK-LE-NEXT: srlv $4, $4, $2 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: movn $3, $4, $1 +; CHECK-LE-NEXT: srl $2, $19, 31 +; CHECK-LE-NEXT: sll $6, $18, 1 +; CHECK-LE-NEXT: or $6, $6, $2 +; CHECK-LE-NEXT: or $2, $9, $3 +; CHECK-LE-NEXT: movn $4, $zero, $1 +; CHECK-LE-NEXT: sllv $1, $6, $7 +; CHECK-LE-NEXT: not $3, $7 +; CHECK-LE-NEXT: lui $6, 32767 +; CHECK-LE-NEXT: ori $6, $6, 65535 +; CHECK-LE-NEXT: and $6, $19, $6 +; CHECK-LE-NEXT: srlv $3, $6, $3 +; CHECK-LE-NEXT: or $1, $1, $3 +; CHECK-LE-NEXT: movn $1, $8, $5 +; CHECK-LE-NEXT: or $3, $1, $4 +; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: addiu $sp, $sp, 40 + %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 + +declare i7 @llvm.fshr.i7(i7, i7, i7) +define i7 @fshr_i7_const_fold() { +; 
CHECK-LABEL: fshr_i7_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 31 + %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshr_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 254 + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 225 + %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 255 + %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 9 +; CHECK-NEXT: sll $2, $4, 23 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. 41-32=9. + +define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_overshift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 9 +; CHECK-NEXT: sll $2, $4, 23 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. 105-64 = 41. 
 +
+define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
+; CHECK-BE-LABEL: fshr_i64_const_overshift:
+; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: srl $1, $5, 9
+; CHECK-BE-NEXT: sll $2, $4, 23
+; CHECK-BE-NEXT: or $2, $2, $1
+; CHECK-BE-NEXT: srl $1, $6, 9
+; CHECK-BE-NEXT: sll $3, $5, 23
+; CHECK-BE-NEXT: jr $ra
+; CHECK-BE-NEXT: or $3, $3, $1
+;
+; CHECK-LE-LABEL: fshr_i64_const_overshift:
+; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: srl $1, $7, 9
+; CHECK-LE-NEXT: sll $2, $4, 23
+; CHECK-LE-NEXT: or $2, $2, $1
+; CHECK-LE-NEXT: srl $1, $4, 9
+; CHECK-LE-NEXT: sll $3, $5, 23
+; CHECK-LE-NEXT: jr $ra
+; CHECK-LE-NEXT: or $3, $3, $1
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
+ ret i64 %f
+}
+
+; This should work without any node-specific logic.
+
+define i8 @fshr_i8_const_fold() {
+; CHECK-LABEL: fshr_i8_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: addiu $2, $zero, 254
+ %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
+ ret i8 %f
+}
+
+define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $5
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw
$5, 28($sp)
+; CHECK-NEXT: lw $4, 24($sp)
+; CHECK-NEXT: lw $3, 20($sp)
+; CHECK-NEXT: lw $2, 16($sp)
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: nop
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+

From 2218e6d0a873f6bad4c4cdd5bccbdc0ae6f4c760 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 6 Oct 2020 20:28:43 -0700
Subject: [PATCH 03/17] [BPF] Make BPFAbstractMemberAccessPass required

Or else on optnone functions we get the following during instruction
selection:
fatal error: error in backend: Cannot select: intrinsic
%llvm.preserve.struct.access.index

Currently the -O0 pipeline doesn't properly run passes registered via
TargetMachine::registerPassBuilderCallbacks(), so don't add that RUN
line yet. That will be fixed after this.

Reviewed By: yonghong-song

Differential Revision: https://reviews.llvm.org/D89083
---
 llvm/lib/Target/BPF/BPF.h | 4 +++
 llvm/test/CodeGen/BPF/optnone-2.ll | 52 ++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 llvm/test/CodeGen/BPF/optnone-2.ll

diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 82ac091fa7fa36..8629c1503b4e90 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -46,11 +46,15 @@ class BPFAbstractMemberAccessPass
 public:
   BPFAbstractMemberAccessPass(BPFTargetMachine *TM) : TM(TM) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
 };
 
 class BPFPreserveDITypePass : public PassInfoMixin<BPFPreserveDITypePass> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
 };
 
 } // namespace llvm
diff --git a/llvm/test/CodeGen/BPF/optnone-2.ll b/llvm/test/CodeGen/BPF/optnone-2.ll
new file mode 100644
index 00000000000000..82014bdaf2cc9f
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/optnone-2.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -passes='default<O2>' | llc -march=bpfel -filetype=asm -o /dev/null -
+; TODO: add -O0
once that's supported + +; IR generated by +; $ cat /tmp/a.c +; struct ss { int a; }; +; int foo() { return __builtin_btf_type_id(0, 0) + __builtin_preserve_type_info(*(struct ss *)0, 0); } +; $ clang -target bpf -g -S -emit-llvm t.c -Xclang -disable-llvm-passes /tmp/a.c + +target triple = "bpf" + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @foo() #0 !dbg !9 { +entry: + %0 = call i32 @llvm.bpf.btf.type.id(i32 0, i64 0), !dbg !12, !llvm.preserve.access.index !4 + %1 = call i32 @llvm.bpf.preserve.type.info(i32 1, i64 0), !dbg !13, !llvm.preserve.access.index !14 + %add = add i32 %0, %1, !dbg !17 + ret i32 %add, !dbg !18 +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.btf.type.id(i32, i64) #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.preserve.type.info(i32, i64) #1 + +attributes #0 = { noinline nounwind optnone } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "C:/src/tmp\\a.c", directory: "C:\\src\\llvm-project") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!5 = !{i32 7, !"Dwarf Version", i32 4} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = !{i32 1, !"wchar_size", i32 4} +!8 = !{!"clang version 12.0.0"} +!9 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DIFile(filename: "C:/src/tmp/a.c", directory: "") +!11 = !DISubroutineType(types: !3) +!12 = !DILocation(line: 2, column: 21, scope: !9) +!13 = !DILocation(line: 2, column: 51, scope: !9) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ss", file: 
!10, line: 1, size: 32, elements: !15) +!15 = !{!16} +!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !14, file: !10, line: 1, baseType: !4, size: 32) +!17 = !DILocation(line: 2, column: 49, scope: !9) +!18 = !DILocation(line: 2, column: 14, scope: !9) From 4abb519619694de12e401de5454a6eed5c1384ea Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 14:21:23 -0400 Subject: [PATCH 04/17] [libc++] NFCI: Define small methods of basic_stringstream inline It greatly increases readability because defining the methods out-of-line involves a ton of boilerplate template declarations. --- libcxx/include/sstream | 396 +++++++++++++++-------------------------- 1 file changed, 139 insertions(+), 257 deletions(-) diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 4b1d17cfde3327..042766ca22c91e 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -208,11 +208,19 @@ private: public: // 27.8.1.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_stringbuf(ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_stringbuf(ios_base::openmode __wch = ios_base::in | ios_base::out) + : __hm_(0), __mode_(__wch) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_stringbuf(const string_type& __s, - ios_base::openmode __wch = ios_base::in | ios_base::out); + ios_base::openmode __wch = ios_base::in | ios_base::out) + : __str_(__s.get_allocator()), __hm_(0), __mode_(__wch) + { + str(__s); + } + basic_stringbuf(basic_stringbuf&& __rhs); // 27.8.1.2 Assign and swap: @@ -230,28 +238,13 @@ protected: virtual int_type overflow (int_type __c = traits_type::eof()); virtual pos_type seekoff(off_type __off, ios_base::seekdir __way, ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY virtual pos_type seekpos(pos_type __sp, - ios_base::openmode __wch = ios_base::in | ios_base::out); + 
ios_base::openmode __wch = ios_base::in | ios_base::out) { + return seekoff(__sp, ios_base::beg, __wch); + } }; -template -basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(ios_base::openmode __wch) - : __hm_(0), - __mode_(__wch) -{ -} - -template -basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(const string_type& __s, - ios_base::openmode __wch) - : __str_(__s.get_allocator()), - __hm_(0), - __mode_(__wch) -{ - str(__s); -} - template basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(basic_stringbuf&& __rhs) : __mode_(__rhs.__mode_) @@ -609,14 +602,6 @@ basic_stringbuf<_CharT, _Traits, _Allocator>::seekoff(off_type __off, return pos_type(__noff); } -template -typename basic_stringbuf<_CharT, _Traits, _Allocator>::pos_type -basic_stringbuf<_CharT, _Traits, _Allocator>::seekpos(pos_type __sp, - ios_base::openmode __wch) -{ - return seekoff(__sp, ios_base::beg, __wch); -} - // basic_istringstream template @@ -638,67 +623,53 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_istringstream(ios_base::openmode __wch = ios_base::in); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_istringstream(ios_base::openmode __wch = ios_base::in) + : basic_istream<_CharT, _Traits>(&__sb_) + , __sb_(__wch | ios_base::in) + { } + _LIBCPP_INLINE_VISIBILITY explicit basic_istringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::in); - inline _LIBCPP_INLINE_VISIBILITY - basic_istringstream(basic_istringstream&& __rhs); + ios_base::openmode __wch = ios_base::in) + : basic_istream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch | ios_base::in) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_istringstream(basic_istringstream&& __rhs) + : basic_istream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_istringstream& 
operator=(basic_istringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_istringstream& __rhs); + basic_istringstream& operator=(basic_istringstream&& __rhs) { + basic_istream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + _LIBCPP_INLINE_VISIBILITY + void swap(basic_istringstream& __rhs) { + basic_istream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const { + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(ios_base::openmode __wch) - : basic_istream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::in) -{ -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_istream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch | ios_base::in) -{ -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(basic_istringstream&& __rhs) - : basic_istream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>& -basic_istringstream<_CharT, _Traits, _Allocator>::operator=(basic_istringstream&& __rhs) -{ - basic_istream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void basic_istringstream<_CharT, _Traits, _Allocator>::swap(basic_istringstream& __rhs) -{ - basic_istream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} 
- template inline _LIBCPP_INLINE_VISIBILITY void @@ -708,26 +679,6 @@ swap(basic_istringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_istringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, _Allocator> -basic_istringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void basic_istringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - // basic_ostringstream template @@ -749,68 +700,55 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_ostringstream(ios_base::openmode __wch = ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_ostringstream(ios_base::openmode __wch = ios_base::out) + : basic_ostream<_CharT, _Traits>(&__sb_) + , __sb_(__wch | ios_base::out) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_ostringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY - basic_ostringstream(basic_ostringstream&& __rhs); + ios_base::openmode __wch = ios_base::out) + : basic_ostream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch | ios_base::out) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_ostringstream(basic_ostringstream&& __rhs) + : basic_ostream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_ostringstream& operator=(basic_ostringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_ostringstream& __rhs); + basic_ostringstream& operator=(basic_ostringstream&& __rhs) { + basic_ostream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + void swap(basic_ostringstream& __rhs) { + 
basic_ostream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const { + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::out) -{ -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch | ios_base::out) -{ -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(basic_ostringstream&& __rhs) - : basic_ostream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>& -basic_ostringstream<_CharT, _Traits, _Allocator>::operator=(basic_ostringstream&& __rhs) -{ - basic_ostream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void -basic_ostringstream<_CharT, _Traits, _Allocator>::swap(basic_ostringstream& __rhs) -{ - basic_ostream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} - template inline _LIBCPP_INLINE_VISIBILITY void @@ -820,27 +758,6 @@ swap(basic_ostringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_ostringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, 
_Allocator> -basic_ostringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void -basic_ostringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - // basic_stringstream template @@ -862,68 +779,54 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_stringstream(ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_stringstream(ios_base::openmode __wch = ios_base::in | ios_base::out) + : basic_iostream<_CharT, _Traits>(&__sb_) + , __sb_(__wch) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_stringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY - basic_stringstream(basic_stringstream&& __rhs); + ios_base::openmode __wch = ios_base::in | ios_base::out) + : basic_iostream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_stringstream(basic_stringstream&& __rhs) + : basic_iostream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_stringstream& operator=(basic_stringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_stringstream& __rhs); + basic_stringstream& operator=(basic_stringstream&& __rhs) { + basic_iostream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + _LIBCPP_INLINE_VISIBILITY + void swap(basic_stringstream& __rhs) { + basic_iostream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const 
{ + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(ios_base::openmode __wch) - : basic_iostream<_CharT, _Traits>(&__sb_), - __sb_(__wch) -{ -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_iostream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch) -{ -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(basic_stringstream&& __rhs) - : basic_iostream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>& -basic_stringstream<_CharT, _Traits, _Allocator>::operator=(basic_stringstream&& __rhs) -{ - basic_iostream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void -basic_stringstream<_CharT, _Traits, _Allocator>::swap(basic_stringstream& __rhs) -{ - basic_iostream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} - template inline _LIBCPP_INLINE_VISIBILITY void @@ -933,27 +836,6 @@ swap(basic_stringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_stringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, _Allocator> -basic_stringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void -basic_stringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS From e0d66ccf068752b7d194bb231993f171ba23d830 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 
14:39:20 -0400 Subject: [PATCH 05/17] [libc++] Rename LIBCXX_ENABLE_DEBUG_MODE to LIBCXX_ENABLE_DEBUG_MODE_SUPPORT To make it clearer this is about whether the library supports the debug mode at all, not whether the debug mode is enabled. Per comment by Nico Weber on IRC. --- libcxx/cmake/caches/Apple.cmake | 2 +- libcxx/cmake/caches/Generic-nodebug.cmake | 2 +- libcxx/src/CMakeLists.txt | 2 +- libcxx/test/CMakeLists.txt | 2 +- libcxx/test/configs/legacy.cfg.in | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libcxx/cmake/caches/Apple.cmake b/libcxx/cmake/caches/Apple.cmake index cab7c1407d63e8..38f2c4c016b093 100644 --- a/libcxx/cmake/caches/Apple.cmake +++ b/libcxx/cmake/caches/Apple.cmake @@ -11,7 +11,7 @@ set(LIBCXX_TYPEINFO_COMPARISON_IMPLEMENTATION "1" CACHE STRING "") set(LIBCXX_CXX_ABI libcxxabi CACHE STRING "") set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") set(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT ON CACHE BOOL "") -set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") +set(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT OFF CACHE BOOL "") set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "") set(LIBCXXABI_ENABLE_PIC OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-nodebug.cmake b/libcxx/cmake/caches/Generic-nodebug.cmake index b301b2ef1c7d38..a62760fa78fd6f 100644 --- a/libcxx/cmake/caches/Generic-nodebug.cmake +++ b/libcxx/cmake/caches/Generic-nodebug.cmake @@ -1 +1 @@ -set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") +set(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT OFF CACHE BOOL "") diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 5de4a513ac1707..7f77a784577640 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -55,7 +55,7 @@ set(LIBCXX_SOURCES vector.cpp ) -if (LIBCXX_ENABLE_DEBUG_MODE) +if (LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) list(APPEND LIBCXX_SOURCES debug.cpp ) diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index e2e3382b779a4f..4d9f2e5f0017ce 100644 --- 
a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -70,7 +70,7 @@ pythonize_bool(LIBCXX_HAS_ATOMIC_LIB) pythonize_bool(LIBCXX_HAVE_CXX_ATOMICS_WITH_LIB) pythonize_bool(LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) pythonize_bool(LIBCXX_DEBUG_BUILD) -pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE) +pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) pythonize_bool(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) # By default, for non-standalone builds, libcxx and libcxxabi share a library diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 4bfc9bc69a7d93..f0a4e8a73e09a5 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -9,7 +9,7 @@ config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" config.libcxx_obj_root = "@LIBCXX_BINARY_DIR@" config.cxx_library_root = "@LIBCXX_LIBRARY_DIR@" config.enable_exceptions = @LIBCXX_ENABLE_EXCEPTIONS@ -config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE@ +config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE_SUPPORT@ config.enable_experimental = @LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY@ config.enable_filesystem = @LIBCXX_ENABLE_FILESYSTEM@ config.enable_rtti = @LIBCXX_ENABLE_RTTI@ From 877667287fa5515d525edfee169b18207b342cd5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 14:40:47 -0400 Subject: [PATCH 06/17] [libc++] Fixup a missing occurrence of LIBCXX_ENABLE_DEBUG_MODE --- libcxx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 8599e5d2cc997a..ecc8924f2cf942 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -94,7 +94,7 @@ option(LIBCXX_ENABLE_FILESYSTEM "Build filesystem as part of the main libc++ lib ${ENABLE_FILESYSTEM_DEFAULT}) option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." 
OFF) -option(LIBCXX_ENABLE_DEBUG_MODE +option(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT "Whether to include support for libc++'s debugging mode in the library. By default, this is turned on. If you turn it off and try to enable the debug mode when compiling a program against libc++, it will fail to link From 466c8296f20f5940fc282b228e28408b7c4d7d9b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 9 Oct 2020 20:52:08 +0200 Subject: [PATCH 07/17] [MemCpyOpt] Add test for incorrectly hoisted store (NFC) --- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll index 777ba51f38271a..6cffce50eb803a 100644 --- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -144,3 +144,22 @@ define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstid store %S %1, %S* %dst2 ret void } + +define void @throwing_call(%S* noalias %src, %S* %dst) { +; CHECK-LABEL: @throwing_call( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @call() [[ATTR2:#.*]] +; CHECK-NEXT: ret void +; + %1 = load %S, %S* %src + store %S zeroinitializer, %S* %src + call void @call() readnone + store %S %1, %S* %dst + ret void +} + +declare void @call() From 662024df331bd1f1a206678435e51232683e3cf6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 9 Oct 2020 10:26:50 -0700 Subject: [PATCH 08/17] [X86] Don't copy kill flag when expanding LCMPXCHG16B_SAVE_RBX The expansion code creates a copy to RBX before the real LCMPXCHG16B. 
It's possible this copy uses a register that is also used by the real LCMPXCHG16B. If we set the kill flag on the use in the copy, then we'll fail the machine verifier on the use on the LCMPXCHG16B. Differential Revision: https://reviews.llvm.org/D89151 --- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 4 +++- llvm/test/CodeGen/X86/pr42064.ll | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index d9c0964e9ed834..b1d15225eaaf1f 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -346,7 +346,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Copy the input argument of the pseudo into the argument of the // actual instruction. - TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), InArg.isKill()); + // NOTE: We don't copy the kill flag since the input might be the same reg + // as one of the other operands of LCMPXCHG16B. + TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false); // Create the actual instruction. MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B)); // Copy the operands related to the address. 
diff --git a/llvm/test/CodeGen/X86/pr42064.ll b/llvm/test/CodeGen/X86/pr42064.ll index 6269a59ff055e6..089895da18a1df 100644 --- a/llvm/test/CodeGen/X86/pr42064.ll +++ b/llvm/test/CodeGen/X86/pr42064.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s %struct.TestStruct = type { %union.Int128 } %union.Int128 = type { i128 } From f34bb06935aa3bab353d70d515b767fdd2f5625c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 9 Oct 2020 11:48:10 -0700 Subject: [PATCH 09/17] [X86] When expanding LCMPXCHG16B_NO_RBX in EmitInstrWithCustomInserter, directly copy address operands instead of going through X86AddressMode. I suspect getAddressFromInstr and addFullAddress are not handling all addresses cases properly based on a report from MaskRay. So just copy the operands directly. This should be more efficient anyway. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e17cab106436f..5d4dfaab45033d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33765,7 +33765,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::LCMPXCHG16B_NO_RBX: { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); Register BasePtr = TRI->getBaseRegister(); - X86AddressMode AM = getAddressFromInstr(&MI, 0); if (TRI->hasBasePointer(*MF) && (BasePtr == X86::RBX || BasePtr == X86::EBX)) { if (!BB->isLiveIn(BasePtr)) @@ -33776,15 +33775,20 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) .addReg(X86::RBX); Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); - addFullAddress( - BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst), AM) - .add(MI.getOperand(X86::AddrNumOperands)) - .addReg(SaveRBX); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst); + for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) + MIB.add(MI.getOperand(Idx)); + MIB.add(MI.getOperand(X86::AddrNumOperands)); + MIB.addReg(SaveRBX); } else { // Simple case, just copy the virtual register to RBX. 
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX) .add(MI.getOperand(X86::AddrNumOperands)); - addFullAddress(BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)), AM); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)); + for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) + MIB.add(MI.getOperand(Idx)); } MI.eraseFromParent(); return BB; From 40cef5a00eb83f44e946912b74be83e8dc02effb Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Fri, 9 Oct 2020 19:02:53 +0000 Subject: [PATCH 10/17] [clang] Add a test for CGDebugInfo treatment of blocks There doesn't seem to be a direct test of this, and I'm planning to make future changes which will affect it. I'm not particularly familiar with the blocks extension, so suggestions for better tests are welcome. Differential Revision: https://reviews.llvm.org/D88754 --- clang/test/CodeGen/debug-info-block-expr.c | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 clang/test/CodeGen/debug-info-block-expr.c diff --git a/clang/test/CodeGen/debug-info-block-expr.c b/clang/test/CodeGen/debug-info-block-expr.c new file mode 100644 index 00000000000000..009e7800b6ee95 --- /dev/null +++ b/clang/test/CodeGen/debug-info-block-expr.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fblocks -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s +// Verify that the desired DIExpression are generated for blocks. 
+ +void test() { +// CHECK: call void @llvm.dbg.declare({{.*}}!DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) + __block int i; +// CHECK: call void @llvm.dbg.declare({{.*}}!DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) + ^ { i = 1; }(); +} From 0e9b572949ce00e5ca01bf7555abdda12052a213 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Fri, 9 Oct 2020 11:49:38 -0700 Subject: [PATCH 11/17] [mlir] Fix TypeID for shared libraries built with -fvisibility=hidden. * Isolates the visibility controlled parts of its implementation to a detail namespace. * Applies a struct level visibility attribute which applies to the static local within the get() functions. * The prior version was not emitting a symbol for the static local "instance" fields when the user TU was compiled with -fvisibility=hidden. Differential Revision: https://reviews.llvm.org/D89153 --- mlir/include/mlir/Support/TypeID.h | 56 ++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Support/TypeID.h b/mlir/include/mlir/Support/TypeID.h index 518ff39e866943..ef19182882c637 100644 --- a/mlir/include/mlir/Support/TypeID.h +++ b/mlir/include/mlir/Support/TypeID.h @@ -20,6 +20,10 @@ namespace mlir { +namespace detail { +struct TypeIDExported; +} // namespace detail + /// This class provides an efficient unique identifier for a specific C++ type. /// This allows for a C++ type to be compared, hashed, and stored in an opaque /// context. This class is similar in some ways to std::type_index, but can be @@ -62,19 +66,10 @@ class TypeID { bool operator!=(const TypeID &other) const { return !(*this == other); } /// Construct a type info object for the given type T. - /// TODO: This currently won't work when using DLLs as it requires properly - /// attaching dllimport and dllexport. 
Fix this when that information is - /// available within LLVM. template - LLVM_EXTERNAL_VISIBILITY static TypeID get() { - static Storage instance; - return TypeID(&instance); - } + static TypeID get(); template