Skip to content

Commit

Permalink
AMDGPU: Change boolean content type to 0 or 1
Browse files Browse the repository at this point in the history
The usage of target boolean checks is overly inflexible, since sext
and zext of a compare are equally cheap. The choice is arbitrary, but
using 0/1 is to some degree the path of least resistance, since
that's what most targets use. This enables a few combines that don't
bother to support ZeroOrNegativeOneBooleanContent.
  • Loading branch information
arsenm committed Nov 15, 2019
1 parent 69fcfb7 commit 31479d8
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 21 deletions.
3 changes: 0 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,9 +451,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);

setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

computeRegisterProperties(Subtarget->getRegisterInfo());

// Legalize loads and stores to the private address space.
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

computeRegisterProperties(Subtarget->getRegisterInfo());

// The boolean content concept here is too inflexible. Compares only ever
// really produce a 1-bit result. Any copy/extend from these will turn into a
// select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
// it's what most targets use.
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);

// We need to custom lower vector loads from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
.addImm(-1)
.addImm(1)
.addImm(0);
return;
}
Expand Down Expand Up @@ -840,7 +840,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
.addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
Expand All @@ -855,7 +855,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
.addImm(-1);
.addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
Expand Down Expand Up @@ -900,7 +900,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
.addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
Expand All @@ -919,7 +919,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
.addImm(-1);
.addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s0, 0
; GCN-NEXT: s_cselect_b32 s0, -1, 0
; GCN-NEXT: s_cselect_b32 s0, 1, 0
; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: v_mov_b32_e32 v0, s0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) {
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s2, 0
; GCN-NEXT: s_cselect_b32 s2, -1, 0
; GCN-NEXT: s_cselect_b32 s2, 1, 0
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, s0
Expand Down
9 changes: 8 additions & 1 deletion llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; GCN-LABEL: {{^}}work_item_info:
; GCN-NOT: v0
; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN-NOT: v0

; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0

; GCN-PROMOTE: v_cmp_eq_u32_e64 vcc, [[IN]], 1
; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc

; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s

; GCN-LABEL: {{^}}add1:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
Expand Down Expand Up @@ -130,16 +130,14 @@ bb:
; GCN-LABEL: {{^}}sub_sube_commuted:
; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]]
; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]

; GFX9-LABEL: {{^}}sub_sube_commuted:
; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]]
; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]]
; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,10 @@ entry:

; GCN-LABEL: {{^}}bit128_extelt:
; GCN-NOT: buffer_
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1, 0,
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
; GCN-DAG: v_cmp_ne_u32_e32 [[CL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, v{{[0-9]+}}, [[CL]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
Expand Down

0 comments on commit 31479d8

Please sign in to comment.